In [26]:
import pandas as pd
import time

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv',
    numeric_col='estimated_spend',
    categorical_col='bylines',
    grouping_col='page_id',
):
    """
    Performs descriptive statistical analysis on the ad dataset using the Pandas library.

    Prints, in order: a ``describe()`` summary of ``numeric_col``, the ten most
    frequent values of ``categorical_col``, the first ten rows of the per-group
    ``describe()`` of ``numeric_col`` grouped by ``grouping_col``, and the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyze.
        numeric_col: Column summarized numerically. Values that cannot be
            parsed as numbers are coerced to NaN before summarizing.
        categorical_col: Column whose value frequencies are reported.
        grouping_col: Column used as the group key for the grouped summary.

    Returns:
        None. All results are printed. If the file is missing, empty, or lacks
        one of the requested columns, an error line is printed and the
        function returns early instead of raising.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pandas ---")

    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row to parse.
        print(f"Error: The file '{filepath}' is empty.")
        return

    # Validate up front so a typo in a column name yields a readable message
    # rather than a KeyError traceback halfway through the report.
    requested_cols = dict.fromkeys((numeric_col, categorical_col, grouping_col))
    missing_cols = [col for col in requested_cols if col not in df.columns]
    if missing_cols:
        print(f"Error: Column(s) {missing_cols} not found in '{filepath}'.")
        return

    # errors='coerce' turns unparseable entries into NaN so describe() works.
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df[[numeric_col]].describe())

    print(f"\nTop 10 Value Counts for '{categorical_col}':")
    print(df[categorical_col].value_counts().head(10))

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    grouped_stats = df.groupby(grouping_col)[numeric_col].describe()
    print(grouped_stats.head(10))  # Only the first 10 groups, for brevity

    end_time = time.perf_counter()
    print(f"\nPandas script finished in {end_time - start_time:.4f} seconds.")

# Entry point: run the analysis only when this code executes as the top-level
# program (importing it as a module will not trigger the run).
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv with Pandas ---

--- Overall Analysis ---
Numeric Description for 'estimated_spend':
       estimated_spend
count    246745.000000
mean       1061.291434
std        4992.560749
min          49.000000
25%          49.000000
50%          49.000000
75%         449.000000
max      474999.000000

Top 10 Value Counts for 'bylines':
bylines
HARRIS FOR PRESIDENT                        49788
HARRIS VICTORY FUND                         32612
BIDEN VICTORY FUND                          15539
DONALD J. TRUMP FOR PRESIDENT 2024, INC.    15112
Trump National Committee JFC                 7279
TRUMP 47 COMMITTEE, INC.                     5027
BIDEN FOR PRESIDENT                          3972
MOVEON.ORG POLITICAL ACTION                  3535
RALLY BY RELENTLESS PBC                      3186
Working America                              2814
Name: count, dtype: int64

--- Grouped Analysis by 'page_id' ---
       

In [29]:
import pandas as pd
import time

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv',
    numeric_col='Total Interactions',
    categorical_col='Page Category',
    grouping_col='Facebook_Id',
):
    """
    Performs descriptive statistical analysis on the Facebook posts dataset using the Pandas library.

    Prints, in order: a ``describe()`` summary of ``numeric_col``, the ten most
    frequent values of ``categorical_col``, the first ten rows of the per-group
    ``describe()`` of ``numeric_col`` grouped by ``grouping_col``, and the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyze.
        numeric_col: Column summarized numerically. Values that cannot be
            parsed as numbers are coerced to NaN before summarizing.
        categorical_col: Column whose value frequencies are reported.
        grouping_col: Column used as the group key for the grouped summary.

    Returns:
        None. All results are printed. If the file is missing, empty, or lacks
        one of the requested columns, an error line is printed and the
        function returns early instead of raising.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pandas ---")

    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row to parse.
        print(f"Error: The file '{filepath}' is empty.")
        return

    # Validate up front so a typo in a column name yields a readable message
    # rather than a KeyError traceback halfway through the report.
    requested_cols = dict.fromkeys((numeric_col, categorical_col, grouping_col))
    missing_cols = [col for col in requested_cols if col not in df.columns]
    if missing_cols:
        print(f"Error: Column(s) {missing_cols} not found in '{filepath}'.")
        return

    # errors='coerce' turns unparseable entries into NaN so describe() works.
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df[[numeric_col]].describe())

    print(f"\nTop 10 Value Counts for '{categorical_col}':")
    print(df[categorical_col].value_counts().head(10))

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    grouped_stats = df.groupby(grouping_col)[numeric_col].describe()
    print(grouped_stats.head(10))  # Only the first 10 groups, for brevity

    end_time = time.perf_counter()
    print(f"\nPandas script finished in {end_time - start_time:.4f} seconds.")

# Entry point: run the analysis only when this code executes as the top-level
# program (importing it as a module will not trigger the run).
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv with Pandas ---

--- Overall Analysis ---
Numeric Description for 'Total Interactions':
       Total Interactions
count        14398.000000
mean          2210.154813
std          13066.622357
min              0.000000
25%             47.000000
50%            133.000000
75%            452.750000
max         470087.000000

Top 10 Value Counts for 'Page Category':
Page Category
PERSON                 9453
ACTOR                  3304
POLITICIAN             2595
POLITICAL_CANDIDATE    1161
ENTREPRENEUR             23
YOUTH_ORGANIZATION        1
Name: count, dtype: int64

--- Grouped Analysis by 'Facebook_Id' ---
                                                     count          mean  \
Facebook_Id                                                                
058bd86861262fa71733f8515c34b729d9ebcad3d318e2a...   210.0    105.733333   
278df0e37b61bf8a5a6374873b38faddd50fffea73eaeb5...    23.0     

In [30]:
import pandas as pd
import time

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv',
    numeric_col='viewCount',
    categorical_col='source',
    grouping_col='id',
):
    """
    Performs descriptive statistical analysis on the Twitter posts dataset using the Pandas library.

    Prints, in order: a ``describe()`` summary of ``numeric_col``, the ten most
    frequent values of ``categorical_col``, the first ten rows of the per-group
    ``describe()`` of ``numeric_col`` grouped by ``grouping_col``, and the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyze.
        numeric_col: Column summarized numerically. Values that cannot be
            parsed as numbers are coerced to NaN before summarizing.
        categorical_col: Column whose value frequencies are reported.
        grouping_col: Column used as the group key for the grouped summary.

    Returns:
        None. All results are printed. If the file is missing, empty, or lacks
        one of the requested columns, an error line is printed and the
        function returns early instead of raising.
    """
    # NOTE(review): the recorded output for the default 'id' grouping shows a
    # group with count 1.0. If 'id' is unique per row, per-group statistics
    # add nothing over the raw values -- confirm the intended grouping key.
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pandas ---")

    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row to parse.
        print(f"Error: The file '{filepath}' is empty.")
        return

    # Validate up front so a typo in a column name yields a readable message
    # rather than a KeyError traceback halfway through the report.
    requested_cols = dict.fromkeys((numeric_col, categorical_col, grouping_col))
    missing_cols = [col for col in requested_cols if col not in df.columns]
    if missing_cols:
        print(f"Error: Column(s) {missing_cols} not found in '{filepath}'.")
        return

    # errors='coerce' turns unparseable entries into NaN so describe() works.
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df[[numeric_col]].describe())

    print(f"\nTop 10 Value Counts for '{categorical_col}':")
    print(df[categorical_col].value_counts().head(10))

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    grouped_stats = df.groupby(grouping_col)[numeric_col].describe()
    print(grouped_stats.head(10))  # Only the first 10 groups, for brevity

    end_time = time.perf_counter()
    print(f"\nPandas script finished in {end_time - start_time:.4f} seconds.")

# Entry point: run the analysis only when this code executes as the top-level
# program (importing it as a module will not trigger the run).
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv with Pandas ---

--- Overall Analysis ---
Numeric Description for 'viewCount':
          viewCount
count  2.730400e+04
mean   5.070847e+05
std    3.212174e+06
min    5.000000e+00
25%    2.785275e+04
50%    7.094200e+04
75%    3.036630e+05
max    3.335028e+08

Top 10 Value Counts for 'source':
source
Twitter Web App            14930
Twitter for iPhone          8494
Sprout Social               2933
Twitter Media Studio         499
Twitter for iPad             266
Periscope                    103
Hootsuite Inc.                47
Loomly                        10
Twitter for Advertisers        7
TweetDeck Web App              7
Name: count, dtype: int64

--- Grouped Analysis by 'id' ---
                                                    count      mean  std  \
id                                                                         
0000635d0c9e7bdf89dfc13811d080dbffac54894594354...    1.0   487