In [5]:
import polars as pl
import time

def main(
    filepath: str = 'C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv',
    numeric_col: str = 'estimated_spend',
    grouping_col: str = 'page_id',
) -> None:
    """
    Print descriptive statistics for one numeric column of a CSV file using Polars.

    Loads the CSV, casts ``numeric_col`` to Float64, prints the overall
    ``describe()`` summary for it, then prints count/mean/std/min/max of that
    column for the first 10 groups of ``grouping_col``, followed by the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyse. The default is the
            Facebook ads export this cell was written for.
        numeric_col: Name of the column to summarise.
        grouping_col: Name of the column whose values define the groups.

    Returns:
        None. All results are printed. If ``filepath`` does not exist, an
        error message is printed and the function returns early.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Polars ---")

    # --- Load and Prepare Data ---
    try:
        # strict=False: values that cannot be converted to a float become null
        # instead of raising, so inspect `null_count` in the summary below.
        df = pl.read_csv(filepath).with_columns(
            pl.col(numeric_col).cast(pl.Float64, strict=False)
        )
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df.select(pl.col(numeric_col)).describe())

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    # maintain_order=True keeps groups in order of first appearance in the
    # file; without it the group order is arbitrary and the 10 rows printed
    # below could differ from run to run.
    grouped_stats = df.group_by(grouping_col, maintain_order=True).agg(
        pl.col(numeric_col).count().alias('count'),  # non-null values only
        pl.col(numeric_col).mean().alias('mean'),
        pl.col(numeric_col).std().alias('std'),
        pl.col(numeric_col).min().alias('min'),
        pl.col(numeric_col).max().alias('max'),
    )
    print(grouped_stats.head(10))  # First 10 groups only, for brevity

    end_time = time.perf_counter()
    print(f"\nPolars script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis when this cell is executed; the guard keeps main() from
# running if this code is imported as a module instead.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv with Polars ---

--- Overall Analysis ---
Numeric Description for 'estimated_spend':
shape: (9, 2)
┌────────────┬─────────────────┐
│ statistic  ┆ estimated_spend │
│ ---        ┆ ---             │
│ str        ┆ f64             │
╞════════════╪═════════════════╡
│ count      ┆ 246745.0        │
│ null_count ┆ 0.0             │
│ mean       ┆ 1061.291434     │
│ std        ┆ 4992.560749     │
│ min        ┆ 49.0            │
│ 25%        ┆ 49.0            │
│ 50%        ┆ 49.0            │
│ 75%        ┆ 449.0           │
│ max        ┆ 474999.0        │
└────────────┴─────────────────┘

--- Grouped Analysis by 'page_id' ---
shape: (10, 6)
┌─────────────────────────────────┬───────┬─────────────┬─────────────┬────────┬────────┐
│ page_id                         ┆ count ┆ mean        ┆ std         ┆ min    ┆ max    │
│ ---                             ┆ ---   ┆ ---         ┆ ---         ┆ ---    ┆

In [3]:
import polars as pl
import time

def main(
    filepath: str = 'C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv',
    numeric_col: str = 'Total Interactions',
    grouping_col: str = 'Facebook_Id',
) -> None:
    """
    Print descriptive statistics for one numeric column of a CSV file using Polars.

    Loads the CSV, casts ``numeric_col`` to Float64, prints the overall
    ``describe()`` summary for it, then prints count/mean/std/min/max of that
    column for the first 10 groups of ``grouping_col``, followed by the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyse. The default is the
            Facebook posts export this cell was written for.
        numeric_col: Name of the column to summarise.
        grouping_col: Name of the column whose values define the groups.

    Returns:
        None. All results are printed. If ``filepath`` does not exist, an
        error message is printed and the function returns early.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Polars ---")

    # --- Load and Prepare Data ---
    try:
        # strict=False: values that cannot be converted to a float become null
        # instead of raising, so inspect `null_count` in the summary below.
        # NOTE(review): the recorded run shows 4611 nulls for this column;
        # confirm whether those are blanks in the CSV or values the
        # non-strict cast could not parse.
        df = pl.read_csv(filepath).with_columns(
            pl.col(numeric_col).cast(pl.Float64, strict=False)
        )
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df.select(pl.col(numeric_col)).describe())

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    # maintain_order=True keeps groups in order of first appearance in the
    # file; without it the group order is arbitrary and the 10 rows printed
    # below could differ from run to run.
    grouped_stats = df.group_by(grouping_col, maintain_order=True).agg(
        pl.col(numeric_col).count().alias('count'),  # non-null values only
        pl.col(numeric_col).mean().alias('mean'),
        pl.col(numeric_col).std().alias('std'),
        pl.col(numeric_col).min().alias('min'),
        pl.col(numeric_col).max().alias('max'),
    )
    print(grouped_stats.head(10))  # First 10 groups only, for brevity

    end_time = time.perf_counter()
    print(f"\nPolars script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis when this cell is executed; the guard keeps main() from
# running if this code is imported as a module instead.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv with Polars ---

--- Overall Analysis ---
Numeric Description for 'Total Interactions':
shape: (9, 2)
┌────────────┬────────────────────┐
│ statistic  ┆ Total Interactions │
│ ---        ┆ ---                │
│ str        ┆ f64                │
╞════════════╪════════════════════╡
│ count      ┆ 14398.0            │
│ null_count ┆ 4611.0             │
│ mean       ┆ 2210.154813        │
│ std        ┆ 13066.622357       │
│ min        ┆ 0.0                │
│ 25%        ┆ 47.0               │
│ 50%        ┆ 133.0              │
│ 75%        ┆ 453.0              │
│ max        ┆ 470087.0           │
└────────────┴────────────────────┘

--- Grouped Analysis by 'Facebook_Id' ---
shape: (10, 6)
┌─────────────────────────────────┬───────┬─────────────┬──────────────┬───────┬──────────┐
│ Facebook_Id                     ┆ count ┆ mean        ┆ std          ┆ min   ┆ max      │
│ ---                 

In [4]:
import polars as pl
import time

def main(
    filepath: str = 'C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv',
    numeric_col: str = 'viewCount',
    grouping_col: str = 'id',
) -> None:
    """
    Print descriptive statistics for one numeric column of a CSV file using Polars.

    Loads the CSV, casts ``numeric_col`` to Float64, prints the overall
    ``describe()`` summary for it, then prints count/mean/std/min/max of that
    column for the first 10 groups of ``grouping_col``, followed by the
    elapsed wall-clock time.

    Args:
        filepath: Path of the CSV file to analyse. The default is the
            Twitter posts export this cell was written for.
        numeric_col: Name of the column to summarise.
        grouping_col: Name of the column whose values define the groups.
            NOTE(review): the default ``'id'`` looks like a per-row
            identifier; if it is unique, every group holds a single row and
            ``std`` is null. Confirm the intended grouping key.

    Returns:
        None. All results are printed. If ``filepath`` does not exist, an
        error message is printed and the function returns early.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Polars ---")

    # --- Load and Prepare Data ---
    try:
        # strict=False: values that cannot be converted to a float become null
        # instead of raising, so inspect `null_count` in the summary below.
        df = pl.read_csv(filepath).with_columns(
            pl.col(numeric_col).cast(pl.Float64, strict=False)
        )
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    print("\n--- Overall Analysis ---")
    print(f"Numeric Description for '{numeric_col}':")
    print(df.select(pl.col(numeric_col)).describe())

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    # maintain_order=True keeps groups in order of first appearance in the
    # file; without it the group order is arbitrary and the 10 rows printed
    # below could differ from run to run.
    grouped_stats = df.group_by(grouping_col, maintain_order=True).agg(
        pl.col(numeric_col).count().alias('count'),  # non-null values only
        pl.col(numeric_col).mean().alias('mean'),
        pl.col(numeric_col).std().alias('std'),
        pl.col(numeric_col).min().alias('min'),
        pl.col(numeric_col).max().alias('max'),
    )
    print(grouped_stats.head(10))  # First 10 groups only, for brevity

    end_time = time.perf_counter()
    print(f"\nPolars script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis when this cell is executed; the guard keeps main() from
# running if this code is imported as a module instead.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv with Polars ---

--- Overall Analysis ---
Numeric Description for 'viewCount':
shape: (9, 2)
┌────────────┬───────────────┐
│ statistic  ┆ viewCount     │
│ ---        ┆ ---           │
│ str        ┆ f64           │
╞════════════╪═══════════════╡
│ count      ┆ 27304.0       │
│ null_count ┆ 0.0           │
│ mean       ┆ 507084.731834 │
│ std        ┆ 3.2122e6      │
│ min        ┆ 5.0           │
│ 25%        ┆ 27853.0       │
│ 50%        ┆ 70942.0       │
│ 75%        ┆ 303661.0      │
│ max        ┆ 3.33502775e8  │
└────────────┴───────────────┘

--- Grouped Analysis by 'id' ---
shape: (10, 6)
┌─────────────────────────────────┬───────┬──────────┬──────┬──────────┬──────────┐
│ id                              ┆ count ┆ mean     ┆ std  ┆ min      ┆ max      │
│ ---                             ┆ ---   ┆ ---      ┆ ---  ┆ ---      ┆ ---      │
│ str                             ┆ u32   ┆ f64