In [28]:
import pandas as pd
import polars as pl

# Load dataset
# The pure-Python parser is paired with on_bad_lines='skip' so that
# malformed rows are skipped instead of aborting the read.
df_pandas = pd.read_csv(
    "/content/2024_fb_ads_president_scored_anon.csv",
    engine='python',
    encoding='utf-8',
    on_bad_lines='skip',
)

In [32]:
# ----- Analysis with Pandas -----
# Banner followed by the overall dimensions of the frame.
print("\n Pandas Summary for Facebook Ads")
print(f"Total Rows: {df_pandas.shape[0]}, Columns: {df_pandas.shape[1]}")

# Number of null entries in each column.
print("\nMissing Values Per Column:")
missing_per_column = df_pandas.isna().sum()
print(missing_per_column)

# describe() output, transposed so every described column becomes a row.
print("\nStatistical Summary (Numerical Columns):")
described_transposed = df_pandas.describe().transpose()
print(described_transposed)

# Leading rows of the frame (head() with its default row count).
print("\nTop 5 Rows:")
leading_rows = df_pandas.head()
print(leading_rows)



 Pandas Summary for Facebook Ads
Total Rows: 246745, Columns: 41

Missing Values Per Column:
page_id                                         0
ad_id                                           0
ad_creation_time                                0
bylines                                      1009
currency                                        0
delivery_by_region                              0
demographic_distribution                        0
estimated_audience_size                         0
estimated_impressions                           0
estimated_spend                                 0
publisher_platforms                             0
illuminating_scored_message                     0
illuminating_mentions                           0
scam_illuminating                               0
election_integrity_Truth_illuminating           0
advocacy_msg_type_illuminating                  0
issue_msg_type_illuminating                     0
attack_msg_type_illuminating                    0
image_

In [31]:
# ----- Analysis with Polars -----
print("\n Polars Summary for Facebook Ads")

# Re-read the same CSV with polars; ignore_errors=True asks the reader to
# keep going when it meets values it cannot parse.
df_polars = pl.read_csv(
    "/content/2024_fb_ads_president_scored_anon.csv",
    ignore_errors=True,
)

# Dimensions, per-column null counts and the inferred schema.
print("Shape:", df_polars.shape)
null_counts = df_polars.null_count()
print("Missing Value Count:\n", null_counts)
print("\nSchema (Data Types):")
inferred_schema = df_polars.schema
print(inferred_schema)

# Best-effort summary statistics: any failure is reported and the cell
# carries on rather than raising.
try:
    stats = df_polars.describe()
    print("\nDescriptive Stats (Polars):")
    print(stats)
except Exception as err:
    print("Polars describe() failed:", err)




 Polars Summary for Facebook Ads
Shape: (246745, 41)
Missing Value Count:
 shape: (1, 41)
┌─────────┬───────┬─────────────┬─────────┬───┬─────────────┬────────────┬────────────┬────────────┐
│ page_id ┆ ad_id ┆ ad_creation ┆ bylines ┆ … ┆ womens_issu ┆ incivility ┆ freefair_i ┆ fraud_illu │
│ ---     ┆ ---   ┆ _time       ┆ ---     ┆   ┆ e_topic_ill ┆ _illuminat ┆ lluminatin ┆ minating   │
│ u32     ┆ u32   ┆ ---         ┆ u32     ┆   ┆ uminatin…   ┆ ing        ┆ g          ┆ ---        │
│         ┆       ┆ u32         ┆         ┆   ┆ ---         ┆ ---        ┆ ---        ┆ u32        │
│         ┆       ┆             ┆         ┆   ┆ u32         ┆ u32        ┆ u32        ┆            │
╞═════════╪═══════╪═════════════╪═════════╪═══╪═════════════╪════════════╪════════════╪════════════╡
│ 0       ┆ 0     ┆ 0           ┆ 1009    ┆ … ┆ 0           ┆ 0          ┆ 0          ┆ 0          │
└─────────┴───────┴─────────────┴─────────┴───┴─────────────┴────────────┴────────────┴────────────┘


In [33]:
# ----- Base Python Summary -----
# Count, mean, min and max are computed with plain Python built-ins on a
# list of each column's non-null values, rather than with pandas reducers.
print("\n Base Python Summary (Numerical Columns)")
numeric_cols = df_pandas.select_dtypes(include=['int64', 'float64']).columns

for col in numeric_cols:
    non_null = df_pandas[col].dropna().tolist()
    if not non_null:
        # Nothing left after dropping nulls: skip to avoid dividing by zero.
        continue
    n = len(non_null)
    mean = sum(non_null) / n
    min_val, max_val = min(non_null), max(non_null)
    print(f"{col}: Count={n}, Mean={mean:.2f}, Min={min_val}, Max={max_val}")


 Base Python Summary (Numerical Columns)
estimated_audience_size: Count=246745, Mean=556462.86, Min=0, Max=1000001
estimated_impressions: Count=246745, Mean=45601.53, Min=499, Max=1000000
estimated_spend: Count=246745, Mean=1061.29, Min=49, Max=474999
scam_illuminating: Count=246745, Mean=0.07, Min=0, Max=1
election_integrity_Truth_illuminating: Count=246745, Mean=0.05, Min=0, Max=1
advocacy_msg_type_illuminating: Count=246745, Mean=0.55, Min=0, Max=1
issue_msg_type_illuminating: Count=246745, Mean=0.38, Min=0, Max=1
attack_msg_type_illuminating: Count=246745, Mean=0.27, Min=0, Max=1
image_msg_type_illuminating: Count=246745, Mean=0.22, Min=0, Max=1
cta_msg_type_illuminating: Count=246745, Mean=0.57, Min=0, Max=1
engagement_cta_subtype_illuminating: Count=246745, Mean=0.12, Min=0, Max=1
fundraising_cta_subtype_illuminating: Count=246745, Mean=0.23, Min=0, Max=1
voting_cta_subtype_illuminating: Count=246745, Mean=0.14, Min=0, Max=1
covid_topic_illuminating: Count=246745, Mean=0.02, Min

**SUMMARY OF THE CODE**

Loaded the dataset (2024_fb_ads_president_scored_anon.csv)

Handled malformed rows using on_bad_lines='skip' and engine='python' in Pandas.

Used ignore_errors=True in Polars for robust parsing.

Pandas Analysis

Printed shape, column-wise missing value counts, statistical summary, and first 5 rows.

Polars Analysis

Displayed shape, null counts, schema, and summary statistics using df_polars.describe().

Base Python Summary

For each numeric column:

Calculated count, mean, min, and max manually using Python lists.

