# In [None]:
# visualize_all_datasets_pandas.ipynb

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# CSV locations for the three scored exports; edit these to match your setup.
fb_ads_path, fb_posts_path, tw_posts_path = (
    '2024_fb_ads_president_scored_anon.csv',
    '2024_fb_posts_president_scored_anon.csv',
    '2024_tw_posts_president_scored_anon.csv',
)

# Read every CSV once, keyed by the label used in plot titles and summaries.
datasets = {
    label: pd.read_csv(csv_path)
    for label, csv_path in (
        ('Facebook Ads', fb_ads_path),
        ('Facebook Posts', fb_posts_path),
        ('Twitter Posts', tw_posts_path),
    )
}

# Keep the individual frames reachable under their own names as well.
fb_ads = datasets['Facebook Ads']
fb_posts = datasets['Facebook Posts']
tw_posts = datasets['Twitter Posts']

def _plot_numeric_column(df, col, dataset_name):
    """Show a histogram (with KDE overlay) and a boxplot for one numeric column.

    A column without any non-null values is reported and skipped instead of
    being handed to the plotting calls.
    """
    # Drop missing values once and reuse the result for both plots.
    values = df[col].dropna()
    if values.empty:
        print(f"Skipping {col}: no non-null values to plot.")
        return

    plt.figure(figsize=(10, 4))
    sns.histplot(values, kde=True, bins=30)
    plt.title(f'{dataset_name} - Histogram of {col}')
    plt.xlabel(col)
    plt.ylabel('Frequency')
    plt.show()

    plt.figure(figsize=(8, 3))
    sns.boxplot(x=values)
    plt.title(f'{dataset_name} - Boxplot of {col}')
    plt.show()


def _plot_categorical_column(df, col, dataset_name):
    """Show a bar plot of the 10 most frequent values of one categorical column.

    A column without any non-null values is reported and skipped instead of
    being handed to the plotting calls.
    """
    counts = df[col].value_counts(dropna=True).head(10)
    if counts.empty:
        print(f"Skipping {col}: no non-null values to plot.")
        return

    plt.figure(figsize=(12, 5))
    sns.barplot(x=counts.index, y=counts.values)
    # Category labels can be long; rotate them so they stay legible.
    plt.xticks(rotation=45, ha='right')
    plt.title(f'{dataset_name} - Top 10 Value Counts for {col}')
    plt.xlabel(col)
    plt.ylabel('Count')
    plt.show()


def analyze_and_plot(df: pd.DataFrame, dataset_name: str) -> None:
    """Plot every numeric and categorical column of *df* and print a summary.

    Numeric columns get a histogram (30 bins, KDE overlay) and a boxplot.
    Categorical columns get a bar plot of their 10 most frequent values.
    Columns that contain no non-null values are skipped with a printed note.

    Args:
        df: The dataset to explore.
        dataset_name: Label used in the printed messages and plot titles.
    """
    print(f"\n--- Analyzing {dataset_name} ---\n")

    # Identify numeric and categorical columns
    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    # Besides plain object columns, also pick up text held in pandas' dedicated
    # string dtype and categorical columns, so they are not silently left out.
    cat_cols = df.select_dtypes(
        include=['object', 'string', 'category']
    ).columns.tolist()

    print(f"Numeric columns: {numeric_cols}")
    print(f"Categorical columns: {cat_cols}")

    # Numeric columns: Histograms & Boxplots
    for col in numeric_cols:
        _plot_numeric_column(df, col, dataset_name)

    # Categorical columns: Top 10 value counts Bar plots
    for col in cat_cols:
        _plot_categorical_column(df, col, dataset_name)

    # Print basic narrative summary
    print(f"{dataset_name} Summary:")
    print(f"- Total records: {len(df)}")
    print(f"- Number of numeric columns: {len(numeric_cols)}")
    print(f"- Number of categorical columns: {len(cat_cols)}")
    print("- Numeric distributions show central tendency, spread, and outliers.")
    print("- Top categorical values indicate dominant categories.")
    print("- These visualizations can guide further data cleaning and feature engineering.\n")

# Walk the label -> DataFrame mapping and produce the plots and printed
# summary for each dataset in insertion order.
for name, data in datasets.items():
    analyze_and_plot(df=data, dataset_name=name)
