In [31]:
import csv
import math
import time
from collections import Counter

def calculate_stats(data_list):
    """Summarise a list of numbers as a dict of descriptive statistics.

    Returns a dict with the keys 'count', 'mean', 'min', 'max' and 'std_dev'.
    The standard deviation is the population form (squared deviations are
    divided by the number of values, not by one less). An empty/falsy input
    yields a placeholder result with zero count, mean and std_dev and
    ``None`` for min and max.
    """
    if not data_list:
        return dict(count=0, mean=0, min=None, max=None, std_dev=0)

    n = len(data_list)
    average = sum(data_list) / n

    # Population variance: mean of the squared distances from the average.
    squared_deviation_total = sum((value - average) ** 2 for value in data_list)

    return {
        'count': n,
        'mean': average,
        'min': min(data_list),
        'max': max(data_list),
        'std_dev': math.sqrt(squared_deviation_total / n),
    }

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv',
    numeric_col='estimated_spend',
    categorical_col='bylines',
    grouping_col='page_id',
    max_groups=5,
):
    """
    Performs descriptive statistical analysis using only Python's standard library.

    The CSV is streamed once; during that single pass the function collects
    the numeric column, counts the categorical column, and buckets the numeric
    values by the grouping column. Results are printed, nothing is returned.

    Args:
        filepath: Path of the CSV file to analyse.
        numeric_col: Column whose values are converted with ``float`` and
            summarised. Rows where the conversion fails are skipped.
        categorical_col: Column whose values are tallied; the ten most
            common values are printed.
        grouping_col: Column used to bucket the numeric values.
        max_groups: How many groups (in first-seen order) to print.
            ``None`` prints every group.

    Calling ``main()`` with no arguments analyses the same file and columns
    as before the parameters were introduced.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pure Python ---")

    numeric_data = []
    cat_counts = Counter()
    grouped_data = {}

    try:
        # newline='' is what the csv module documents for reading: it keeps
        # newlines embedded in quoted fields intact. 'utf-8-sig' also reads
        # plain UTF-8 but strips a leading BOM, which would otherwise end up
        # glued to the first header name.
        with open(filepath, mode='r', encoding='utf-8-sig', newline='') as infile:
            reader = csv.DictReader(infile)
            header = reader.fieldnames or []
            missing_cols = [
                col for col in (numeric_col, categorical_col, grouping_col)
                if col not in header
            ]

            for row in reader:
                if categorical_col in row:
                    cat_counts[row[categorical_col]] += 1

                # Parse the numeric value once and reuse it for both the
                # overall and the grouped statistics.
                try:
                    value = float(row[numeric_col])
                except (ValueError, KeyError, TypeError):
                    continue

                numeric_data.append(value)
                if grouping_col in row:
                    grouped_data.setdefault(row[grouping_col], []).append(value)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    if missing_cols:
        # Without this a misspelled column name just produces empty statistics.
        print(f"Warning: column(s) {missing_cols} not found in the CSV header.")

    print("\n--- Overall Analysis ---")
    print(f"Statistics for '{numeric_col}': {calculate_stats(numeric_data)}")
    print(f"\nTop 10 most common for '{categorical_col}': {cat_counts.most_common(10)}")

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    for key, values in list(grouped_data.items())[:max_groups]:
        print(f"Group {key}: {calculate_stats(values)}")

    end_time = time.perf_counter()
    print(f"\nPure Python script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_ads_president_scored_anon.csv with Pure Python ---

--- Overall Analysis ---
Statistics for 'estimated_spend': {'count': 246745, 'mean': 1061.2914344768892, 'min': 49.0, 'max': 474999.0, 'std_dev': 4992.550631940592}

Top 10 most common for 'bylines': [('HARRIS FOR PRESIDENT', 49788), ('HARRIS VICTORY FUND', 32612), ('BIDEN VICTORY FUND', 15539), ('DONALD J. TRUMP FOR PRESIDENT 2024, INC.', 15112), ('Trump National Committee JFC', 7279), ('TRUMP 47 COMMITTEE, INC.', 5027), ('BIDEN FOR PRESIDENT', 3972), ('MOVEON.ORG POLITICAL ACTION', 3535), ('RALLY BY RELENTLESS PBC', 3186), ('Working America', 2814)]

--- Grouped Analysis by 'page_id' ---
Group 4ff23a48b53d988df50ddfebb0e442a984ab8f94e874ef9b9cb34394e0c5d230: {'count': 33, 'mean': 467.1818181818182, 'min': 49.0, 'max': 2249.0, 'std_dev': 652.9441692474709}
Group b9eb7e353e596d5fc99568d4ef77d4b11ced3243537cbd0895dde3195b69b6be: {'count': 3, 'mean': 82.33333333333333, 'min':

In [32]:
import csv
import math
import time
from collections import Counter

def calculate_stats(data_list):
    """Compute count, mean, min, max and population std-dev for a number list.

    The result always carries the same five keys in the same order. For an
    empty/falsy input the placeholder values (0 / None) are returned as-is.
    """
    stats = {'count': 0, 'mean': 0, 'min': None, 'max': None, 'std_dev': 0}

    if data_list:
        size = len(data_list)
        centre = sum(data_list) / size
        # Divide by the full size (population variance), then take the root.
        spread = sum(map(lambda item: (item - centre) ** 2, data_list)) / size
        stats.update(
            count=size,
            mean=centre,
            min=min(data_list),
            max=max(data_list),
            std_dev=math.sqrt(spread),
        )

    return stats

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv',
    numeric_col='Total Interactions',
    categorical_col='Page Category',
    grouping_col='Facebook_Id',
    max_groups=5,
):
    """
    Performs descriptive statistical analysis using only Python's standard library.

    The CSV is streamed once; during that single pass the function collects
    the numeric column, counts the categorical column, and buckets the numeric
    values by the grouping column. Results are printed, nothing is returned.

    Args:
        filepath: Path of the CSV file to analyse.
        numeric_col: Column whose values are converted with ``float`` and
            summarised. Rows where the conversion fails are skipped.
        categorical_col: Column whose values are tallied; the ten most
            common values are printed.
        grouping_col: Column used to bucket the numeric values.
        max_groups: How many groups (in first-seen order) to print.
            ``None`` prints every group.

    Calling ``main()`` with no arguments analyses the same file and columns
    as before the parameters were introduced.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pure Python ---")

    numeric_data = []
    cat_counts = Counter()
    grouped_data = {}

    try:
        # newline='' is what the csv module documents for reading: it keeps
        # newlines embedded in quoted fields intact. 'utf-8-sig' also reads
        # plain UTF-8 but strips a leading BOM, which would otherwise end up
        # glued to the first header name.
        with open(filepath, mode='r', encoding='utf-8-sig', newline='') as infile:
            reader = csv.DictReader(infile)
            header = reader.fieldnames or []
            missing_cols = [
                col for col in (numeric_col, categorical_col, grouping_col)
                if col not in header
            ]

            for row in reader:
                if categorical_col in row:
                    cat_counts[row[categorical_col]] += 1

                # Parse the numeric value once and reuse it for both the
                # overall and the grouped statistics.
                try:
                    value = float(row[numeric_col])
                except (ValueError, KeyError, TypeError):
                    continue

                numeric_data.append(value)
                if grouping_col in row:
                    grouped_data.setdefault(row[grouping_col], []).append(value)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    if missing_cols:
        # Without this a misspelled column name just produces empty statistics.
        print(f"Warning: column(s) {missing_cols} not found in the CSV header.")

    print("\n--- Overall Analysis ---")
    print(f"Statistics for '{numeric_col}': {calculate_stats(numeric_data)}")
    print(f"\nTop 10 most common for '{categorical_col}': {cat_counts.most_common(10)}")

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    for key, values in list(grouped_data.items())[:max_groups]:
        print(f"Group {key}: {calculate_stats(values)}")

    end_time = time.perf_counter()
    print(f"\nPure Python script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_fb_posts_president_scored_anon.csv with Pure Python ---

--- Overall Analysis ---
Statistics for 'Total Interactions': {'count': 14398, 'mean': 2210.154813168496, 'min': 0.0, 'max': 470087.0, 'std_dev': 13066.168583849912}

Top 10 most common for 'Page Category': [('PERSON', 9453), ('ACTOR', 3304), ('POLITICIAN', 2595), ('', 2472), ('POLITICAL_CANDIDATE', 1161), ('ENTREPRENEUR', 23), ('YOUTH_ORGANIZATION', 1)]

--- Grouped Analysis by 'Facebook_Id' ---
Group a6cb7db6850459b954f7272e14d770617022639f8847cf25e6859b9453a03813: {'count': 29, 'mean': 115.51724137931035, 'min': 29.0, 'max': 454.0, 'std_dev': 94.54778784029513}
Group 058bd86861262fa71733f8515c34b729d9ebcad3d318e2a133a6c572dd80ba0b: {'count': 210, 'mean': 105.73333333333333, 'min': 11.0, 'max': 920.0, 'std_dev': 127.46788702344689}
Group ac24f31c4d4b3d5555065fa9558bdca4ab4b5e1379922875ae9e6e32f1d46d25: {'count': 420, 'mean': 15243.30238095238, 'min': 96.0, 'max': 152355

In [33]:
import csv
import math
import time
from collections import Counter

def calculate_stats(data_list):
    """Return descriptive statistics for a list of numbers.

    Keys of the returned dict: 'count', 'mean', 'min', 'max', 'std_dev'
    (population standard deviation). When the input is empty/falsy the
    function returns zero for count, mean and std_dev and None for min/max.
    """
    if data_list:
        total_items = len(data_list)
        lowest = min(data_list)
        highest = max(data_list)
        mean_value = sum(data_list) / total_items

        # Sum of squared deviations, averaged over all items, then rooted.
        deviations_sq = [(number - mean_value) ** 2 for number in data_list]
        population_variance = sum(deviations_sq) / total_items
        deviation = math.sqrt(population_variance)

        return {
            'count': total_items,
            'mean': mean_value,
            'min': lowest,
            'max': highest,
            'std_dev': deviation,
        }

    return {'count': 0, 'mean': 0, 'min': None, 'max': None, 'std_dev': 0}

def main(
    filepath='C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv',
    numeric_col='viewCount',
    categorical_col='source',
    grouping_col='id',
    max_groups=5,
):
    """
    Performs descriptive statistical analysis using only Python's standard library.

    The CSV is streamed once; during that single pass the function collects
    the numeric column, counts the categorical column, and buckets the numeric
    values by the grouping column. Results are printed, nothing is returned.

    Args:
        filepath: Path of the CSV file to analyse.
        numeric_col: Column whose values are converted with ``float`` and
            summarised. Rows where the conversion fails are skipped.
        categorical_col: Column whose values are tallied; the ten most
            common values are printed.
        grouping_col: Column used to bucket the numeric values.
        max_groups: How many groups (in first-seen order) to print.
            ``None`` prints every group.

    Calling ``main()`` with no arguments analyses the same file and columns
    as before the parameters were introduced.
    """
    start_time = time.perf_counter()
    print(f"--- Analyzing {filepath} with Pure Python ---")

    numeric_data = []
    cat_counts = Counter()
    grouped_data = {}

    try:
        # newline='' is what the csv module documents for reading: it keeps
        # newlines embedded in quoted fields intact. 'utf-8-sig' also reads
        # plain UTF-8 but strips a leading BOM, which would otherwise end up
        # glued to the first header name.
        with open(filepath, mode='r', encoding='utf-8-sig', newline='') as infile:
            reader = csv.DictReader(infile)
            header = reader.fieldnames or []
            missing_cols = [
                col for col in (numeric_col, categorical_col, grouping_col)
                if col not in header
            ]

            for row in reader:
                if categorical_col in row:
                    cat_counts[row[categorical_col]] += 1

                # Parse the numeric value once and reuse it for both the
                # overall and the grouped statistics.
                try:
                    value = float(row[numeric_col])
                except (ValueError, KeyError, TypeError):
                    continue

                numeric_data.append(value)
                if grouping_col in row:
                    grouped_data.setdefault(row[grouping_col], []).append(value)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return

    if missing_cols:
        # Without this a misspelled column name just produces empty statistics.
        print(f"Warning: column(s) {missing_cols} not found in the CSV header.")

    print("\n--- Overall Analysis ---")
    print(f"Statistics for '{numeric_col}': {calculate_stats(numeric_data)}")
    print(f"\nTop 10 most common for '{categorical_col}': {cat_counts.most_common(10)}")

    print(f"\n--- Grouped Analysis by '{grouping_col}' ---")
    for key, values in list(grouped_data.items())[:max_groups]:
        print(f"Group {key}: {calculate_stats(values)}")

    end_time = time.perf_counter()
    print(f"\nPure Python script finished in {end_time - start_time:.4f} seconds.")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

--- Analyzing C://Users//ninad//Downloads//period_03//2024_tw_posts_president_scored_anon.csv with Pure Python ---

--- Overall Analysis ---
Statistics for 'viewCount': {'count': 27304, 'mean': 507084.7318341635, 'min': 5.0, 'max': 333502775.0, 'std_dev': 3212115.163351335}

Top 10 most common for 'source': [('Twitter Web App', 14930), ('Twitter for iPhone', 8494), ('Sprout Social', 2933), ('Twitter Media Studio', 499), ('Twitter for iPad', 266), ('Periscope', 103), ('Hootsuite Inc.', 47), ('Loomly', 10), ('Twitter for Advertisers', 7), ('TweetDeck Web App', 7)]

--- Grouped Analysis by 'id' ---
Group cc46051622b8a9c1b883a3bbf12c640b12ac1cbdc7f48a773b6cc2a65f03aa2d: {'count': 1, 'mean': 15610.0, 'min': 15610.0, 'max': 15610.0, 'std_dev': 0.0}
Group 0e3db0c35a290c6df3b737d15882846c108cc80a9b7e5ca64a0129d4e3cd3ad7: {'count': 1, 'mean': 158324.0, 'min': 158324.0, 'max': 158324.0, 'std_dev': 0.0}
Group 256905919085d2946d5d187abc6cbe81a8abe3384793b362fa6aa4144bf42776: {'count': 1, 'mean': 3