# Part 2.3 | Bivariate Relationships by Category

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D

# Notebook-wide figure styling, set one rc group at a time:
# italic Times New Roman serif text, titles larger than body text,
# and a high DPI so figures render sharply.
plt.rc('font', family='serif', serif=['Times New Roman'], size=16, style='italic')
plt.rc('axes', titlesize=20, labelsize=16)
plt.rc('figure', dpi=400)

## Create Elections_By_County.csv for Exercise

In [None]:
# Build data/Elections_By_County.csv: one row per county with the two-party
# Democratic share and the total votes cast for every election year.

# Load raw election data; rows without a county FIPS code cannot be keyed to
# a county, so they are dropped.
raw = pd.read_csv('data/County-Presidential-Election-Returns-2000-2024.csv')
raw = raw.dropna(subset=['county_fips'])
# Going through int strips a float ".0" suffix (if the column was read as
# float) before the code becomes text. The income table is normalised the
# same way in the next cell, so the join keys agree.
raw['county_fips'] = raw['county_fips'].astype(int).astype(str)

keys = ['county_fips', 'year']

# The raw file can hold more than one row per county/year/party (presumably
# when returns are split by voting mode -- TODO confirm against the codebook).
# Merging such rows directly cross-joins them and any single pairing is only a
# partial count, so each party's votes are summed to one row per county/year
# first. min_count=1 keeps a group whose votes are all missing as NaN instead
# of silently turning it into 0.
dem = (
    raw.loc[raw['party'] == 'DEMOCRAT']
    .groupby(keys, as_index=False)['candidatevotes']
    .sum(min_count=1)
    .rename(columns={'candidatevotes': 'dem_votes'})
)
rep = (
    raw.loc[raw['party'] == 'REPUBLICAN']
    .groupby(keys, as_index=False)['candidatevotes']
    .sum(min_count=1)
    .rename(columns={'candidatevotes': 'rep_votes'})
)

# First non-missing total per county/year, in file order.
# NOTE(review): assumes totalvotes is the county-wide total repeated on every
# row of a county/year -- verify.
total = (
    raw.groupby(keys, as_index=False)['totalvotes']
    .first()
    .rename(columns={'totalvotes': 'total_votes'})
)

# Inner joins: a county/year is kept only when both major parties have rows.
df = dem.merge(rep, on=keys).merge(total, on=keys)
# Two-party share: third-party votes are excluded from the denominator.
df['dem_share'] = df['dem_votes'] / (df['dem_votes'] + df['rep_votes'])
df = df[keys + ['dem_share', 'total_votes']]

# Pivot to wide format: columns become e.g. dem_share_2000, total_votes_2024.
elections = df.pivot(index='county_fips', columns='year', values=['dem_share', 'total_votes'])
elections.columns = [f'{col[0]}_{col[1]}' for col in elections.columns]
elections = elections.reset_index()

# Save
elections.to_csv('data/Elections_By_County.csv', index=False)
print(f"Saved Elections_By_County.csv: {len(elections)} counties")
elections.head()

## Load Data for Figures

In [None]:
# Median household income per county, keyed by a string FIPS code built the
# same way as the election keys (int -> str).
income = (
    pd.read_csv('data/Income_By_County.csv', index_col=0)
    .loc[:, ['county_fips', 'Median Household Income']]
    .dropna()
    .assign(county_fips=lambda t: t['county_fips'].astype(int).astype(str))
    .rename(columns={'Median Household Income': 'income'})
)

# Wide election table written by the previous cell; the FIPS column is read
# back as a number, so it is converted to text for the join.
elections = pd.read_csv('data/Elections_By_County.csv')
elections = elections.astype({'county_fips': str})

# Inner join: only counties present in both tables are kept.
data = income.merge(elections, on='county_fips')
data = data.assign(
    log_income=np.log(data['income']),
    log_votes=np.log(data['total_votes_2024']),
)
print(f"Merged data: {len(data)} counties")

# One row per county and year (2000 and 2024) for the year comparisons.
data_long = pd.melt(
    data,
    id_vars=['county_fips', 'income', 'log_income', 'total_votes_2024', 'log_votes'],
    value_vars=['dem_share_2000', 'dem_share_2024'],
    var_name='Year',
    value_name='dem_share',
)
# Keep only the year part of the former column name, e.g. '2000'.
data_long['Year'] = data_long['Year'].str.replace('dem_share_', '', regex=False)
data_long = data_long.dropna()

## Figure 1: Democratic Share by Year

In [None]:
# Figure 1: distribution of county Democratic share in 2000 vs 2024,
# as a box plot with the individual counties overlaid as faint points.
fig, ax = plt.subplots(figsize=(7, 5))

years = ['2000', '2024']
shared = dict(data=data_long, x='Year', y='dem_share', order=years, ax=ax)

# whis=(0, 100) extends the whiskers to the minimum and maximum.
sns.boxplot(hue='Year', hue_order=years, palette=['C0', 'C1'],
            whis=(0, 100), legend=False, **shared)
sns.stripplot(color='black', alpha=0.1, size=3, **shared)

ax.set(xlabel='', ylabel='Democratic Vote Share',
       title='Democratic Vote Share by Year')
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0])
# Reference line at an even two-party split.
ax.axhline(0.5, color='grey', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.savefig('i/dem_share_by_year.png')
plt.show()

## Figure 2: County Size vs Democratic Share

In [None]:
# Figure 2: 2024 Democratic share against county size (log of total votes),
# with a fitted regression line.
point_style = {'alpha': 0.3, 's': 20}
fit_style = {'color': 'firebrick', 'linewidth': 2}
g = sns.lmplot(data=data, x='log_votes', y='dem_share_2024',
               height=5, aspect=1.4,
               scatter_kws=point_style, line_kws=fit_style)
ax = g.ax

# The x data is already logged, so ticks are placed at the log of round
# vote counts and labelled with the unlogged values.
vote_ticks = {1000: '1k', 10000: '10k', 100000: '100k', 1000000: '1M'}
ax.set_xlabel('County Size (Total Votes, log)')
ax.set_xticks([np.log(votes) for votes in vote_ticks])
ax.set_xticklabels(list(vote_ticks.values()))

ax.set_ylabel('Democratic Vote Share (2024)')
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0])
# Reference line at an even two-party split.
ax.axhline(0.5, color='grey', linestyle='--', alpha=0.5)
ax.set_title('County Size and Democratic Voting (2024)')

plt.tight_layout()
plt.savefig('i/size_vs_dem_2024.png')
plt.show()

## Figure 3: Income vs Democratic Share

In [None]:
# Figure 3: 2024 Democratic share against log median household income,
# with a fitted regression line.
g = sns.lmplot(
    data=data,
    x='log_income',
    y='dem_share_2024',
    height=5,
    aspect=1.4,
    scatter_kws={'alpha': 0.3, 's': 20},
    line_kws={'color': 'firebrick', 'linewidth': 2},
)
ax = g.ax

# Dollar amounts to mark on the (logged) x axis, with their display labels.
income_labels = {25000: '$25k', 50000: '$50k', 100000: '$100k', 200000: '$200k'}
income_ticks = list(income_labels)
ax.set_xlabel('Median Household Income (log scale)')
ax.set_xticks([np.log(dollars) for dollars in income_ticks])
ax.set_xticklabels([income_labels[dollars] for dollars in income_ticks])
ax.set_xlim(np.log(20000), np.log(250000))

ax.set_ylabel('Democratic Vote Share (2024)')
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0])
# Reference line at an even two-party split.
ax.axhline(0.5, color='grey', linestyle='--', alpha=0.5)
ax.set_title('Income and Democratic Voting (2024)')

plt.tight_layout()
plt.savefig('i/income_vs_dem_2024.png')
plt.show()

## Figure 4: Income vs Democratic Share by Year

In [None]:
# Figure 4: Democratic share against log median household income, with a
# separate scatter and regression line for 2000 and for 2024.
year_labels = ['2000', '2024']
year_colors = ['C0', 'C1']

g = sns.lmplot(
    data=data_long,
    x='log_income',
    y='dem_share',
    hue='Year',
    hue_order=year_labels,
    palette=year_colors,
    height=5,
    aspect=1.4,
    scatter_kws={'alpha': 0.3, 's': 20},
    line_kws={'linewidth': 3},
    legend=False,  # replaced by the hand-built legend below
)
ax = g.ax

# Dollar amounts to mark on the (logged) x axis, with their display labels.
income_labels = {25000: '$25k', 50000: '$50k', 100000: '$100k', 200000: '$200k'}
income_ticks = list(income_labels)
ax.set_xlabel('Median Household Income (log scale)')
ax.set_xticks([np.log(dollars) for dollars in income_ticks])
ax.set_xticklabels([income_labels[dollars] for dollars in income_ticks])
ax.set_xlim(np.log(20000), np.log(250000))

ax.set_ylabel('Democratic Vote Share')
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0])
# Reference line at an even two-party split.
ax.axhline(0.5, color='grey', linestyle='--', alpha=0.5)
ax.set_title('Income and Democratic Voting: 2000 vs 2024')

# Legend built from plain line handles, one per year, in the same colours
# and line width as the regression lines.
legend_elements = [
    Line2D([0], [0], color=color, linewidth=3, label=label)
    for label, color in zip(year_labels, year_colors)
]
ax.legend(handles=legend_elements, loc='upper right', frameon=True,
          fancybox=True, edgecolor='black', facecolor='white', framealpha=1)

plt.tight_layout()
plt.savefig('i/income_vs_dem_by_year.png')
plt.show()