In [None]:
"""Visualization Playground."""
# Load Packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as sm

from statsmodels.iolib.summary2 import summary_col

In [None]:
# Load the cleaned dataset. The path is relative, so it is resolved against the
# kernel's current working directory (run the notebook from its own folder).
df = pd.read_csv("../data/clean_anes.csv")

In [None]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

In [None]:
# Density of affective polarization, drawn as one filled curve per level of
# political interest, then written to disk as a transparent PNG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
sns.kdeplot(
    data=df,
    x="affective_polarization",
    hue="political_interest",
    ax=ax,
    palette="crest",
    fill=True,
    common_norm=False,
    alpha=0.5,
    linewidth=0,
)
ax.set_xlabel("Affective Polarization")
ax.set_ylabel("")
fig.savefig("/tmp/ap_by_int.png", transparent=True)

In [None]:
# More Models
# Use the cleaned data stored in `df` to build several regression models.
# Controls that appear in the formulas below: age, sex, education, and ideology.

# First we define the model formulas.
# Each one takes the form "DV ~ IV1 + IV2 + ...".

# Affective polarization on confidence, controlling for age and ideology.
form_ap_conf = "affective_polarization ~ confidence + age + ideology"
# Bivariate models: political knowledge as the only predictor of each outcome.
form_ap = "affective_polarization ~ political_knowledge_scale"
form_identity_imp = "party_id_imp ~ political_knowledge_scale"
form_dem_ap = "feeling_democrat ~ political_knowledge_scale"
form_rep_ap = "feeling_republican ~ political_knowledge_scale"
# Full specification: political knowledge plus all controls.
form_all = "affective_polarization ~ age + education + ideology + political_knowledge_scale + sex"

In [None]:
# Fit one OLS model per formula. Later cells index into this list, so the order
# matters: position 0 is `form_ap_conf` and position 5 is `form_all`.
models = [
    sm.ols(formula=formula, data=df).fit()
    for formula in (
        form_ap_conf,
        form_ap,
        form_identity_imp,
        form_dem_ap,
        form_rep_ap,
        form_all,
    )
]

In [None]:
# Display the summary table of the first model (fitted from `form_ap_conf`).
models[0].summary()

In [None]:
# Combine all fitted models into one summary table and print it as LaTeX source.
print(summary_col(models).as_latex())

In [None]:
# Step 5: Visualize Results
# Create a visualization to summarize the results of the regression model.

# Create a DataFrame with coefficients and confidence intervals
def make_coefs(model):
    """Build a coefficient table for a fitted regression model.

    Parameters
    ----------
    model : fitted results object
        Must expose ``params`` and ``pvalues`` (Series indexed by term name)
        and a ``conf_int()`` method returning a frame whose columns ``0`` and
        ``1`` hold the lower and upper interval bounds.

    Returns
    -------
    pandas.DataFrame
        One row per model term with columns ``coef``, ``lower_ci``,
        ``upper_ci`` and ``pval``. The ``Intercept`` row is removed when
        present.
    """
    # Compute the interval table once instead of once per bound.
    conf_int = model.conf_int()
    coefs = pd.DataFrame({
        'coef': model.params,
        'lower_ci': conf_int[0],
        'upper_ci': conf_int[1],
        'pval': model.pvalues,
    })
    # Models fitted without an intercept have no 'Intercept' row; do not
    # raise KeyError for them.
    return coefs.drop(index='Intercept', errors='ignore')

# Coefficient table for the model at index 5 of `models` (fitted from `form_all`).
coef_df = make_coefs(model=models[5])


In [None]:
# Coefficient plot: one point per term with a horizontal bar for its
# confidence interval.
coef_fig, coef_ax = plt.subplots(figsize=(8, 4))

# Error bars are given as distances below and above the point estimate.
err_below = coef_df['coef'] - coef_df['lower_ci']
err_above = coef_df['upper_ci'] - coef_df['coef']
coef_ax.errorbar(
    coef_df['coef'],
    coef_df.index,
    xerr=(err_below, err_above),
    fmt='o',
    color='b',
    elinewidth=2,
    capsize=4,
)

# Dashed reference line at zero.
coef_ax.axvline(x=0, color='grey', linestyle='--')

coef_ax.set_title('Regression Coefficients with Confidence Intervals')
coef_ax.set_xlabel('Coefficient')
coef_ax.set_ylabel('Variables')
coef_ax.set_yticks(range(len(coef_df)))
coef_ax.set_yticklabels(coef_df.index)
coef_ax.grid(axis='x', linestyle='--', alpha=0.7)
coef_fig.tight_layout()

In [None]:
# Create a visualization of the predicted values of affective polarization
# across the range of political knowledge scores.
# Predictions come from the model fitted on `form_all` (index 5 of `models`);
# the notebook defines no standalone `model` variable, so the list entry is
# referenced explicitly to keep the cell working on a fresh kernel.
predicted_values_full = models[5].predict(df)

pred_fig, pred_ax = plt.subplots(figsize=(10, 6))
pred_ax.scatter(
    df['political_knowledge_scale'], df['affective_polarization'],
    label='Actual Values', color='blue', alpha=0.5,
)
pred_ax.scatter(
    df['political_knowledge_scale'], predicted_values_full,
    label='Predicted Values', color='red', alpha=0.5,
)
# Add labels and title
pred_ax.set_title('Actual vs. Predicted Affective Polarization')
pred_ax.set_xlabel('Political Knowledge Scale')
pred_ax.set_ylabel('Affective Polarization')
pred_ax.legend()
pred_ax.grid(True, linestyle='--', alpha=0.7)
pred_fig.tight_layout()

In [None]:
# geopandas reads and plots the US hex-grid map used in the cells below.
import geopandas as gpd

In [None]:
# Load the US states hex-grid GeoJSON from the Python Graph Gallery repository.
# NOTE: the file is read from a remote URL, so this cell needs network access.
url = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/us_states_hexgrid.geojson.json"
map_us = gpd.read_file(url)

In [None]:
# Inspect the first rows of the hex-grid table (columns and geometry).
map_us.head()

In [None]:
# Strip the literal " (United States)" suffix so the names can be matched
# against the `state` values in `df`. `regex=False` forces a literal match:
# with regex matching (the default before pandas 2.0) the parentheses are
# parsed as a capture group and the suffix is never removed.
map_us['google_name'] = map_us['google_name'].str.replace(' (United States)', '', regex=False)

In [None]:
# Mean affective polarization per state, as a Series indexed by `state`.
state_polarization = df['affective_polarization'].groupby(df['state']).mean()

In [None]:
# Attach each state's mean affective polarization to the map. The left join
# keeps every hexagon, including states that have no survey rows.
# Any existing `affective_polarization` column is dropped first so the cell can
# be re-run safely; otherwise a second run would create suffixed `_x`/`_y`
# columns and the plotting cell would no longer find `affective_polarization`.
map_us = (
    map_us
    .drop(columns='affective_polarization', errors='ignore')
    .merge(state_polarization, left_on='google_name', right_on='state', how='left')
)

In [None]:
# Compute each hexagon's centroid; the plotting cell uses it as the anchor
# point for the state labels.
map_us['centroid'] = map_us['geometry'].apply(lambda x: x.centroid)

In [None]:
# Set up the figure for plotting
colors = "BuPu"
# One shared normalization so the map fill and the colorbar use the same
# fixed 0-100 scale.
color_norm = plt.Normalize(vmin=0, vmax=100)
fig, ax = plt.subplots(1, figsize=(10, 5))

# Fill each state hexagon according to its mean affective polarization.
map_us.plot(
    ax=ax,
    column="affective_polarization",
    cmap=colors,
    norm=color_norm,
    edgecolor='black',
    linewidth=.5,
)

# Remove useless axis
ax.axis('off')

# Title, subtitle and footnote; positions are pixel offsets from the
# lower-left corner of the axes.
ax.annotate('Affective Polarization in the US', xy=(10, 420), xycoords='axes pixels', horizontalalignment='left', verticalalignment='top', fontsize=14, color='black')
ax.annotate('You can add your question description here.', xy=(10, 400), xycoords='axes pixels', horizontalalignment='left', verticalalignment='top', fontsize=11, color='#808080')
ax.annotate('Some other random message', xy=(500, 0), xycoords='axes pixels', horizontalalignment='left', verticalalignment='top', fontsize=8, color='#808080')

# Label each state hexagon with its `iso3166_2` code at the centroid.
for _, row in map_us.iterrows():
    ax.annotate(
        text=row['iso3166_2'],
        xy=row['centroid'].coords[0],
        horizontalalignment='center',
        va='center',
        color="white",
    )

# Add a color bar. The mappable is deliberately not named `sm`: that name is
# the statsmodels alias imported at the top of the notebook, and overwriting
# it would break re-running the model-fitting cell.
colorbar_mappable = plt.cm.ScalarMappable(cmap=colors, norm=color_norm)
fig.colorbar(colorbar_mappable, ax=ax, orientation="horizontal", aspect=50, fraction=0.01, pad=0);
