In [None]:

import matplotlib.pyplot as plt

import pandas as pd
import altair as alt
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the two CSV inputs (paths are relative to the notebook's working directory).
survey_csv_path = "data/surveydata.9.24.23.csv"
full_csv_path = "data/data_500k.csv"

df_survey = pd.read_csv(survey_csv_path)
df_full = pd.read_csv(full_csv_path)


In [None]:
# # Choose the row index you want to print (for example, row 60000)
# row_index = 60000


# # Loop through each column and print column name and value
# for col_name, col_value in df_full.iloc[row_index].items():
#     print(f"{col_name}: {col_value}")

In [None]:
# counties = df_full.AI_COUNTY_NAME.unique()
# len(list(counties))

# len(df.columns)

# df[df['AI_COUNTY_NAME'] =='MARICOPA']


In [None]:


def create_bar_chart(data, column):
    """Build a horizontal Altair bar chart of record counts per value of ``column``.

    Rows where ``column`` is null are dropped before charting.

    Parameters
    ----------
    data : DataFrame holding ``column``.
    column : name of the column whose values are counted.

    Returns
    -------
    The Altair chart (200 x 150), with one bar per distinct value, coloured
    by value and drawn without a legend.
    """
    rows_with_value = data[data[column].notnull()]

    # x: number of records; y: the column's values as an ordinal axis
    count_axis = alt.X('count()', title='Count')
    value_axis = alt.Y(column + ':O', title=column)
    # Colour by the same column (nominal); the y axis already names each bar, so no legend
    bar_color = alt.Color(column + ':N', legend=None)

    bars = alt.Chart(rows_with_value).mark_bar().encode(
        count_axis,
        value_axis,
        color=bar_color,
    )
    return bars.properties(width=200, height=150)

# Survey question columns to chart
columns_to_plot = [
    'Q1_Candidate',
    'Q2_Support',
    'Q3_Party',
    'Q4_LikelyVoter',
    'Q5_TrumpSupport',
]

# Lower-case the answers in place on df_survey; later cells compare against
# lower-case values such as "republican", so this mutation is load-bearing.
for col in columns_to_plot:
    df_survey[col] = df_survey[col].str.lower()

# One bar chart per question, placed side by side in a single row
charts = [create_bar_chart(df_survey, question) for question in columns_to_plot]
concatenated_charts = alt.hconcat(*charts)

In [None]:
# Bare last expression so the notebook renders the horizontally concatenated chart
concatenated_charts

In [None]:


# Alias only (no copy): survey_data and df_survey are the same DataFrame object
survey_data = df_survey


# Columns whose 'Y' responses are compared between groups
columns_of_interest_updated = [
    "PRFL_BIDEN_SUPPORT",
    "PRFL_BORDER_SECURITY",
    "PRFL_CONSERVATIVE_NEWS",
    "PRFL_METOO_SUPPORT",
    "PRFL_LIBERAL_NEWS",
    "PRFL_TAXES",
    "PRFL_AMZN_PRIME",
    "PRFL_INSURANCE",
    "PRFL_ANML_RIGHTS",
    "PRFL_ENVIRONMENT",
    "PRFL_EVANGELICAL",
    "PRFL_TRUMP_SUPPORT",
    "PRFL_SANDERS_SUPPORT",
    "PRFL_POLITICAL_IDEOLOGY",
    "PRFL_FENCE_SITTER",
    "PRFL_PERSUADABLE_VOTER",
    "PRFL_OBAMA",
    "PRFL_2NDAMEND",
    "PARTY_MIX",
    "PARTY_CODE",
]

# Party labels, in the order their chart columns are drawn
parties = [
    "Republican",
    "Democrat",
    "Unaffiliated/Other",
]

# Function to reorder the Series to have "Y" as the rightmost column if it exists
def reorder_series_for_Y(series):
    """Return ``series`` with its 'Y' entry moved to the last position.

    Parameters
    ----------
    series : pandas Series indexed by response label (e.g. the result of
        ``value_counts``).

    Returns
    -------
    A new Series with every other entry in its original order followed by
    the 'Y' entry, or ``series`` itself (unchanged) when it has no 'Y' label.
    """
    # `in` on a Series tests the index labels, not the values
    if 'Y' in series:
        # Series.append was removed in pandas 2.0; pd.concat works on old and new versions
        return pd.concat([series.drop('Y'), series.loc[['Y']]])
    return series

# Rank the columns of interest by how far apart the share of 'Y' responses is
# between "republican" and "democrat" respondents
is_republican = survey_data["Q3_Party"] == "republican"
is_democrat = survey_data["Q3_Party"] == "democrat"

y_difference_scores = {}
for col in columns_of_interest_updated:
    answered_y = survey_data[col] == "Y"
    # Share of each party's respondents whose value in this column is 'Y'
    # (int() keeps this a plain Python division, as with the row counts it replaces)
    rep_y_prop = int((is_republican & answered_y).sum()) / int(is_republican.sum())
    dem_y_prop = int((is_democrat & answered_y).sum()) / int(is_democrat.sum())
    # Only the size of the gap matters for ranking, not its direction
    y_difference_scores[col] = abs(rep_y_prop - dem_y_prop)

# Column names ordered from the largest gap to the smallest
sorted_columns_y_difference = sorted(y_difference_scores, key=y_difference_scores.get, reverse=True)

# Grid of bar charts: one row per column (largest party gap first), one chart column per party
panel_colors = {0: 'lightcoral', 1: 'lightgreen'}  # any later chart column falls back to 'lightblue'
fig, axs = plt.subplots(
    nrows=len(sorted_columns_y_difference),
    ncols=len(parties),
    figsize=(20, 40),
)
for idx, col in enumerate(sorted_columns_y_difference):
    for jdx, party in enumerate(parties):
        ax = axs[idx, jdx]
        # Q3_Party is matched against the lower-cased party label
        filtered_data = survey_data[survey_data["Q3_Party"] == party.lower()]
        # Count every response (missing values included) and push 'Y' to the far right
        reordered_series = reorder_series_for_Y(filtered_data[col].value_counts(dropna=False))
        reordered_series.plot(kind='bar', ax=ax, color=panel_colors.get(jdx, 'lightblue'))
        ax.set_title(f"{col} ({party})", fontsize=12)
        ax.tick_params(axis='x', rotation=45)
        if jdx == 0:
            # y label only on the leftmost chart of each row
            ax.set_ylabel('Count')
        if idx == 0:
            # x label only on the top row
            ax.set_xlabel('Response')
plt.tight_layout()
plt.show()


In [None]:
# Columns whose 'Y' responses are compared between groups
columns_of_interest_updated = [
    "PRFL_BIDEN_SUPPORT",
    "PRFL_BORDER_SECURITY",
    "PRFL_CONSERVATIVE_NEWS",
    "PRFL_METOO_SUPPORT",
    "PRFL_LIBERAL_NEWS",
    "PRFL_TAXES",
    "PRFL_AMZN_PRIME",
    "PRFL_INSURANCE",
    "PRFL_ANML_RIGHTS",
    "PRFL_ENVIRONMENT",
    "PRFL_EVANGELICAL",
    "PRFL_TRUMP_SUPPORT",
    "PRFL_SANDERS_SUPPORT",
    "PRFL_POLITICAL_IDEOLOGY",
    "PRFL_FENCE_SITTER",
    "PRFL_PERSUADABLE_VOTER",
    "PRFL_OBAMA",
    "PRFL_2NDAMEND",
    "PARTY_MIX",
    "PARTY_CODE",
]

# Candidate filters (lower-case Q1_Candidate values), in the order their chart columns are drawn
candidates = [
    "president donald trump",
    "president joe biden",
    "governor ron desantis",
    "nikki haley",
    "undecided",
]

# Function to reorder the Series to have "Y" as the rightmost column if it exists
def reorder_series_for_Y(series):
    """Return ``series`` with its 'Y' entry moved to the last position.

    Parameters
    ----------
    series : pandas Series indexed by response label (e.g. the result of
        ``value_counts``).

    Returns
    -------
    A new Series with every other entry in its original order followed by
    the 'Y' entry, or ``series`` itself (unchanged) when it has no 'Y' label.
    """
    # `in` on a Series tests the index labels, not the values
    if 'Y' in series:
        # Series.append was removed in pandas 2.0; pd.concat works on old and new versions
        return pd.concat([series.drop('Y'), series.loc[['Y']]])
    return series

# Rank the columns of interest by how far apart the share of 'Y' responses is
# between respondents who chose Trump and those who chose Biden in Q1_Candidate
chose_trump = survey_data["Q1_Candidate"] == "president donald trump"
chose_biden = survey_data["Q1_Candidate"] == "president joe biden"

y_difference_scores = {}
for col in columns_of_interest_updated:
    answered_y = survey_data[col] == "Y"
    # Share of each candidate's respondents whose value in this column is 'Y'
    # (int() keeps this a plain Python division, as with the row counts it replaces)
    rep_y_prop = int((chose_trump & answered_y).sum()) / int(chose_trump.sum())
    dem_y_prop = int((chose_biden & answered_y).sum()) / int(chose_biden.sum())
    # Only the size of the gap matters for ranking, not its direction
    y_difference_scores[col] = abs(rep_y_prop - dem_y_prop)

# Column names ordered from the largest gap to the smallest
sorted_columns_y_difference = sorted(y_difference_scores, key=y_difference_scores.get, reverse=True)

# Grid of bar charts: one row per column (largest Trump/Biden gap first), one chart column per candidate
panel_colors = {0: 'lightcoral', 1: 'lightgreen'}  # any later chart column falls back to 'lightblue'
fig, axs = plt.subplots(
    nrows=len(sorted_columns_y_difference),
    ncols=len(candidates),
    figsize=(20, 40),
)
for idx, col in enumerate(sorted_columns_y_difference):
    for jdx, candidate in enumerate(candidates):
        ax = axs[idx, jdx]
        filtered_data = survey_data[survey_data["Q1_Candidate"] == candidate]
        # Count every response (missing values included) and push 'Y' to the far right
        reordered_series = reorder_series_for_Y(filtered_data[col].value_counts(dropna=False))
        reordered_series.plot(kind='bar', ax=ax, color=panel_colors.get(jdx, 'lightblue'))
        ax.set_title(f"{col} ({candidate})", fontsize=12)
        ax.tick_params(axis='x', rotation=45)
        if jdx == 0:
            # y label only on the leftmost chart of each row
            ax.set_ylabel('Count')
        if idx == 0:
            # x label only on the top row
            ax.set_xlabel('Response')
plt.tight_layout()
plt.show()