In [70]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path  

In [71]:
# Read the raw Democratic-candidates table; the path is relative to this notebook.
dem_candidates = pd.read_csv(filepath_or_buffer='../datasets/dem_candidates.csv')

In [72]:
# Flag-like columns: every column whose name mentions an endorsement or
# (party) support, or contains a question mark.
endorsement_cols = dem_candidates.columns[
    dem_candidates.columns.str.contains('Endorsed|Party Support|Support|\\?')
]

# A missing flag is treated as "No" (0); then the Yes/No text is encoded as 1/0.
dem_candidates[endorsement_cols] = (
    dem_candidates[endorsement_cols]
    .fillna(0)
    .replace({'No': 0, 'Yes': 1})
)

# Encode the outcome as 1 = on the general-election ballot, 0 = not.
# pandas >= 2.0 treats the literal text "None" as a missing value when reading
# a CSV with default settings, so the 'None' -> 0 mapping alone can leave NaN
# behind and bias any later mean of this column. Filling with 0 keeps the
# intended meaning.
# NOTE(review): assumes a missing General Status always means "not on the
# ballot" -- confirm against the raw file.
dem_candidates['General Status'] = (
    dem_candidates['General Status']
    .replace({'None': 0, 'On the Ballot': 1})
    .fillna(0)
)

# Encode race as 1 = Nonwhite, 0 = White; missing values are left as-is.
dem_candidates['Race'] = dem_candidates['Race'].replace({'Nonwhite': 1, 'White': 0})

In [80]:
def count_candidates_by_district(df, district_column, new_column_name):
    """
    Adds a column to the DataFrame with the count of candidates in each district.

    The function is safe to call repeatedly: if `new_column_name` is already
    present in `df` (e.g. the calling cell is re-run), the stale column is
    replaced instead of producing `_x` / `_y` suffixed duplicates.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the candidates and districts.
    district_column (str): The name of the column with district information.
    new_column_name (str): The name of the new column to hold the counts.

    Returns:
    pd.DataFrame: A new DataFrame (the input is not modified) with an additional
    column for candidate counts per district. Rows whose district is missing are
    not returned, because groupby skips missing keys and the merge is an inner join.

    Raises:
    ValueError: If `new_column_name` is the same as `district_column`.
    """
    if new_column_name == district_column:
        raise ValueError(
            f"new_column_name must differ from district_column ({district_column!r})"
        )

    # Discard a count column left over from a previous call so the merge below
    # cannot collide with it.
    base = df.drop(columns=[new_column_name], errors='ignore')

    # Calculate the number of candidates in each district
    district_counts = (
        base.groupby(district_column)
        .size()
        .reset_index(name=new_column_name)
    )

    # Merge the counts back onto every candidate row
    return base.merge(district_counts, on=district_column)

In [91]:
# Attach the per-district candidate count as 'total_runners'.
dem_candidates = count_candidates_by_district(
    df=dem_candidates,
    district_column='District',
    new_column_name='total_runners',
)

In [92]:
# Split the candidates into one frame per office type.
govs = dem_candidates.loc[dem_candidates['Office Type'].eq('Governor')]
house = dem_candidates.loc[dem_candidates['Office Type'].eq('Representative')]
senate = dem_candidates.loc[dem_candidates['Office Type'].eq('Senator')]

In [93]:
# (rows, columns) of the governor subset.
govs.shape

(100, 35)

In [94]:
# (rows, columns) of the House subset.
house.shape

(687, 35)

In [95]:
# (rows, columns) of the Senate subset.
senate.shape

(24, 35)

In [99]:
# Keep only districts with more than one candidate, then rebuild the
# per-office subsets from the filtered frame.
dem_candidates = dem_candidates.loc[dem_candidates['total_runners'].gt(1)]
govs = dem_candidates.loc[dem_candidates['Office Type'].eq('Governor')]
house = dem_candidates.loc[dem_candidates['Office Type'].eq('Representative')]
senate = dem_candidates.loc[dem_candidates['Office Type'].eq('Senator')]

In [100]:
# (rows, columns) of the House subset after the total_runners > 1 filter.
house.shape

(645, 35)

In [86]:
# Write the cleaned frame and the per-office subsets next to this notebook.
# NOTE(review): to_csv writes the row index by default, so each file gets an
# unnamed leading column; pass index=False if downstream readers do not need it.
# NOTE(review): run this cell after the contested-race filter so the files on
# disk match the frames shown above.
dem_candidates.to_csv(path_or_buf=Path('dem_candidates_cleaned.csv'))
govs.to_csv(path_or_buf=Path('govs.csv'))
house.to_csv(path_or_buf=Path('house.csv'))
senate.to_csv(path_or_buf=Path('senate.csv'))

In [87]:
# house['dist_num'] = house['District'].str.extract(r'(\d+)$')
# house['District Abbrev'] = house['State']+ '-' +house['dist_num']
# house

In [88]:
# brookings = pd.read_csv('candidatesdatafinal2018.csv')
# brookings['Candidate'] = brookings['Candidate.First.Name'] + ' ' + brookings['Candidate.Last.Name']
# brookings['District'] = brookings['Candidate.State'] + '-' + brookings['Candidate.District'].astype(str)
# brookings

In [65]:
# merged_df = pd.merge(brookings, house, left_on=['Candidate', 'District'], right_on=['Candidate', 'District Abbrev'], how='inner')
# merged_df
#merged_df.drop(columns =['Unnamed: 0_x', 'Candidate.First.Name', 'Candidate.Last.Name','Candidate.State', 'Candidate.District', 'Candidate.Gender','Listed.military.service.', 'Party.Category.1','Incumbency',  'Democrat', 'Republican', 'District_x','Unnamed: 0_y', 'State', 'District_y','dist_num'])

In [54]:
# # Reuse the endorsement column names identified during cleaning
# endorsement_columns = endorsement_cols

# def endorsement_effect(data):
#     # Calculating the 'Partisan Lean' percentiles and bins
#     percentiles = data['Partisan Lean'].quantile([0.25, 0.5, 0.75]).tolist()
#     data['Partisan Lean Percentile Bins'] = pd.cut(data['Partisan Lean'], bins=[-np.inf] + percentiles + [np.inf], labels=['0-25%', '26-50%', '51-75%', '76-100%'])

#     # Looping through each endorsement column
#     for endorsement in endorsement_columns:
#         plt.figure(figsize=(12, 8))
#         for i, bin in enumerate(data['Partisan Lean Percentile Bins'].cat.categories, start=1):
#             bin_data = data[data['Partisan Lean Percentile Bins'] == bin]

#             # Calculating success rate for each endorsement status within the bin
#             success_rate_endorsed = bin_data[bin_data[endorsement] == 1]['General Status'].mean()
#             success_rate_not_endorsed = bin_data[bin_data[endorsement] == 0]['General Status'].mean()

#             plt.subplot(2, 2, i)
#             sns.barplot(x=[f'{endorsement[:-1]}', f'Not {endorsement[:-1]}'], y=[success_rate_endorsed, success_rate_not_endorsed])
#             plt.title(f'Success Rate in Partisan Lean Percentile Bin: {bin}')
#             plt.ylabel('Success Rate')

#         plt.suptitle(f'Success Rate Analysis for {endorsement}')
#         plt.tight_layout()
#         plt.show()

In [55]:
# endorsement_effect(house)