# Datasets

In [488]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## 2008-2016 Electoral data

In [489]:
import pandas as pd

# Cargar los archivos CSV
data_2008_2016 = pd.read_csv("data/US_County_Level_Presidential_Results_08-16.csv")
data_2020 = pd.read_csv("data/2020_US_County_Level_Presidential_Results.csv")
data_2024 = pd.read_csv("data/2024_US_County_Level_Presidential_Results.csv")

# Convertir formato fips
data_2008_2016["county_fips"] = data_2008_2016["county_fips"].astype(str).str.zfill(5)
data_2020["county_fips"] = data_2020["county_fips"].astype(str).str.zfill(5)
data_2024["county_fips"] = data_2024["county_fips"].astype(str).str.zfill(5)

# Calcular el porcentaje de votos para cada partido en cada año (2008-2016)
data_2008_2016["per_dem_2008"] = data_2008_2016["dem_2008"] / data_2008_2016["total_2008"] * 100
data_2008_2016["per_gop_2008"] = data_2008_2016["gop_2008"] / data_2008_2016["total_2008"] * 100
data_2008_2016["per_oth_2008"] = data_2008_2016["oth_2008"] / data_2008_2016["total_2008"] * 100

data_2008_2016["per_dem_2012"] = data_2008_2016["dem_2012"] / data_2008_2016["total_2012"] * 100
data_2008_2016["per_gop_2012"] = data_2008_2016["gop_2012"] / data_2008_2016["total_2012"] * 100
data_2008_2016["per_oth_2012"] = data_2008_2016["oth_2012"] / data_2008_2016["total_2012"] * 100

data_2008_2016["per_dem_2016"] = data_2008_2016["dem_2016"] / data_2008_2016["total_2016"] * 100
data_2008_2016["per_gop_2016"] = data_2008_2016["gop_2016"] / data_2008_2016["total_2016"] * 100
data_2008_2016["per_oth_2016"] = data_2008_2016["oth_2016"] / data_2008_2016["total_2016"] * 100

# Calcular el cambio en porcentaje (delta %)
data_2008_2016["delta_per_dem_2012"] = data_2008_2016["per_dem_2012"] - data_2008_2016["per_dem_2008"]
data_2008_2016["delta_per_gop_2012"] = data_2008_2016["per_gop_2012"] - data_2008_2016["per_gop_2008"]
data_2008_2016["delta_per_oth_2012"] = data_2008_2016["per_oth_2012"] - data_2008_2016["per_oth_2008"]

data_2008_2016["delta_per_dem_2016"] = data_2008_2016["per_dem_2016"] - data_2008_2016["per_dem_2012"]
data_2008_2016["delta_per_gop_2016"] = data_2008_2016["per_gop_2016"] - data_2008_2016["per_gop_2012"]
data_2008_2016["delta_per_oth_2016"] = data_2008_2016["per_oth_2016"] - data_2008_2016["per_oth_2012"]

# Ordenar los datos por "county_fips"
data_2008_2016 = data_2008_2016.sort_values(by="county_fips")

# Preparar los datos de 2020 y 2024
data_2020 = data_2020[["county_fips", "votes_dem", "votes_gop", "total_votes", "per_dem", "per_gop"]]
data_2024 = data_2024[["county_fips", "votes_dem", "votes_gop", "total_votes", "per_dem", "per_gop"]]

# Convertir los valores de per_dem y per_gop de 0-1 a porcentaje (0-100)
data_2020["per_dem_2020"] = data_2020["per_dem"] * 100
data_2020["per_gop_2020"] = data_2020["per_gop"] * 100
data_2024["per_dem_2024"] = data_2024["per_dem"] * 100
data_2024["per_gop_2024"] = data_2024["per_gop"] * 100

# Calcular el porcentaje de otros partidos en 2020 y 2024
data_2020["per_oth_2020"] = 100 - data_2020["per_dem_2020"] - data_2020["per_gop_2020"]
data_2024["per_oth_2024"] = 100 - data_2024["per_dem_2024"] - data_2024["per_gop_2024"]

# Unir los datos de 2020 y 2024 con los de 2008-2016 usando "county_fips"
merged_data = data_2008_2016.merge(data_2020, on="county_fips", how="left")
merged_data = merged_data.merge(data_2024, on="county_fips", how="left")

# Calcular el cambio en porcentaje entre 2016 y 2020
merged_data["delta_per_dem_2020"] = merged_data["per_dem_2020"] - merged_data["per_dem_2016"]
merged_data["delta_per_gop_2020"] = merged_data["per_gop_2020"] - merged_data["per_gop_2016"]
merged_data["delta_per_oth_2020"] = merged_data["per_oth_2020"] - merged_data["per_oth_2016"]

# Calcular el cambio en porcentaje entre 2020 y 2024
merged_data["delta_per_dem_2024"] = merged_data["per_dem_2024"] - merged_data["per_dem_2020"]
merged_data["delta_per_gop_2024"] = merged_data["per_gop_2024"] - merged_data["per_gop_2020"]
merged_data["delta_per_oth_2024"] = merged_data["per_oth_2024"] - merged_data["per_oth_2020"]

### Incluir cambios en los condados de Connecticut (efectivos en 2024)

In [490]:
# Volver a definir el diccionario de mapeo de FIPS y nombres de condados en Connecticut
fips_mapping = {
    "09001": ("09110", "Capitol Planning Region"),
    "09003": ("09120", "Greater Bridgeport Planning Region"),
    "09005": ("09130", "Lower Connecticut River Valley Planning Region"),
    "09007": ("09140", "Naugatuck Valley Planning Region"),
    "09009": ("09150", "Northeastern Connecticut Planning Region"),
    "09011": ("09160", "Northwest Hills Planning Region"),
    "09013": ("09170", "South Central Connecticut Planning Region"),
    "09015": ("09180", "Southeastern Connecticut Planning Region"),
}

# Actualizar los valores de 'county_fips' y 'county' en el dataframe
for old_fips, (new_fips, new_county) in fips_mapping.items():
    merged_data.loc[merged_data['county_fips'] == old_fips, ['county_fips', 'county_name']] = [new_fips, new_county]

# Mostrar la tabla actualizada
# data_2016[data_2016['state'] == "CT"]

In [491]:
# Guardar los resultados en un nuevo archivo
output_path = "data/2008_2020_delta_pct_US_County_Level_Presidential_Results.csv"
merged_data.to_csv(output_path, index=False)

print(f"Archivo guardado en: {output_path}")
merged_data

Archivo guardado en: data/2008_2020_delta_pct_US_County_Level_Presidential_Results.csv


Unnamed: 0,county_fips,county_name,total_2008,dem_2008,gop_2008,oth_2008,total_2012,dem_2012,gop_2012,oth_2012,...,per_gop_y,per_dem_2024,per_gop_2024,per_oth_2024,delta_per_dem_2020,delta_per_gop_2020,delta_per_oth_2020,delta_per_dem_2024,delta_per_gop_2024,delta_per_oth_2024
0,01001,Autauga County,23641,6093,17403,145,23909,6354,17366,189,...,0.726641,26.388790,72.664065,0.947144,3.061510,-1.998987,-1.062523,-0.629575,1.227263,-0.597688
1,01003,Baldwin County,81413,19386,61271,756,84988,18329,65772,887,...,0.786467,20.469920,78.646723,0.883357,2.843720,-1.180099,-1.663621,-1.939110,2.475350,-0.536240
2,01005,Barbour County,11630,5697,5866,67,11459,5873,5539,47,...,0.570179,42.290480,57.017901,0.691619,-0.872078,1.179812,-0.307734,-3.497693,3.566674,-0.068982
3,01007,Bibb County,8644,2299,6262,83,8391,2200,6131,60,...,0.819392,17.519749,81.939184,0.541067,-0.723759,1.460100,-0.736341,-3.178531,3.512920,-0.334389
4,01009,Blount County,24267,3522,20389,356,23980,2961,20741,278,...,0.901796,9.162369,90.179619,0.658012,1.099476,-0.280322,-0.819153,-0.407009,0.608067,-0.201057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,56037,Sweetwater County,16703,5762,10360,581,16750,4773,11427,550,...,0.756895,22.517955,75.689541,1.792504,3.621362,0.712563,-4.333925,-0.508004,2.034177,-1.526173
3108,56039,Teton County,12316,7472,4565,279,11356,6211,4858,287,...,0.316128,66.896077,31.612755,1.491168,7.037406,-2.617592,-4.419814,-0.202104,2.035866,-1.833762
3109,56041,Uinta County,8383,2317,5763,303,8453,1628,6613,212,...,0.810552,17.375334,81.055209,1.569457,1.995817,3.308992,-5.304809,0.453402,1.327492,-1.780894
3110,56043,Washakie County,4089,1042,2956,91,3911,794,3013,104,...,0.813590,17.078886,81.359021,1.562093,1.905998,2.524345,-4.430343,0.852565,0.476668,-1.329233


## 2016

In [492]:
CENSUS_YEAR = 2015
ELECTORAL_YEAR = CENSUS_YEAR + 1

### Cargar características socioeconómicas y demográficas por condado

📊 Los datos socioeconómicos y demográficos son de elaboración propia y han sido extraídos usando la [`API del Censo de Estados Unidos`](https://www.census.gov/data/developers/data-sets.html) 

In [493]:
# Cargar datos
all_data = pd.read_csv(f"data/census_data_acs_{CENSUS_YEAR}.csv", header=0, sep=",")

# Renombrar columnas
all_data.rename(columns={"state": "state_code"}, inplace=True)
all_data.rename(columns={"county": "county_code"}, inplace=True)

# Filtrar los datos para excluir el estado 02 (Alaska)
all_data = all_data[all_data["state_code"] != 2]

# Filtrar los datos para excluir el estado 72 (Puerto Rico)
all_data = all_data[all_data["state_code"] != 72]

# Mostrar la tabla
all_data

Unnamed: 0,state_code,county_code,pop_total_2015,median_age_2015,median_income_2015,households_median_value_2015,households_avg_size_2015,households_total_2015,households_median_gross_rent_2015,mean_travel_time_2015,...,two_more_races_rate_2015,high_school_rate_2015,bachelors_rate_2015,poverty_rate_2015,households_renter_rate_2015,households_owner_rate_2015,unemployment_rate_2015,public_transport_rate_2015,no_health_insurance_rate_2015,county_fips
0,1,1,55221,37.7,51281.0,141300.0,2.68,20396,883.0,23675,...,1.57,17.86,8.87,12.78,26.08,73.92,7.64,0.04,9.97,1001
1,1,3,195121,42.2,50254.0,169300.0,2.60,74104,879.0,81184,...,1.74,16.53,13.52,13.22,28.48,71.52,7.50,0.06,12.81,1003
2,1,5,26932,38.8,32964.0,92200.0,2.61,9222,579.0,8329,...,1.33,18.29,5.24,23.84,36.41,63.59,17.65,0.13,13.86,1005
3,1,7,22604,38.9,38678.0,102700.0,2.95,7027,651.0,8089,...,1.37,23.59,4.19,15.43,24.89,75.11,8.31,0.19,8.88,1007
4,1,9,57710,40.7,45813.0,119800.0,2.74,20816,601.0,21450,...,1.70,18.75,5.88,16.55,21.10,78.90,7.67,0.15,11.54,1009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56,37,44772,33.6,69022.0,190900.0,2.64,16679,892.0,22118,...,3.35,18.69,8.78,11.30,29.01,70.99,5.41,1.21,13.40,56037
3138,56,39,22311,38.0,75325.0,689000.0,2.63,8187,1113.0,13082,...,1.31,11.71,28.52,7.34,39.50,60.50,2.61,4.87,16.81,56039
3139,56,41,20930,34.6,56569.0,176700.0,2.76,7502,641.0,9551,...,3.54,19.33,8.67,13.59,26.95,73.05,4.40,1.29,13.93,56041
3140,56,43,8400,43.4,47652.0,160800.0,2.34,3512,605.0,3583,...,3.19,17.49,10.39,13.54,26.25,73.75,6.80,0.01,16.60,56043


### Cargar los resultados electorales

📊 Los datos electorales han sido extraídos de la siguiente fuente:
[`US_County_Level_Election_Results_08-20.csv`](https://github.com/tonmcg/US_County_Level_Election_Results_08-20) 

In [494]:
# Cargar datos
d1 = pd.read_csv(f"data/{ELECTORAL_YEAR}_US_County_Level_Presidential_Results.csv")

# Crear nueva columna 'votes_others'
d1['votes_others'] = d1['total_votes'] - (d1['votes_gop'] + d1['votes_dem'])

# Crear nueva columna 'perc votes_others'
d1['perc votes_others'] = 1 - d1['per_dem'] - d1['per_gop']

# Quedarse con columnas específicas
d1 = d1[['state_name', 'county_fips', 'county_name', 'votes_gop', 'votes_dem', 'total_votes', 'votes_others', 'perc votes_others', 'per_dem', 'per_gop']]

# Renombrar columnas
# d1.columns = ["state", "fips", "county", f"Trump_{ELECTORAL_YEAR}", f"Biden_{ELECTORAL_YEAR}", f"Total_{ELECTORAL_YEAR}", f"Other_{ELECTORAL_YEAR}", f"Perc_Other_{ELECTORAL_YEAR}", f"Perc_Biden_{ELECTORAL_YEAR}", f"Perc_Trump_{ELECTORAL_YEAR}"]
d1.rename(columns={"state_name": "state"}, inplace=True)
d1.rename(columns={"county_name": "county"}, inplace=True)
d1.rename(columns={"votes_gop": f"votes_gop_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_dem": f"votes_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"total_votes": f"total_votes_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_others": f"votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"perc votes_others": f"per_votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_dem": f"per_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_gop": f"per_gop_{ELECTORAL_YEAR}"}, inplace=True)

# Mostrar la tabla
d1

Unnamed: 0,state,county_fips,county,votes_gop_2016,votes_dem_2016,total_votes_2016,votes_others_2016,per_votes_others_2016,per_dem_2016,per_gop_2016
0,AK,2013,Alaska,130413.0,93003.0,246588.0,23172.0,0.093971,0.377159,0.528870
1,AK,2016,Alaska,130413.0,93003.0,246588.0,23172.0,0.093971,0.377159,0.528870
2,AK,2020,Alaska,130413.0,93003.0,246588.0,23172.0,0.093971,0.377159,0.528870
3,AK,2050,Alaska,130413.0,93003.0,246588.0,23172.0,0.093971,0.377159,0.528870
4,AK,2060,Alaska,130413.0,93003.0,246588.0,23172.0,0.093971,0.377159,0.528870
...,...,...,...,...,...,...,...,...,...,...
3136,WY,56037,Sweetwater County,12153.0,3233.0,16661.0,1275.0,0.076526,0.194046,0.729428
3137,WY,56039,Teton County,3920.0,7313.0,12176.0,943.0,0.077447,0.600608,0.321945
3138,WY,56041,Uinta County,6154.0,1202.0,8053.0,697.0,0.086552,0.149261,0.764187
3139,WY,56043,Washakie County,2911.0,532.0,3715.0,272.0,0.073217,0.143203,0.783580


### Unir los dos datasets

In [495]:
# Unir los dos datasets
data_2016 = pd.merge(d1, all_data, how='right', on=["county_fips"])

# Código FIPS tiene que tener 5 dígitos
data_2016['county_fips'] = data_2016['county_fips'].astype(str).str.zfill(5)

# Mostrar la tabla
data_2016

Unnamed: 0,state,county_fips,county,votes_gop_2016,votes_dem_2016,total_votes_2016,votes_others_2016,per_votes_others_2016,per_dem_2016,per_gop_2016,...,hispanic_rate_2015,two_more_races_rate_2015,high_school_rate_2015,bachelors_rate_2015,poverty_rate_2015,households_renter_rate_2015,households_owner_rate_2015,unemployment_rate_2015,public_transport_rate_2015,no_health_insurance_rate_2015
0,AL,01001,Autauga County,18110.0,5908.0,24661.0,643.0,0.026074,0.239569,0.734358,...,2.61,1.57,17.86,8.87,12.78,26.08,73.92,7.64,0.04,9.97
1,AL,01003,Baldwin County,72780.0,18409.0,94090.0,2901.0,0.030832,0.195653,0.773515,...,4.50,1.74,16.53,13.52,13.22,28.48,71.52,7.50,0.06,12.81
2,AL,01005,Barbour County,5431.0,4848.0,10390.0,111.0,0.010683,0.466603,0.522714,...,4.61,1.33,18.29,5.24,23.84,36.41,63.59,17.65,0.13,13.86
3,AL,01007,Bibb County,6733.0,1874.0,8748.0,141.0,0.016118,0.214220,0.769662,...,2.22,1.37,23.59,4.19,15.43,24.89,75.11,8.31,0.19,8.88
4,AL,01009,Blount County,22808.0,2150.0,25384.0,426.0,0.016782,0.084699,0.898519,...,8.63,1.70,18.75,5.88,16.55,21.10,78.90,7.67,0.15,11.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3108,WY,56037,Sweetwater County,12153.0,3233.0,16661.0,1275.0,0.076526,0.194046,0.729428,...,15.76,3.35,18.69,8.78,11.30,29.01,70.99,5.41,1.21,13.40
3109,WY,56039,Teton County,3920.0,7313.0,12176.0,943.0,0.077447,0.600608,0.321945,...,15.14,1.31,11.71,28.52,7.34,39.50,60.50,2.61,4.87,16.81
3110,WY,56041,Uinta County,6154.0,1202.0,8053.0,697.0,0.086552,0.149261,0.764187,...,8.88,3.54,19.33,8.67,13.59,26.95,73.05,4.40,1.29,13.93
3111,WY,56043,Washakie County,2911.0,532.0,3715.0,272.0,0.073217,0.143203,0.783580,...,13.99,3.19,17.49,10.39,13.54,26.25,73.75,6.80,0.01,16.60


### Incluir cambios en los condados de Connecticut (efectivos en 2024)

In [496]:
# Volver a definir el diccionario de mapeo de FIPS y nombres de condados en Connecticut
fips_mapping = {
    "09001": ("09110", "Capitol Planning Region"),
    "09003": ("09120", "Greater Bridgeport Planning Region"),
    "09005": ("09130", "Lower Connecticut River Valley Planning Region"),
    "09007": ("09140", "Naugatuck Valley Planning Region"),
    "09009": ("09150", "Northeastern Connecticut Planning Region"),
    "09011": ("09160", "Northwest Hills Planning Region"),
    "09013": ("09170", "South Central Connecticut Planning Region"),
    "09015": ("09180", "Southeastern Connecticut Planning Region"),
}

# Actualizar los valores de 'county_fips' y 'county' en el dataframe
for old_fips, (new_fips, new_county) in fips_mapping.items():
    data_2016.loc[data_2016['county_fips'] == old_fips, ['county_fips', 'county']] = [new_fips, new_county]

# Mostrar la tabla actualizada
# data_2016[data_2016['state'] == "CT"]

### Calcular el ganador en cada condado

<div style="border-left: 4px solid #0074cc; background-color: #e6f4ff; padding: 10px;">
    <strong>Nota:</strong> Por simplicidad, para calcular el ganador en cada condado asumimos que si hay un empate el ganador es Biden. En cualquier otro caso, el ganador será el que haya obtenido más del 50% de los votos.
</div>


In [497]:
data_2016[f'winner_{ELECTORAL_YEAR}'] = data_2016[f'per_dem_{ELECTORAL_YEAR}'].apply(lambda x: "dem" if x >= 0.50 else "gop")

### Valores ausentes (NAs)

In [498]:
missing_fips_2016 = data_2016[data_2016.isna().any(axis=1)]['county_fips'].tolist()
missing_fips_2016

['15005', '32029', '46102', '48261', '48301']

In [499]:
data_2016 = data_2016.dropna(how="any")
data_2016[data_2016.isna().any(axis=1)]

Unnamed: 0,state,county_fips,county,votes_gop_2016,votes_dem_2016,total_votes_2016,votes_others_2016,per_votes_others_2016,per_dem_2016,per_gop_2016,...,two_more_races_rate_2015,high_school_rate_2015,bachelors_rate_2015,poverty_rate_2015,households_renter_rate_2015,households_owner_rate_2015,unemployment_rate_2015,public_transport_rate_2015,no_health_insurance_rate_2015,winner_2016


### Añadir columnas de delta (2016 - 2012)

In [500]:
# Seleccionar las columnas de delta de 2016
delta_cols_2016 = ["county_fips", "delta_per_dem_2016", "delta_per_gop_2016", "delta_per_oth_2016"]

# Unir los datos por "county_fips"
data_2016 = data_2016.merge(merged_data[delta_cols_2016], on="county_fips", how="left")

# Verificar los valores NaN en las columnas añadidas
missing_values = data_2016[["delta_per_dem_2016", "delta_per_gop_2016", "delta_per_oth_2016"]].isna().sum()

print(missing_values)

delta_per_dem_2016    0
delta_per_gop_2016    0
delta_per_oth_2016    0
dtype: int64


### Quitar "_2016" del nombre de las variables

In [501]:
# Renombrar columnas para quitar el sufijo "_{CENSUS_YEAR}" o "_{ELECTORAL_YEAR}"
data_2016.columns = [col.replace(f'_{CENSUS_YEAR}', '').replace(f'_{ELECTORAL_YEAR}', '') for col in data_2016.columns]

In [502]:
data_2016

Unnamed: 0,state,county_fips,county,votes_gop,votes_dem,total_votes,votes_others,per_votes_others,per_dem,per_gop,...,poverty_rate,households_renter_rate,households_owner_rate,unemployment_rate,public_transport_rate,no_health_insurance_rate,winner,delta_per_dem,delta_per_gop,delta_per_oth
0,AL,01001,Autauga County,18110.0,5908.0,24661.0,643.0,0.026074,0.239569,0.734358,...,12.78,26.08,73.92,7.64,0.04,9.97,gop,-2.618911,0.802053,1.816858
1,AL,01003,Baldwin County,72780.0,18409.0,94090.0,2901.0,0.030832,0.195653,0.773515,...,13.22,28.48,71.52,7.50,0.06,12.81,gop,-2.001264,-0.038277,2.039541
2,AL,01005,Barbour County,5431.0,4848.0,10390.0,111.0,0.010683,0.466603,0.522714,...,23.84,36.41,63.59,17.65,0.13,13.86,gop,-4.592041,3.933864,0.658177
3,AL,01007,Bibb County,6733.0,1874.0,8748.0,141.0,0.016118,0.214220,0.769662,...,15.43,24.89,75.11,8.31,0.19,8.88,gop,-4.796528,3.899783,0.896745
4,AL,01009,Blount County,22808.0,2150.0,25384.0,426.0,0.016782,0.084699,0.898519,...,16.55,21.10,78.90,7.67,0.15,11.54,gop,-3.877888,3.358964,0.518923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,WY,56037,Sweetwater County,12153.0,3233.0,16661.0,1275.0,0.076526,0.194046,0.729428,...,11.30,29.01,70.99,5.41,1.21,13.40,gop,-9.090925,4.721905,4.369020
3104,WY,56039,Teton County,3920.0,7313.0,12176.0,943.0,0.077447,0.600608,0.321945,...,7.34,39.50,60.50,2.61,4.87,16.81,dem,5.367221,-10.584667,5.217445
3105,WY,56041,Uinta County,6154.0,1202.0,8053.0,697.0,0.086552,0.149261,0.764187,...,13.59,26.95,73.05,4.40,1.29,13.93,gop,-4.333320,-1.813854,6.147174
3106,WY,56043,Washakie County,2911.0,532.0,3715.0,272.0,0.073217,0.143203,0.783580,...,13.54,26.25,73.75,6.80,0.01,16.60,gop,-5.981390,1.318888,4.662502


## 2020

In [503]:
CENSUS_YEAR = 2019
ELECTORAL_YEAR = CENSUS_YEAR + 1

### Cargar características socioeconómicas y demográficas por condado

📊 Los datos socioeconómicos y demográficos son de elaboración propia y han sido extraídos usando la [`API del Censo de Estados Unidos`](https://www.census.gov/data/developers/data-sets.html) 

In [504]:
# Cargar datos
all_data = pd.read_csv(f"data/census_data_acs_{CENSUS_YEAR}.csv", header=0, sep=",")

# Renombrar columnas
all_data.rename(columns={"state": "state_code"}, inplace=True)
all_data.rename(columns={"county": "county_code"}, inplace=True)

# Filtrar los datos para excluir el estado 02 (Alaska)
all_data = all_data[all_data["state_code"] != 2]

# Filtrar los datos para excluir el estado 72 (Puerto Rico)
all_data = all_data[all_data["state_code"] != 72]

# Mostrar la tabla
all_data

Unnamed: 0,state_code,county_code,pop_total_2019,median_age_2019,median_income_2019,households_median_value_2019,households_avg_size_2019,households_total_2019,households_median_gross_rent_2019,mean_travel_time_2019,...,two_more_races_rate_2019,high_school_rate_2019,bachelors_rate_2019,poverty_rate_2019,households_renter_rate_2019,households_owner_rate_2019,unemployment_rate_2019,public_transport_rate_2019,no_health_insurance_rate_2019,county_fips
0,1,1,55380,38.2,58731,154500,2.56,21397,986,23796,...,2.16,19.21,10.87,15.06,26.71,73.29,3.68,0.29,6.96,1001
1,1,3,212830,43.0,58320,197900,2.59,80930,1020,87084,...,1.69,15.90,14.94,10.20,24.75,75.25,4.26,0.02,8.78,1003
2,1,5,25361,40.4,32525,90700,2.41,9345,576,8108,...,1.20,20.10,5.39,27.11,39.10,60.90,9.17,0.11,10.03,1005
3,1,7,22493,40.9,47542,92800,2.99,6891,734,7910,...,0.63,25.35,4.64,16.63,25.58,74.42,7.28,0.00,9.79,1007
4,1,9,57681,40.7,49358,127800,2.74,20847,667,21361,...,1.59,18.57,5.95,13.42,21.22,78.78,3.36,0.05,10.68,1009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56,37,43521,35.3,74843,205600,2.77,15523,861,20717,...,2.27,16.58,9.50,11.31,23.78,76.22,6.09,1.41,11.18,56037
3138,56,39,23280,39.3,84678,866600,2.47,9019,1376,13079,...,0.65,10.19,28.98,6.13,39.59,60.41,1.08,2.12,12.71,56039
3139,56,41,20479,35.8,63403,175000,2.66,7597,698,8960,...,3.55,21.49,7.00,11.22,22.94,77.06,6.28,1.47,11.09,56041
3140,56,43,8027,42.9,54158,165800,2.34,3365,640,3512,...,3.79,15.72,10.64,10.28,23.51,76.49,3.92,0.00,14.73,56043


### Cargar los resultados electorales

📊 Los datos electorales han sido extraídos de la siguiente fuente:
[`US_County_Level_Election_Results_08-20.csv`](https://github.com/tonmcg/US_County_Level_Election_Results_08-20) 

In [505]:
# Cargar datos
d1 = pd.read_csv(f"data/{ELECTORAL_YEAR}_US_County_Level_Presidential_Results.csv")

# Crear nueva columna 'votes_others'
d1['votes_others'] = d1['total_votes'] - (d1['votes_gop'] + d1['votes_dem'])

# Crear nueva columna 'perc votes_others'
d1['perc votes_others'] = 1 - d1['per_dem'] - d1['per_gop']

# Quedarse con columnas específicas
d1 = d1[['state_name', 'county_fips', 'county_name', 'votes_gop', 'votes_dem', 'total_votes', 'votes_others', 'perc votes_others', 'per_dem', 'per_gop']]

# Renombrar columnas
# d1.columns = ["state", "fips", "county", f"Trump_{ELECTORAL_YEAR}", f"Biden_{ELECTORAL_YEAR}", f"Total_{ELECTORAL_YEAR}", f"Other_{ELECTORAL_YEAR}", f"Perc_Other_{ELECTORAL_YEAR}", f"Perc_Biden_{ELECTORAL_YEAR}", f"Perc_Trump_{ELECTORAL_YEAR}"]
d1.rename(columns={"state_name": "state"}, inplace=True)
d1.rename(columns={"county_name": "county"}, inplace=True)
d1.rename(columns={"votes_gop": f"votes_gop_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_dem": f"votes_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"total_votes": f"total_votes_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_others": f"votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"perc votes_others": f"per_votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_dem": f"per_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_gop": f"per_gop_{ELECTORAL_YEAR}"}, inplace=True)

# Mostrar la tabla
d1

Unnamed: 0,state,county_fips,county,votes_gop_2020,votes_dem_2020,total_votes_2020,votes_others_2020,per_votes_others_2020,per_dem_2020,per_gop_2020
0,Alabama,1001,Autauga County,19838,7503,27770,429,0.015448,0.270184,0.714368
1,Alabama,1003,Baldwin County,83544,24578,109679,1557,0.014196,0.224090,0.761714
2,Alabama,1005,Barbour County,5622,4816,10518,80,0.007606,0.457882,0.534512
3,Alabama,1007,Bibb County,7525,1986,9595,84,0.008755,0.206983,0.784263
4,Alabama,1009,Blount County,24711,2640,27588,237,0.008591,0.095694,0.895716
...,...,...,...,...,...,...,...,...,...,...
3147,Wyoming,56037,Sweetwater County,12229,3823,16603,551,0.033187,0.230260,0.736554
3148,Wyoming,56039,Teton County,4341,9848,14677,488,0.033249,0.670982,0.295769
3149,Wyoming,56041,Uinta County,7496,1591,9402,315,0.033504,0.169219,0.797277
3150,Wyoming,56043,Washakie County,3245,651,4012,116,0.028913,0.162263,0.808824


### Unir los dos datasets

In [506]:
# Unir los dos datasets
data_2020 = pd.merge(d1, all_data, how='right', on=["county_fips"])

# Código FIPS tiene que tener 5 dígitos
data_2020['county_fips'] = data_2020['county_fips'].astype(str).str.zfill(5)

# Mostrar la tabla
data_2020

Unnamed: 0,state,county_fips,county,votes_gop_2020,votes_dem_2020,total_votes_2020,votes_others_2020,per_votes_others_2020,per_dem_2020,per_gop_2020,...,hispanic_rate_2019,two_more_races_rate_2019,high_school_rate_2019,bachelors_rate_2019,poverty_rate_2019,households_renter_rate_2019,households_owner_rate_2019,unemployment_rate_2019,public_transport_rate_2019,no_health_insurance_rate_2019
0,Alabama,01001,Autauga County,19838.0,7503.0,27770.0,429.0,0.015448,0.270184,0.714368,...,2.83,2.16,19.21,10.87,15.06,26.71,73.29,3.68,0.29,6.96
1,Alabama,01003,Baldwin County,83544.0,24578.0,109679.0,1557.0,0.014196,0.224090,0.761714,...,4.56,1.69,15.90,14.94,10.20,24.75,75.25,4.26,0.02,8.78
2,Alabama,01005,Barbour County,5622.0,4816.0,10518.0,80.0,0.007606,0.457882,0.534512,...,4.36,1.20,20.10,5.39,27.11,39.10,60.90,9.17,0.11,10.03
3,Alabama,01007,Bibb County,7525.0,1986.0,9595.0,84.0,0.008755,0.206983,0.784263,...,2.57,0.63,25.35,4.64,16.63,25.58,74.42,7.28,0.00,9.79
4,Alabama,01009,Blount County,24711.0,2640.0,27588.0,237.0,0.008591,0.095694,0.895716,...,9.26,1.59,18.57,5.95,13.42,21.22,78.78,3.36,0.05,10.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3108,Wyoming,56037,Sweetwater County,12229.0,3823.0,16603.0,551.0,0.033187,0.230260,0.736554,...,15.88,2.27,16.58,9.50,11.31,23.78,76.22,6.09,1.41,11.18
3109,Wyoming,56039,Teton County,4341.0,9848.0,14677.0,488.0,0.033249,0.670982,0.295769,...,15.03,0.65,10.19,28.98,6.13,39.59,60.41,1.08,2.12,12.71
3110,Wyoming,56041,Uinta County,7496.0,1591.0,9402.0,315.0,0.033504,0.169219,0.797277,...,9.13,3.55,21.49,7.00,11.22,22.94,77.06,6.28,1.47,11.09
3111,Wyoming,56043,Washakie County,3245.0,651.0,4012.0,116.0,0.028913,0.162263,0.808824,...,14.23,3.79,15.72,10.64,10.28,23.51,76.49,3.92,0.00,14.73


### Incluir cambios en los condados de Connecticut (efectivos en 2024)
https://appliedgeographic.com/2023/04/changing-of-the-counties/

In [507]:
# Volver a definir el diccionario de mapeo de FIPS y nombres de condados en Connecticut
fips_mapping = {
    "09001": ("09110", "Capitol Planning Region"),
    "09003": ("09120", "Greater Bridgeport Planning Region"),
    "09005": ("09130", "Lower Connecticut River Valley Planning Region"),
    "09007": ("09140", "Naugatuck Valley Planning Region"),
    "09009": ("09150", "Northeastern Connecticut Planning Region"),
    "09011": ("09160", "Northwest Hills Planning Region"),
    "09013": ("09170", "South Central Connecticut Planning Region"),
    "09015": ("09180", "Southeastern Connecticut Planning Region"),
}

# Actualizar los valores de 'county_fips' y 'county' en el dataframe
for old_fips, (new_fips, new_county) in fips_mapping.items():
    data_2020.loc[data_2020['county_fips'] == old_fips, ['county_fips', 'county']] = [new_fips, new_county]

# Mostrar la tabla actualizada
data_2020[data_2020['state'] == "Connecticut"]

Unnamed: 0,state,county_fips,county,votes_gop_2020,votes_dem_2020,total_votes_2020,votes_others_2020,per_votes_others_2020,per_dem_2020,per_gop_2020,...,hispanic_rate_2019,two_more_races_rate_2019,high_school_rate_2019,bachelors_rate_2019,poverty_rate_2019,households_renter_rate_2019,households_owner_rate_2019,unemployment_rate_2019,public_transport_rate_2019,no_health_insurance_rate_2019
279,Connecticut,9110,Capitol Planning Region,169039.0,297505.0,472990.0,6446.0,0.013628,0.628988,0.357384,...,19.7,3.15,13.15,18.24,8.72,32.95,67.05,6.56,4.98,8.35
280,Connecticut,9120,Greater Bridgeport Planning Region,159024.0,283368.0,449336.0,6944.0,0.015454,0.630637,0.353909,...,18.03,3.57,16.12,14.88,10.54,35.89,64.11,5.84,1.57,4.0
281,Connecticut,9130,Lower Connecticut River Valley Planning Region,55601.0,50164.0,107544.0,1779.0,0.016542,0.466451,0.517007,...,6.28,2.18,19.33,15.18,6.82,23.51,76.49,4.98,0.74,3.84
282,Connecticut,9140,Naugatuck Valley Planning Region,40665.0,56848.0,99203.0,1690.0,0.017036,0.573047,0.409917,...,6.19,2.06,17.03,17.27,6.71,26.48,73.52,4.65,0.79,3.17
283,Connecticut,9150,Northeastern Connecticut Planning Region,169892.0,242629.0,417980.0,5459.0,0.01306,0.58048,0.40646,...,18.11,3.25,19.1,12.96,11.38,38.15,61.85,6.44,1.88,4.94
284,Connecticut,9160,Northwest Hills Planning Region,57110.0,79459.0,139604.0,3035.0,0.02174,0.569174,0.409086,...,10.55,5.34,17.91,12.9,8.99,33.63,66.37,5.93,0.74,3.84
285,Connecticut,9170,South Central Connecticut Planning Region,34819.0,44006.0,80546.0,1721.0,0.021367,0.546346,0.432287,...,5.52,2.37,14.92,15.06,6.62,28.24,71.76,4.89,0.95,2.52
286,Connecticut,9180,Southeastern Connecticut Planning Region,29141.0,26701.0,57077.0,1235.0,0.021637,0.467807,0.510556,...,11.8,3.7,19.49,10.33,10.88,31.03,68.97,5.54,0.29,4.06


### Calcular el ganador en cada condado

<div style="border-left: 4px solid #0074cc; background-color: #e6f4ff; padding: 10px;">
    <strong>Nota:</strong> Por simplicidad, para calcular el ganador en cada condado asumimos que si hay un empate el ganador es Biden. En cualquier otro caso, el ganador será el que haya obtenido más del 50% de los votos.
</div>


In [508]:
data_2020[f'winner_{ELECTORAL_YEAR}'] = data_2020[f'per_dem_{ELECTORAL_YEAR}'].apply(lambda x: "dem" if x >= 0.50 else "gop")

### Valores ausentes (NAs)

In [509]:
missing_fips_2020 = data_2020[data_2020.isna().any(axis=1)]['county_fips'].tolist()
missing_fips_2020

['15005']

In [510]:
data_2020 = data_2020.dropna(how="any")
data_2020[data_2020.isna().any(axis=1)]

Unnamed: 0,state,county_fips,county,votes_gop_2020,votes_dem_2020,total_votes_2020,votes_others_2020,per_votes_others_2020,per_dem_2020,per_gop_2020,...,two_more_races_rate_2019,high_school_rate_2019,bachelors_rate_2019,poverty_rate_2019,households_renter_rate_2019,households_owner_rate_2019,unemployment_rate_2019,public_transport_rate_2019,no_health_insurance_rate_2019,winner_2020


### Añadir columnas de delta (2020 - 2016)

In [511]:
# Seleccionar las columnas de delta de 2020
delta_cols_2020 = ["county_fips", "delta_per_dem_2020", "delta_per_gop_2020", "delta_per_oth_2020"]

# Unir los datos por "county_fips"
data_2020 = data_2020.merge(merged_data[delta_cols_2020], on="county_fips", how="left")

In [512]:
# Identificar los county_fips con valores NaN
missing_fips = data_2020[data_2020["delta_per_dem_2020"].isna()]["county_fips"].iloc[0]
print(missing_fips)

missing_fips_2020.append(missing_fips)
print(missing_fips_2020)


46102
['15005', '46102']


### Quitar "_2020" del nombre de las variables

In [513]:
# Renombrar columnas para quitar el sufijo "_{CENSUS_YEAR}" o "_{ELECTORAL_YEAR}"
data_2020.columns = [col.replace(f'_{CENSUS_YEAR}', '').replace(f'_{ELECTORAL_YEAR}', '') for col in data_2020.columns]

## 2024

In [514]:
CENSUS_YEAR = 2023
ELECTORAL_YEAR = CENSUS_YEAR + 1

### Cargar características socioeconómicas y demográficas por condado

📊 Los datos socioeconómicos y demográficos son de elaboración propia y han sido extraídos usando la [`API del Censo de Estados Unidos`](https://www.census.gov/data/developers/data-sets.html) 

In [515]:
# Cargar datos
all_data = pd.read_csv(f"data/census_data_acs_{CENSUS_YEAR}.csv", header=0, sep=",")

# Renombrar columnas
all_data.rename(columns={"state": "state_code"}, inplace=True)
all_data.rename(columns={"county": "county_code"}, inplace=True)

# Filtrar los datos para excluir el estado 02 (Alaska)
all_data = all_data[all_data["state_code"] != 2]

# Filtrar los datos para excluir el estado 72 (Puerto Rico)
all_data = all_data[all_data["state_code"] != 72]

# Mostrar la tabla
all_data

Unnamed: 0,state_code,county_code,pop_total_2023,median_age_2023,median_income_2023,households_median_value_2023,households_avg_size_2023,households_total_2023,households_median_gross_rent_2023,mean_travel_time_2023,...,two_more_races_rate_2023,high_school_rate_2023,bachelors_rate_2023,poverty_rate_2023,households_renter_rate_2023,households_owner_rate_2023,unemployment_rate_2023,public_transport_rate_2023,no_health_insurance_rate_2023,county_fips
0,1,1,59285,39.2,69841,197900,2.61,22523,1200,25415,...,4.59,18.70,10.99,10.58,25.09,74.91,2.54,0.11,7.20,1001
1,1,3,239945,43.7,75019,287000,2.50,94642,1211,97455,...,5.54,16.15,14.73,10.34,22.46,77.54,3.19,0.04,8.06,1003
2,1,5,24757,40.7,44290,109900,2.39,9080,644,7989,...,2.95,21.48,4.76,19.17,32.51,67.49,5.71,0.07,9.49,1005
3,1,7,22152,41.3,51215,132600,2.74,7571,802,7999,...,2.82,24.60,5.06,19.22,22.82,77.18,9.98,0.04,7.81,1007
4,1,9,59292,40.9,61096,169700,2.67,21977,743,23976,...,5.15,19.58,6.89,13.95,20.49,79.51,5.84,0.00,10.11,1009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3139,56,37,41786,37.4,76464,242900,2.45,16863,913,19567,...,9.26,15.46,8.73,13.25,26.02,73.98,5.79,1.14,12.85,56037
3140,56,39,23358,40.1,112681,1371900,2.39,9383,1758,12293,...,6.47,11.62,31.24,6.99,38.47,61.53,2.45,1.31,10.00,56039
3141,56,41,20605,36.7,82672,255400,2.73,7479,882,8936,...,6.18,18.53,10.02,7.44,23.47,76.53,3.51,0.44,10.85,56041
3142,56,43,7708,44.2,62648,188600,2.21,3417,725,3482,...,13.84,19.30,10.57,8.45,25.81,74.19,2.03,0.01,12.22,56043


### Cargar los resultados electorales

📊 Los datos electorales han sido extraídos de la siguiente fuente:
[`US_County_Level_Election_Results_08-20.csv`](https://github.com/tonmcg/US_County_Level_Election_Results_08-20) 

In [516]:
# Cargar datos
d1 = pd.read_csv(f"data/{ELECTORAL_YEAR}_US_County_Level_Presidential_Results.csv")

# Crear nueva columna 'votes_others'
d1['votes_others'] = d1['total_votes'] - (d1['votes_gop'] + d1['votes_dem'])

# Crear nueva columna 'perc votes_others'
d1['perc votes_others'] = 1 - d1['per_dem'] - d1['per_gop']

# Quedarse con columnas específicas
d1 = d1[['state_name', 'county_fips', 'county_name', 'votes_gop', 'votes_dem', 'total_votes', 'votes_others', 'perc votes_others', 'per_dem', 'per_gop']]

# Renombrar columnas
# d1.columns = ["state", "fips", "county", f"Trump_{ELECTORAL_YEAR}", f"Biden_{ELECTORAL_YEAR}", f"Total_{ELECTORAL_YEAR}", f"Other_{ELECTORAL_YEAR}", f"Perc_Other_{ELECTORAL_YEAR}", f"Perc_Biden_{ELECTORAL_YEAR}", f"Perc_Trump_{ELECTORAL_YEAR}"]
d1.rename(columns={"state_name": "state"}, inplace=True)
d1.rename(columns={"county_name": "county"}, inplace=True)
d1.rename(columns={"votes_gop": f"votes_gop_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_dem": f"votes_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"total_votes": f"total_votes_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"votes_others": f"votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"perc votes_others": f"per_votes_others_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_dem": f"per_dem_{ELECTORAL_YEAR}"}, inplace=True)
d1.rename(columns={"per_gop": f"per_gop_{ELECTORAL_YEAR}"}, inplace=True)

# Mostrar la tabla
d1

Unnamed: 0,state,county_fips,county,votes_gop_2024,votes_dem_2024,total_votes_2024,votes_others_2024,per_votes_others_2024,per_dem_2024,per_gop_2024
0,Alabama,1001,Autauga County,20484,7439,28190,267,0.009471,0.263888,0.726641
1,Alabama,1003,Baldwin County,95798,24934,121808,1076,0.008834,0.204699,0.786467
2,Alabama,1005,Barbour County,5606,4158,9832,68,0.006916,0.422905,0.570179
3,Alabama,1007,Bibb County,7572,1619,9241,50,0.005411,0.175197,0.819392
4,Alabama,1009,Blount County,25354,2576,28115,185,0.006580,0.091624,0.901796
...,...,...,...,...,...,...,...,...,...,...
3155,Wyoming,56037,Sweetwater County,12541,3731,16569,297,0.017925,0.225180,0.756895
3156,Wyoming,56039,Teton County,4134,8748,13077,195,0.014912,0.668961,0.316128
3157,Wyoming,56041,Uinta County,7282,1561,8984,141,0.015695,0.173753,0.810552
3158,Wyoming,56043,Washakie County,3125,656,3841,60,0.015621,0.170789,0.813590


### Unir los dos datasets

In [517]:
# Unir los dos datasets
data_2024 = pd.merge(d1, all_data, how='right', on=["county_fips"])

# Código FIPS tiene que tener 5 dígitos
data_2024['county_fips'] = data_2024['county_fips'].astype(str).str.zfill(5)

# Mostrar la tabla
data_2024

Unnamed: 0,state,county_fips,county,votes_gop_2024,votes_dem_2024,total_votes_2024,votes_others_2024,per_votes_others_2024,per_dem_2024,per_gop_2024,...,hispanic_rate_2023,two_more_races_rate_2023,high_school_rate_2023,bachelors_rate_2023,poverty_rate_2023,households_renter_rate_2023,households_owner_rate_2023,unemployment_rate_2023,public_transport_rate_2023,no_health_insurance_rate_2023
0,Alabama,01001,Autauga County,20484.0,7439.0,28190.0,267.0,0.009471,0.263888,0.726641,...,3.69,4.59,18.70,10.99,10.58,25.09,74.91,2.54,0.11,7.20
1,Alabama,01003,Baldwin County,95798.0,24934.0,121808.0,1076.0,0.008834,0.204699,0.786467,...,5.58,5.54,16.15,14.73,10.34,22.46,77.54,3.19,0.04,8.06
2,Alabama,01005,Barbour County,5606.0,4158.0,9832.0,68.0,0.006916,0.422905,0.570179,...,6.02,2.95,21.48,4.76,19.17,32.51,67.49,5.71,0.07,9.49
3,Alabama,01007,Bibb County,7572.0,1619.0,9241.0,50.0,0.005411,0.175197,0.819392,...,3.36,2.82,24.60,5.06,19.22,22.82,77.18,9.98,0.04,7.81
4,Alabama,01009,Blount County,25354.0,2576.0,28115.0,185.0,0.006580,0.091624,0.901796,...,10.06,5.15,19.58,6.89,13.95,20.49,79.51,5.84,0.00,10.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,Wyoming,56037,Sweetwater County,12541.0,3731.0,16569.0,297.0,0.017925,0.225180,0.756895,...,16.57,9.26,15.46,8.73,13.25,26.02,73.98,5.79,1.14,12.85
3110,Wyoming,56039,Teton County,4134.0,8748.0,13077.0,195.0,0.014912,0.668961,0.316128,...,14.75,6.47,11.62,31.24,6.99,38.47,61.53,2.45,1.31,10.00
3111,Wyoming,56041,Uinta County,7282.0,1561.0,8984.0,141.0,0.015695,0.173753,0.810552,...,9.91,6.18,18.53,10.02,7.44,23.47,76.53,3.51,0.44,10.85
3112,Wyoming,56043,Washakie County,3125.0,656.0,3841.0,60.0,0.015621,0.170789,0.813590,...,14.10,13.84,19.30,10.57,8.45,25.81,74.19,2.03,0.01,12.22


### Incluir cambios en los condados de Connecticut (efectivos en 2024)
https://appliedgeographic.com/2023/04/changing-of-the-counties/

In [518]:
# Eliminar condado que no está en el dataset de datos del censo de 2016 y 2019
data_2024 = data_2024[data_2024['county_fips'] != "09190"]
data_2024[data_2024['state'] == "Connecticut"]

Unnamed: 0,state,county_fips,county,votes_gop_2024,votes_dem_2024,total_votes_2024,votes_others_2024,per_votes_others_2024,per_dem_2024,per_gop_2024,...,hispanic_rate_2023,two_more_races_rate_2023,high_school_rate_2023,bachelors_rate_2023,poverty_rate_2023,households_renter_rate_2023,households_owner_rate_2023,unemployment_rate_2023,public_transport_rate_2023,no_health_insurance_rate_2023
279,Connecticut,9110,Capitol Planning Region,181038.0,285105.0,474268.0,8125.0,0.017132,0.601147,0.381721,...,17.38,8.93,15.24,15.96,9.96,34.61,65.39,5.26,1.31,4.16
280,Connecticut,9120,Greater Bridgeport Planning Region,51519.0,83461.0,137015.0,2035.0,0.014852,0.609138,0.37601,...,26.06,9.82,14.47,15.98,12.9,34.62,65.38,7.43,2.94,8.33
281,Connecticut,9130,Lower Connecticut River Valley Planning Region,44318.0,58360.0,104480.0,1802.0,0.017247,0.558576,0.424177,...,7.45,7.41,16.34,18.75,6.64,24.77,75.23,3.88,0.62,3.38
282,Connecticut,9140,Naugatuck Valley Planning Region,115290.0,99237.0,217520.0,2993.0,0.01376,0.45622,0.53002,...,19.19,10.97,18.13,13.51,10.21,32.36,67.64,6.1,0.72,4.42
283,Connecticut,9150,Northeastern Connecticut Planning Region,29028.0,21165.0,50962.0,769.0,0.01509,0.415309,0.569601,...,5.05,6.08,22.52,11.04,8.49,23.8,76.2,6.23,0.07,3.69
284,Connecticut,9160,Northwest Hills Planning Region,31944.0,31137.0,64076.0,995.0,0.015528,0.485939,0.498533,...,8.37,6.88,18.25,16.96,9.06,24.14,75.86,5.43,0.63,3.87
285,Connecticut,9170,South Central Connecticut Planning Region,103087.0,156248.0,264854.0,5519.0,0.020838,0.58994,0.389222,...,19.1,9.04,17.55,14.88,11.2,39.44,60.56,5.91,1.54,5.09
286,Connecticut,9180,Southeastern Connecticut Planning Region,58392.0,76146.0,136954.0,2416.0,0.017641,0.555997,0.426362,...,14.67,9.26,17.54,13.58,10.15,34.61,65.39,5.07,0.64,4.37


### Calcular el ganador en cada condado

<div style="border-left: 4px solid #0074cc; background-color: #e6f4ff; padding: 10px;">
    <strong>Nota:</strong> Por simplicidad, para calcular el ganador en cada condado asumimos que si hay un empate el ganador es Biden. En cualquier otro caso, el ganador será el que haya obtenido más del 50% de los votos.
</div>


In [519]:
data_2024[f'winner_{ELECTORAL_YEAR}'] = data_2024[f'per_dem_{ELECTORAL_YEAR}'].apply(lambda x: "dem" if x >= 0.50 else "gop")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_2024[f'winner_{ELECTORAL_YEAR}'] = data_2024[f'per_dem_{ELECTORAL_YEAR}'].apply(lambda x: "dem" if x >= 0.50 else "gop")


### Valores ausentes (NAs)

In [520]:
missing_fips_2024 = data_2024[data_2024.isna().any(axis=1)]['county_fips'].tolist()
missing_fips_2024

['15005']

In [521]:
data_2024 = data_2024.dropna(how="any")
data_2024[data_2024.isna().any(axis=1)]

Unnamed: 0,state,county_fips,county,votes_gop_2024,votes_dem_2024,total_votes_2024,votes_others_2024,per_votes_others_2024,per_dem_2024,per_gop_2024,...,two_more_races_rate_2023,high_school_rate_2023,bachelors_rate_2023,poverty_rate_2023,households_renter_rate_2023,households_owner_rate_2023,unemployment_rate_2023,public_transport_rate_2023,no_health_insurance_rate_2023,winner_2024


### Añadir columnas de delta (2024 - 2020)

In [522]:
# Seleccionar las columnas de delta de 2024
delta_cols_2024 = ["county_fips", "delta_per_dem_2024", "delta_per_gop_2024", "delta_per_oth_2024"]

# Unir los datos por "county_fips"
data_2024 = data_2024.merge(merged_data[delta_cols_2024], on="county_fips", how="left")

In [523]:
# Identificar los county_fips con valores NaN
missing_fips = data_2024[data_2024["delta_per_dem_2024"].isna()]["county_fips"].iloc[0]
print(missing_fips)

missing_fips_2024.append(missing_fips)
print(missing_fips_2024)


09110
['15005', '09110']


### Quitar "_2024" del nombre de las variables

In [524]:
# Renombrar columnas para quitar el sufijo "_{CENSUS_YEAR}" o "_{ELECTORAL_YEAR}"
data_2024.columns = [col.replace(f'_{CENSUS_YEAR}', '').replace(f'_{ELECTORAL_YEAR}', '') for col in data_2024.columns]

## Guardar datos

In [525]:
fips_to_remove = missing_fips_2016 + missing_fips_2020 + missing_fips_2024

data_2016 = data_2016[~data_2016['county_fips'].isin(fips_to_remove)]
data_2020 = data_2020[~data_2020['county_fips'].isin(fips_to_remove)]
data_2024 = data_2024[~data_2024['county_fips'].isin(fips_to_remove)]

In [526]:
# Cargar el diccionario desde el archivo JSON
import json

with open('data/code_to_state.json', 'r') as file:
    code_to_state = json.load(file)

# Usar el diccionario invertido para mapear los códigos a nombres en la columna "state"
data_2016['state'] = data_2016['state'].map(code_to_state)

# Guardar el DataFrame en un archivo CSV
data_2016.to_csv("data/final_data_2016.csv", index=False)

# Mostrar la tabla resultante
data_2016


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_2016['state'] = data_2016['state'].map(code_to_state)


Unnamed: 0,state,county_fips,county,votes_gop,votes_dem,total_votes,votes_others,per_votes_others,per_dem,per_gop,...,poverty_rate,households_renter_rate,households_owner_rate,unemployment_rate,public_transport_rate,no_health_insurance_rate,winner,delta_per_dem,delta_per_gop,delta_per_oth
0,Alabama,01001,Autauga County,18110.0,5908.0,24661.0,643.0,0.026074,0.239569,0.734358,...,12.78,26.08,73.92,7.64,0.04,9.97,gop,-2.618911,0.802053,1.816858
1,Alabama,01003,Baldwin County,72780.0,18409.0,94090.0,2901.0,0.030832,0.195653,0.773515,...,13.22,28.48,71.52,7.50,0.06,12.81,gop,-2.001264,-0.038277,2.039541
2,Alabama,01005,Barbour County,5431.0,4848.0,10390.0,111.0,0.010683,0.466603,0.522714,...,23.84,36.41,63.59,17.65,0.13,13.86,gop,-4.592041,3.933864,0.658177
3,Alabama,01007,Bibb County,6733.0,1874.0,8748.0,141.0,0.016118,0.214220,0.769662,...,15.43,24.89,75.11,8.31,0.19,8.88,gop,-4.796528,3.899783,0.896745
4,Alabama,01009,Blount County,22808.0,2150.0,25384.0,426.0,0.016782,0.084699,0.898519,...,16.55,21.10,78.90,7.67,0.15,11.54,gop,-3.877888,3.358964,0.518923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,Wyoming,56037,Sweetwater County,12153.0,3233.0,16661.0,1275.0,0.076526,0.194046,0.729428,...,11.30,29.01,70.99,5.41,1.21,13.40,gop,-9.090925,4.721905,4.369020
3104,Wyoming,56039,Teton County,3920.0,7313.0,12176.0,943.0,0.077447,0.600608,0.321945,...,7.34,39.50,60.50,2.61,4.87,16.81,dem,5.367221,-10.584667,5.217445
3105,Wyoming,56041,Uinta County,6154.0,1202.0,8053.0,697.0,0.086552,0.149261,0.764187,...,13.59,26.95,73.05,4.40,1.29,13.93,gop,-4.333320,-1.813854,6.147174
3106,Wyoming,56043,Washakie County,2911.0,532.0,3715.0,272.0,0.073217,0.143203,0.783580,...,13.54,26.25,73.75,6.80,0.01,16.60,gop,-5.981390,1.318888,4.662502


In [527]:
# Guardar el dataframe en un archivo CSV
data_2020.to_csv(f"data/final_data_{2020}.csv", index=False)

# Mostrar la tabla
data_2020

Unnamed: 0,state,county_fips,county,votes_gop,votes_dem,total_votes,votes_others,per_votes_others,per_dem,per_gop,...,poverty_rate,households_renter_rate,households_owner_rate,unemployment_rate,public_transport_rate,no_health_insurance_rate,winner,delta_per_dem,delta_per_gop,delta_per_oth
0,Alabama,01001,Autauga County,19838.0,7503.0,27770.0,429.0,0.015448,0.270184,0.714368,...,15.06,26.71,73.29,3.68,0.29,6.96,gop,3.061510,-1.998987,-1.062523
1,Alabama,01003,Baldwin County,83544.0,24578.0,109679.0,1557.0,0.014196,0.224090,0.761714,...,10.20,24.75,75.25,4.26,0.02,8.78,gop,2.843720,-1.180099,-1.663621
2,Alabama,01005,Barbour County,5622.0,4816.0,10518.0,80.0,0.007606,0.457882,0.534512,...,27.11,39.10,60.90,9.17,0.11,10.03,gop,-0.872078,1.179812,-0.307734
3,Alabama,01007,Bibb County,7525.0,1986.0,9595.0,84.0,0.008755,0.206983,0.784263,...,16.63,25.58,74.42,7.28,0.00,9.79,gop,-0.723759,1.460100,-0.736341
4,Alabama,01009,Blount County,24711.0,2640.0,27588.0,237.0,0.008591,0.095694,0.895716,...,13.42,21.22,78.78,3.36,0.05,10.68,gop,1.099476,-0.280322,-0.819153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,Wyoming,56037,Sweetwater County,12229.0,3823.0,16603.0,551.0,0.033187,0.230260,0.736554,...,11.31,23.78,76.22,6.09,1.41,11.18,gop,3.621362,0.712563,-4.333925
3108,Wyoming,56039,Teton County,4341.0,9848.0,14677.0,488.0,0.033249,0.670982,0.295769,...,6.13,39.59,60.41,1.08,2.12,12.71,dem,7.037406,-2.617592,-4.419814
3109,Wyoming,56041,Uinta County,7496.0,1591.0,9402.0,315.0,0.033504,0.169219,0.797277,...,11.22,22.94,77.06,6.28,1.47,11.09,gop,1.995817,3.308992,-5.304809
3110,Wyoming,56043,Washakie County,3245.0,651.0,4012.0,116.0,0.028913,0.162263,0.808824,...,10.28,23.51,76.49,3.92,0.00,14.73,gop,1.905998,2.524345,-4.430343


In [528]:
# Guardar el dataframe en un archivo CSV
data_2024.to_csv(f"data/final_data_{2024}.csv", index=False)

# Mostrar la tabla
data_2024

Unnamed: 0,state,county_fips,county,votes_gop,votes_dem,total_votes,votes_others,per_votes_others,per_dem,per_gop,...,poverty_rate,households_renter_rate,households_owner_rate,unemployment_rate,public_transport_rate,no_health_insurance_rate,winner,delta_per_dem,delta_per_gop,delta_per_oth
0,Alabama,01001,Autauga County,20484.0,7439.0,28190.0,267.0,0.009471,0.263888,0.726641,...,10.58,25.09,74.91,2.54,0.11,7.20,gop,-0.629575,1.227263,-0.597688
1,Alabama,01003,Baldwin County,95798.0,24934.0,121808.0,1076.0,0.008834,0.204699,0.786467,...,10.34,22.46,77.54,3.19,0.04,8.06,gop,-1.939110,2.475350,-0.536240
2,Alabama,01005,Barbour County,5606.0,4158.0,9832.0,68.0,0.006916,0.422905,0.570179,...,19.17,32.51,67.49,5.71,0.07,9.49,gop,-3.497693,3.566674,-0.068982
3,Alabama,01007,Bibb County,7572.0,1619.0,9241.0,50.0,0.005411,0.175197,0.819392,...,19.22,22.82,77.18,9.98,0.04,7.81,gop,-3.178531,3.512920,-0.334389
4,Alabama,01009,Blount County,25354.0,2576.0,28115.0,185.0,0.006580,0.091624,0.901796,...,13.95,20.49,79.51,5.84,0.00,10.11,gop,-0.407009,0.608067,-0.201057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,Wyoming,56037,Sweetwater County,12541.0,3731.0,16569.0,297.0,0.017925,0.225180,0.756895,...,13.25,26.02,73.98,5.79,1.14,12.85,gop,-0.508004,2.034177,-1.526173
3108,Wyoming,56039,Teton County,4134.0,8748.0,13077.0,195.0,0.014912,0.668961,0.316128,...,6.99,38.47,61.53,2.45,1.31,10.00,dem,-0.202104,2.035866,-1.833762
3109,Wyoming,56041,Uinta County,7282.0,1561.0,8984.0,141.0,0.015695,0.173753,0.810552,...,7.44,23.47,76.53,3.51,0.44,10.85,gop,0.453402,1.327492,-1.780894
3110,Wyoming,56043,Washakie County,3125.0,656.0,3841.0,60.0,0.015621,0.170789,0.813590,...,8.45,25.81,74.19,2.03,0.01,12.22,gop,0.852565,0.476668,-1.329233


In [529]:
# Encontrar los `county_fips` comunes en los tres conjuntos de datos
common_fips = set(data_2016["county_fips"]).intersection(data_2020["county_fips"]).intersection(data_2024["county_fips"])
len(common_fips)

3107

In [530]:
# Guardamos las variables en vectores para uso futuro
state_names = data_2024['state']
fips_codes = data_2024['county_fips']
county_names = data_2024['county']

# Crear df para guardarlos
df_vectors = pd.DataFrame({
    'state': state_names,
    'fips': fips_codes,
    'county': county_names
})

# Guardar en archivo CSV
df_vectors.to_csv('data/state_fips_county.csv', index=False)