In [5]:
# Import necessary libraries
import os

import pandas as pd
import requests

# Load the county FIPS lookup table from a local CSV.
# Remote alternative kept for reference:
#   https://raw.githubusercontent.com/kjhealy/fips-codes/master/state_and_county_fips_master.csv
fips_path = '/Users/samyakshrestha/Desktop/ER CASE/Life Expectancy/Data/Modified_fips_by_state_v4.csv'

# 'fips' is read as text and left-padded with zeros to a width of 5
fips_df = pd.read_csv(fips_path, dtype={'fips': str})
fips_df = fips_df.assign(fips=fips_df['fips'].str.zfill(5))

# Show the first rows of the lookup table
print("FIPS DataFrame Preview:", fips_df.head(), sep="\n")

# Load the single-year life expectancy dataset (the path points at the
# '2014 data' folder)
life_expectancy_path = '/Users/samyakshrestha/Desktop/ER CASE/Life Expectancy/Data/2014 data/Single_year_Dataframe.csv'  # Update this path
life_df = pd.read_csv(life_expectancy_path)

# Show the raw rows before any cleaning
print("\nLife Expectancy DataFrame Preview:", life_df.head(), sep="\n")

# 'location_name' is split on the pattern "<County> (<State>)":
# capture group 0 becomes 'County', capture group 1 becomes 'State'
location_parts = life_df['location_name'].str.extract(r'^(.*) \((.*)\)$')
life_df['County'] = location_parts[0]
# Trim surrounding whitespace so state names compare cleanly in the later join
life_df['State'] = location_parts[1].str.strip()

# Show the rows again with the two new columns
print("\nCleaned Life Expectancy DataFrame Preview:", life_df.head(), sep="\n")

# List the FIPS table's columns so the join keys can be checked by eye
print(fips_df.columns)

# Trim surrounding whitespace from county names as well
life_df['County'] = life_df['County'].str.strip()

# Break the FIPS code into its state and county parts
fips_code = fips_df['fips']
fips_df['State_FIPS'] = fips_code.str.slice(stop=2)    # leading two characters
fips_df['County_FIPS'] = fips_code.str.slice(start=2)  # everything after them

# Lookup from two-letter postal abbreviation to full state name, used to
# translate the FIPS table's 'state' column into names that can be joined on.
state_abbr_to_full = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming',
    # Not a state, but it has its own FIPS entry; without this key its rows
    # would map to NaN and could never be matched by state name.
    'DC': 'District of Columbia',
}

# Add each row's full state name, looked up from the abbreviation in 'state';
# abbreviations missing from the lookup come out as NaN
fips_df = fips_df.assign(state_full=fips_df['state'].map(state_abbr_to_full))

# Show abbreviation and mapped name side by side to verify the lookup
print(fips_df.loc[:, ['state', 'state_full']].head())

# Attach FIPS parts to every life expectancy row by matching
# (State, County) against (state_full, name). A left join keeps rows
# that find no match; their FIPS columns are left missing.
fips_lookup = fips_df[['State_FIPS', 'County_FIPS', 'name', 'state_full']]
merged_df = (
    life_df.merge(
        fips_lookup,
        how='left',
        left_on=['State', 'County'],
        right_on=['state_full', 'name'],
    )
    # The right-hand join keys duplicate 'County' and 'State', so discard them
    .drop(columns=['name', 'state_full'])
)

# Show the first rows of the joined table
print(merged_df.head())

# Fetch Total Female Population Data from ACS API
# NOTE(review): a Census API key is committed in this file. Set the
# CENSUS_API_KEY environment variable to override it, and consider rotating
# the key and removing the literal fallback below.
api_key = os.environ.get('CENSUS_API_KEY', '2a58865a16f7670d452bcfcb4a5b767db1ce8973')
acs_endpoint = 'https://api.census.gov/data/2014/acs/acs5'

params = {
    'get': 'B01001_026E',  # Total female population
    'for': 'county:*',      # Get data for all counties
    'in': 'state:*',        # In all states
    'key': api_key
}

# Bound the request: without a timeout a stalled connection blocks forever
response = requests.get(acs_endpoint, params=params, timeout=60)

if response.status_code == 200:
    print("\nSuccessfully fetched total female population data from ACS API.")
    female_population_data = response.json()
else:
    print(f"\nFailed to fetch data. Status code: {response.status_code}")
    # Stop with a non-zero status so the failure is visible to the caller;
    # the bare exit() helper is site-provided and reported success (status 0).
    raise SystemExit(1)

# Build a DataFrame from the API payload: the first row supplies the column
# names and the remaining rows supply the data
acs_header = female_population_data[0]
acs_rows = female_population_data[1:]
female_population_df = pd.DataFrame(acs_rows, columns=acs_header).rename(columns={
    'B01001_026E': 'Total_Female_Population',
    'state': 'State_FIPS',
    'county': 'County_FIPS'
})

# Left-pad the FIPS parts with zeros: 2 characters for state, 3 for county
for fips_part, digits in (('State_FIPS', 2), ('County_FIPS', 3)):
    female_population_df[fips_part] = female_population_df[fips_part].str.zfill(digits)

# Parse the population counts as numbers; unparseable values become NaN
female_population_df['Total_Female_Population'] = pd.to_numeric(
    female_population_df['Total_Female_Population'],
    errors='coerce',
)

# Step 4: Align FIPS Codes Before Merging
# Pad the FIPS parts to their fixed widths (2 for state, 3 for county) while
# keeping rows that found no FIPS match as missing. astype(str) on its own
# turns NaN into the literal text 'nan', which would then show up in the
# diagnostics and in the saved CSV as if it were a real code.
for fips_col, width in (('State_FIPS', 2), ('County_FIPS', 3)):
    has_code = merged_df[fips_col].notna()
    merged_df[fips_col] = merged_df[fips_col].astype(str).str.zfill(width).where(has_code)

# Step 5: Merge the DataFrames on State_FIPS and County_FIPS
# Left join: every life expectancy row is kept, and rows without a matching
# county in the population table get a missing Total_Female_Population.
final_df_with_female_population = pd.merge(
    merged_df,
    female_population_df[['State_FIPS', 'County_FIPS', 'Total_Female_Population']],
    on=['State_FIPS', 'County_FIPS'],
    how='left'
)

# Step 6: Report rows that ended up without a population value
population_missing = final_df_with_female_population['Total_Female_Population'].isnull()
missing_female_population = final_df_with_female_population[population_missing]

if missing_female_population.empty:
    print("\nAll records have corresponding Total Female Population data.")
else:
    print("\nMissing Total Female Population data for the following FIPS codes:")
    # One line per distinct state/county combination
    unmatched_keys = missing_female_population[['State', 'County', 'State_FIPS', 'County_FIPS']]
    print(unmatched_keys.drop_duplicates())

# Step 7: Write the combined table to CSV, leaving out the index column
output_path = '/Users/samyakshrestha/Desktop/ER CASE/Life Expectancy/Data/2014 data/final_dataset_with_female_population.csv'  # Update this path
final_df_with_female_population.to_csv(output_path, index=False)
print(f"\nFinal dataset with total female population saved to: {output_path}")


FIPS DataFrame Preview:
    fips            name state
0  01001  Autauga County    AL
1  01003  Baldwin County    AL
2  01005  Barbour County    AL
3  01007     Bibb County    AL
4  01009   Blount County    AL

Life Expectancy DataFrame Preview:
   Unnamed: 0             location_name  year  MeanLifeExpectency
0         228  Autauga County (Alabama)  2014           76.092384
1         234  Baldwin County (Alabama)  2014           77.959026
2         240  Barbour County (Alabama)  2014           75.723760
3         246     Bibb County (Alabama)  2014           74.070479
4         252   Blount County (Alabama)  2014           75.576238

Cleaned Life Expectancy DataFrame Preview:
   Unnamed: 0             location_name  year  MeanLifeExpectency  \
0         228  Autauga County (Alabama)  2014           76.092384   
1         234  Baldwin County (Alabama)  2014           77.959026   
2         240  Barbour County (Alabama)  2014           75.723760   
3         246     Bibb County (Alabama