In [3]:
import pandas as pd

# Input CSV to read and output CSV to write (replace both placeholders)
file_path = 'INSERT PATH'
output_path = 'INSERT PATH'

# Define the states we want to keep
target_states = ['WV', 'KY', 'OH', 'VA', 'PA']

# Common alternatives for the state column, in order of preference
STATE_COLUMN_CANDIDATES = ('stateCode', 'state', 'State', 'STATE', 'state_code')


def find_state_column(columns, candidates=STATE_COLUMN_CANDIDATES):
    """Return the first name in *candidates* that is present in *columns*.

    Args:
        columns: Container of column labels (e.g. ``df.columns`` or a list).
        candidates: Column names to try, in order of preference.

    Raises:
        ValueError: If none of the candidates is present.
    """
    for name in candidates:
        if name in columns:
            return name
    raise ValueError("Could not find state column in the CSV file")


def filter_states(df: pd.DataFrame, state_column: str, states) -> pd.DataFrame:
    """Return the rows of *df* whose *state_column* value is one of *states*.

    The comparison is an exact match on the stored values.
    """
    return df[df[state_column].isin(states)]


def select_and_rename(df: pd.DataFrame, column_mapping: dict) -> tuple:
    """Keep the mapped columns that exist in *df* and rename them.

    Args:
        df: Source dataframe.
        column_mapping: ``{new name: old name}``; output columns follow the
            mapping's order.

    Returns:
        ``(result, missing)`` where *result* is the selected/renamed dataframe
        and *missing* lists the old names that were not found in *df*.
    """
    present = [old for old in column_mapping.values() if old in df.columns]
    missing = [old for old in column_mapping.values() if old not in df.columns]
    # Only rename columns that were actually selected
    renames = {old: new for new, old in column_mapping.items() if old in present}
    return df[present].rename(columns=renames), missing


# Statements are kept at top level of the guard (not inside a function) so that
# df, state_column, column_mapping and filtered_df remain available to later
# notebook cells, while importing this code no longer triggers file I/O.
if __name__ == "__main__":
    # Read the CSV file with low_memory=False to handle mixed types
    df = pd.read_csv(file_path, low_memory=False)

    # First, let's inspect the actual column names
    print("Columns in the CSV file:")
    print(df.columns.tolist())

    # Locate the state column, then filter for only the target states
    state_column = find_state_column(df.columns)
    df = filter_states(df, state_column, target_states)

    # Define column mapping (new name : old name)
    column_mapping = {
        'disasterNumber': 'disasterNumber',
        'state': state_column,
        'incidentType': 'incidentType',
        'incidentBeginDate': 'incidentBeginDate',
        'incidentEndDate': 'incidentEndDate',
        'ihProgramDeclared': 'ihProgramDeclared',
        'paProgramDeclared': 'paProgramDeclared'
    }

    # Select and rename columns - only those that exist
    filtered_df, missing_columns = select_and_rename(df, column_mapping)
    if missing_columns:
        # Surface skipped columns instead of dropping them silently
        print("Columns not found (skipped):", missing_columns)

    # Save the filtered dataframe to a new CSV file
    filtered_df.to_csv(output_path, index=False)

    print("\nFiltered data saved to:", output_path)
    print("Columns kept:", list(filtered_df.columns))
    print("Number of records:", len(filtered_df))

Columns in the CSV file:
['femaDeclarationString', 'disasterNumber', 'state', 'declarationType', 'declarationDate', 'fyDeclared', 'incidentType', 'declarationTitle', 'ihProgramDeclared', 'iaProgramDeclared', 'paProgramDeclared', 'hmProgramDeclared', 'incidentBeginDate', 'incidentEndDate', 'disasterCloseoutDate', 'tribalRequest', 'fipsStateCode', 'fipsCountyCode', 'placeCode', 'designatedArea', 'declarationRequestNumber', 'lastIAFilingDate', 'incidentId', 'region', 'designatedIncidentTypes', 'lastRefresh', 'hash', 'id']

Filtered data saved to: /Users/Spence604/Documents/Data Analyst stuff/Projectcs/Appalachia /Filtered_DisasterDeclarations.csv
Columns kept: ['disasterNumber', 'state', 'incidentType', 'incidentBeginDate', 'incidentEndDate', 'ihProgramDeclared', 'paProgramDeclared']
Number of records: 9530
