In [None]:
import pandas as pd
import plotly.express as px

# Step 1: Load the raw records; 'Valid Date' is parsed into datetimes on read
df = pd.read_csv('/content/state_regularized.csv', parse_dates=['Valid Date'])

# Discard rows whose State is flagged 'DIRTY', is missing, or is blank after trimming
df = df.loc[
    ~(
        df['State'].str.contains('DIRTY', na=False)
        | df['State'].str.strip().isna()
        | df['State'].str.strip().eq('')
    )
]

# Add the calendar year, and turn each comma-separated State entry into a list
# of trimmed, non-empty state names
df = df.assign(
    Year=df['Valid Date'].dt.year,
    State=df['State'].apply(
        lambda raw: [name for name in map(str.strip, raw.split(',')) if name]
    ),
)

# Give every state in a multi-state entry its own row
df = df.explode('State').reset_index(drop=True)

# Step 2: Count rows for every (State, Year) combination
# Pure occurrence counting; no numeric column is aggregated
entry_counts = df.groupby(['State', 'Year']).size().rename('Counts').reset_index()

# Full names of the 50 U.S. states, grouped by initial letter.
us_states = {
    "Alabama", "Alaska", "Arizona", "Arkansas",
    "California", "Colorado", "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky",
    "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan",
    "Minnesota", "Mississippi", "Missouri", "Montana",
    "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota",
    "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina", "South Dakota",
    "Tennessee", "Texas",
    "Utah",
    "Vermont", "Virginia",
    "Washington", "West Virginia", "Wisconsin", "Wyoming",
}

# Full name -> two-letter postal abbreviation.
us_state_to_abbrev = {
    # States
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI",
    "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ",
    "New Mexico": "NM", "New York": "NY", "North Carolina": "NC", "North Dakota": "ND",
    "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA",
    "Washington": "WA", "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
    # Non-state entries: federal district, territories and outlying islands
    "District of Columbia": "DC",
    "American Samoa": "AS", "Guam": "GU", "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR", "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Step 3: Create an interactive heatmap
def create_choropleth(year):
    """Show a U.S. choropleth of entry counts per state for one year.

    Reads the module-level ``entry_counts`` frame (columns 'State', 'Year',
    'Counts'), keeps the rows for ``year`` whose 'State' is a member of
    ``us_states``, converts the state names to abbreviations through
    ``us_state_to_abbrev`` and displays the resulting Plotly figure.

    Parameters
    ----------
    year
        Value compared for equality against ``entry_counts['Year']``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Select with a single vectorized mask instead of two successive filters
    # driven by per-row Python lambdas.
    selected = entry_counts.loc[
        (entry_counts['Year'] == year) & entry_counts['State'].isin(us_states),
        ['State', 'Counts'],
    ]

    # assign() builds a new frame, so no column is written onto a filtered
    # slice of entry_counts (the chained-assignment pattern pandas warns about).
    plot_data = selected.assign(
        # Every remaining name is in us_states, all of which are keys of the
        # mapping, so map() leaves no missing abbreviations.
        Abbrev=selected['State'].map(us_state_to_abbrev),
        # Counts are passed to Plotly as floats, as before.
        Counts=selected['Counts'].astype(float),
    )

    fig = px.choropleth(
        plot_data,
        locations='Abbrev',
        locationmode="USA-states",
        color='Counts',
        scope="usa",
        hover_name='State',  # show the full state name on hover
        title=f"Entries per state in {year}",
    )
    fig.show()


In [None]:
# Draw the per-state counts map for the year 2022
create_choropleth(2022)

In [None]:
from ipywidgets import interact

# Distinct years present in the aggregated counts, in ascending order
years = sorted(entry_counts['Year'].unique())
# Given a list of options, interact builds a dropdown for `year` and calls
# create_choropleth with the selected value
interact(create_choropleth, year=years)

interactive(children=(Dropdown(description='year', options=(2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 20…