In [1]:
from IPython.display import clear_output
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests
import json
import ast

In [2]:
# Read the processed EV dataset from disk
df = pd.read_csv(Path("../../../../../data/processed_data/ev_main_dataset.csv"))

# Convert `ev_adoption_rate` and `ev_growth_rate` to percentages
# by scaling both columns by 100
rate_columns = ["ev_adoption_rate", "ev_growth_rate"]
df[rate_columns] = df[rate_columns] * 100

# Report the dimensions and preview the first rows
print(f"Shape: {df.shape}")
df.head()

Shape: (2011, 10)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income,ev_adoption_rate,ev_growth_rate
0,2010,CA,Alameda County,"[94501, 94502, 94536, 94537, 94538, 94539, 945...",1512986,20,0,69384,0.001322,
1,2010,CA,Amador County,"[95601, 95629, 95640, 95642, 95654, 95665, 956...",37886,1,1,54758,0.002639,
2,2010,CA,Contra Costa County,"[94505, 94506, 94507, 94509, 94511, 94513, 945...",1052540,10,0,78385,0.00095,
3,2010,CA,Fresno County,"[93210, 93234, 93242, 93602, 93605, 93606, 936...",932039,2,0,46430,0.000215,
4,2010,CA,Humboldt County,"[95501, 95502, 95503, 95511, 95514, 95518, 955...",135009,2,0,40089,0.001481,


In [3]:
# Build a separate DataFrame holding only the Texas ("TX") rows, ordered by
# year, state and county, with a fresh 0-based index
tx_df = (
    df.loc[df["state"].eq("TX")]
    .sort_values(by=["year", "state", "county"])
    .reset_index(drop=True)
)

# Preview the first two rows of the Texas DataFrame
display(tx_df.head(2))

Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income,ev_adoption_rate,ev_growth_rate
0,2017,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58175,1,0,42313,0.001719,
1,2017,TX,Andrews County,[79714],17603,1,0,70753,0.005681,


In [4]:
# Keep only the 2022 rows (the year this notebook treats as the latest).
# The copy is taken *after* filtering so that `ex_tx_df` is an independent
# frame: assigning a column to a filtered slice can otherwise raise pandas'
# SettingWithCopyWarning.
ex_tx_df = tx_df.loc[tx_df["year"] == 2022].copy()

# Inspect the 'zip_codes' column class
# (read from CSV, each list of zip codes arrives as its string representation)
print("Before:", type(ex_tx_df["zip_codes"].values[0]))

# Convert the strings in the 'zip_codes' column back into lists.
# `ast.literal_eval` only parses Python literals, so it does not execute code.
ex_tx_df["zip_codes"] = ex_tx_df["zip_codes"].apply(ast.literal_eval)

# Confirm the conversion from string to list for the 'zip_codes' column
print("After:", type(ex_tx_df["zip_codes"].values[0]))
ex_tx_df.head()

Before: <class 'str'>
After: <class 'list'>


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income,ev_adoption_rate,ev_growth_rate
924,2022,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",57227,150,4,57445,0.262114,336.622074
925,2022,TX,Andrews County,[79714],18324,60,0,86458,0.327439,301.899149
926,2022,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87124,181,0,57055,0.20775,150.252035
927,2022,TX,Aransas County,"[78358, 78381, 78382]",24963,129,4,58168,0.516765,224.766809
928,2022,TX,Archer County,"[76351, 76366, 76370, 76379, 76389]",8849,9,0,69954,0.101706,77.009832


In [5]:
# Expand the list-valued 'zip_codes' column so every zip code sits on its own
# row; the other columns (and the index label) are repeated for each zip code
ex_tx_df = ex_tx_df.explode(column="zip_codes")

# Show the Python type of the first exploded value, then preview the frame
print(type(ex_tx_df["zip_codes"].iloc[0]))
ex_tx_df.head()

<class 'int'>


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income,ev_adoption_rate,ev_growth_rate
924,2022,TX,Anderson County,75763,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75779,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75801,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75802,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75803,57227,150,4,57445,0.262114,336.622074


In [6]:
# The exploded zip codes are integers; turn them into whitespace-free strings
# (presumably so they can be matched against string ids in the GeoJSON later)
ex_tx_df = ex_tx_df.assign(
    zip_codes=ex_tx_df["zip_codes"].astype(str).str.strip()
)

# Verify that the 'zip_codes' values are now strings and preview the frame
print(type(ex_tx_df["zip_codes"].iloc[0]))
ex_tx_df.head()

<class 'str'>


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income,ev_adoption_rate,ev_growth_rate
924,2022,TX,Anderson County,75763,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75779,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75801,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75802,57227,150,4,57445,0.262114,336.622074
924,2022,TX,Anderson County,75803,57227,150,4,57445,0.262114,336.622074


In [7]:
# Obtain the geojson data for Texas
state_geojson_url = "https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/tx_texas_zip_codes_geo.min.json"

# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error (e.g. 404) here instead of letting
# it surface later as a JSON decoding failure.
response = requests.get(state_geojson_url, timeout=60)
response.raise_for_status()
texas_geojson = response.json()

# Inspect the geojson data: print a compact summary (feature count and the
# properties of the first feature) rather than dumping the whole document,
# which would flood the notebook output
print("Number of features:", len(texas_geojson["features"]))
print(json.dumps(texas_geojson["features"][0]["properties"], indent=4))

In [8]:
# Select plotly's `iframe` renderer so figures display in JupyterLab
# (the `notebook` renderer is the alternative that was tried here)
pio.renderers.default = "iframe"

In [9]:
# Use plotly express to create a choropleth map and save it as an HTML file.
# The 'zip_codes' column in the DataFrame is matched against the "ZCTA5CE10"
# property of each GeoJSON feature.
# The 'featureidkey' parameter is used to link the GeoJSON to the DataFrame
# Documentation: https://plotly.github.io/plotly.py-docs/generated/plotly.express.choropleth_mapbox.html

fig = px.choropleth_mapbox(
    ex_tx_df,
    geojson=texas_geojson,
    featureidkey="properties.ZCTA5CE10",
    locations="zip_codes",
    color="ev_adoption_rate",
    hover_name="county",
    hover_data=["ev_registrations", "median_income"],
    # Human-readable labels for the hover box and colour bar
    labels={
        "county": "County",
        "zip_codes": "Zip Code",
        "ev_adoption_rate": "EV Adoption Rate (%)",
        "ev_registrations": "EV Registrations",
        "median_income": "Median Income",
    },
    opacity=0.8,
    zoom=5,
    # Initial map centre (presumably the middle of Texas)
    center={"lat": 31.9686, "lon": -99.9018},
    mapbox_style="carto-positron",
    # The data shown is for Texas (the title previously said "Florida")
    title="Texas EV Adoption Rate by County",
)

fig.update_layout(
    coloraxis_colorbar_title="EV Adoption Rate (%)",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

file_name = "tx_geo_map_ev_adoption_rate.html"
file_path = Path(f"../../../../../reports/geospatial_maps/{file_name}")
# Create the output directory if it is missing so write_html cannot fail on it
file_path.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(file_path)

# Remove any output this cell produced
clear_output()