In [1]:
import pandas as pd
import numpy as np
import geopandas as gp
from pyproj import Transformer
from scipy.spatial import cKDTree

## Getting population with Traffic flow data (with zipcodes)

In [2]:
pop_data = pd.read_csv("/Users/katevu/Desktop/E-Valuating_Seattle_Charging/data/raw/seattle_pop_2024_zip1.csv")
zip_code = pd.read_csv("/Users/katevu/Desktop/E-Valuating_Seattle_Charging/data/raw/USZipsWithLatLon_20231227.csv")
flow_data = pd.read_csv("/Users/katevu/Desktop/E-Valuating_Seattle_Charging/data/raw/Traffic_Study_Flow_Counts_-6217427247880187641.csv")

In [3]:
seattle_zip = zip_code[zip_code["place name"] == "Seattle"]

In [4]:
flow_data.drop(['FLOWMAP', "HPMS",'STUDY_LENGTH',
         'SCREENLINE','STDY_TITLE_PART', 'ONE_HOUR_REPORT_URL',
         'COMMENTS', 'SE_ANNO_CAD_DATA', 'SEG_COMPKEY',
         'INTENDED_DAYS', 'START_DATE', 'END_DATE','OBJECTID', 'STUDY_MAX8', 'STUDY_AMPK',
       'STUDY_PMPK', 'UNITID', 'UNITID2', 'O_STREET', 'X_STREET',
       'DIR_FROM_CROSS_STREET', 'STDY_LABEL', 'STDY_TITLE_PART_WITH_FLOW', 'ACTUAL_DAYS'], axis = 1, inplace= True)


In [5]:
flow_data = flow_data[flow_data['STDY_YEAR'] >= 2025.0]


In [6]:

transformer = Transformer.from_crs("EPSG:2926", "EPSG:4326", always_xy=True)

flow_data["lon"], flow_data["lat"] = transformer.transform(flow_data["x"].values, flow_data["y"].values)


In [7]:
# Rename ZIP reference columns (adjust based on your actual column names)
zip_ref = seattle_zip.rename(columns={'lat': 'latitude', 'longitude': 'longitude'})
# Rename traffic data columns (adjust based on your actual column names)
traffic_df = flow_data.rename(columns={'lon': 'longitude', 'lat': 'latitude'})

# Function to find nearest ZIP code
def assign_nearest_zipcode(traffic_df, zip_ref):
    """
    Assigns ZIP codes to traffic data by finding the nearest ZIP code centroid
    """
    # Create coordinate arrays
    zip_coords = zip_ref[['latitude', 'longitude']].values
    traffic_coords = traffic_df[['latitude', 'longitude']].values
    
    # Build KD-tree for fast nearest neighbor search
    tree = cKDTree(zip_coords)
    
    # Find nearest ZIP for each traffic point
    distances, indices = tree.query(traffic_coords)
    
    # Assign ZIP codes
    traffic_df['zip_code'] = zip_ref.iloc[indices]['postal code'].values
    
    return traffic_df

# Apply the function
traffic_df = assign_nearest_zipcode(traffic_df, zip_ref)


In [8]:
merged_df = pd.merge(traffic_df, pop_data, on='zip_code', how='inner')

In [9]:
merged_df.drop(["Unnamed: 0"], axis = 1, inplace=True)

In [10]:
merged_df.to_csv('/Users/katevu/Desktop/E-Valuating_Seattle_Charging/data/interm/traffic_flow_with_zip.csv', index = False)

## Registration data for evs

In [11]:
ev_reg = pd.read_csv("/Users/katevu/Desktop/E-Valuating_Seattle_Charging/data/raw/Vehicle_registration_2020_2025.csv")

In [12]:
ev_counts = ev_reg.groupby(['Postal Code', 'Transaction Type']).size().reset_index(name='count')

In [13]:
ev_pivot = ev_counts.pivot_table(
    index='Postal Code',
    columns='Transaction Type',
    values='count',
    fill_value=0
).reset_index()

In [14]:
ev_pivot.rename(columns={'zip_code': 'Postal Code'})

Transaction Type,Postal Code,Original Registration,Registration Renewal,Registration at time of Transfer
0,98001,2601.0,7820.0,1191.0
1,98002,1406.0,4334.0,992.0
2,98003,2683.0,8116.0,1699.0
3,98004,6532.0,14927.0,948.0
4,98005,3361.0,8078.0,673.0
...,...,...,...,...
106,98354,36.0,121.0,18.0
107,98375,0.0,4.0,0.0
108,98422,7.0,13.0,2.0
109,98554,0.0,6.0,0.0
