In [34]:
# Warning Management
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# DataFrames
import pandas as pd
pd.options.display.max_columns = 30

# Plotting
import matplotlib.pyplot as plt

# Maths
import numpy as np
import math

# Path management
import pathlib
import os
import sys

# Progress tracking
from tqdm import tqdm

# Regular Expressions
import re

# Logging
import logging

# Helpers
from helper_fns import convert_vin_valid, return_matched_vins, create_valid_zip, try_divide

# Read in Data

In [12]:
# Paths
# `path` is the "rn_home" folder two directory levels above the current working directory.
working_dir = pathlib.Path().resolve()
path = working_dir.parent.parent / "rn_home"
processed_data_path = path.joinpath("data", "municipal_dataset_matched")

# Read in file
# The first CSV column is used as the DataFrame index.
municipal_data = pd.read_csv(
    processed_data_path / "municipal_dataset_matched_110723.csv",
    index_col=[0],
)

# Process Data

In [22]:
def _fiscal_year_from_record(record_from):
    """Return the four-digit fiscal year encoded in a source file name.

    Anything from "_ALTERED" onwards is discarded, then anything from the
    first "." onwards; the last two remaining characters are prefixed with
    "20" and converted to int (e.g. "110_Plainville_MV_21.xlsx" -> 2021).
    """
    stem = re.split("\\.", re.split("_ALTERED", record_from)[0])[0]
    return int("20" + stem[-2:])


# Extract EVs
# .copy() makes `evs` an independent frame, so adding the "fy" column below is
# a plain column assignment rather than a write onto a filtered slice of
# municipal_data (the pattern pandas reports as SettingWithCopyWarning).
evs = municipal_data[municipal_data["Fuel Type - Primary"] == "Electric"].copy()

# Get FY
evs["fy"] = evs["record_from"].apply(_fiscal_year_from_record)

# Identify leased vehicles: a row counts as leased when any lease address field is populated
lease_columns = ["lease_street", "lease_city", "lease_state", "lease_zip"]
lease_filter = evs[lease_columns].notna().any(axis=1)

# Get non-leased vehicles
evs_not_lease = evs[~lease_filter]

# Remove duplicate vehicle_ids (the first occurrence of each vehicle_id is kept)
evs_not_lease_dedup = evs_not_lease.drop_duplicates(subset="vehicle_id")

In [35]:
# Show the de-duplicated, non-leased EV rows whose fiscal year is 2021.
# NOTE(review): the rendered output includes owner names, street addresses and
# full vehicle ids; clear or redact it before sharing the notebook.
evs_not_lease_dedup.loc[evs_not_lease_dedup["fy"].eq(2021)]

Unnamed: 0,original_index,record_from,name,street,city,state,zip,vehicle_year,vehicle_make,vehicle_model,vehicle_class,vehicle_id,lease_street,UID,lease_city,lease_state,lease_zip,zip_corrected,vin_corrected,Manufacturer Name,Model,Model Year,Fuel Type - Primary,Electrification Level,fy
297,297,110_Plainville_MV_21.xlsx,ALEXANDER CONNIE L,1 FLEETWOOD DR,PLAINVILLE,CT,6062,2017,NISSA,LEAF SV,1,1N4BZ0CP2HC310643,,,,,,6062.0,1N4BZ0CP*HC,"NISSAN NORTH AMERICA, INC.",Leaf,2017.0,Electric,BEV (Battery Electric Vehicle),2021
651,651,110_Plainville_MV_21.xlsx,BABEY JUSTIN P,14 DOMINICS CT,PLAINVILLE,CT,6062,2021,TESLA,MODEL Y,1,5YJYGDEE7MF185763,,,,,,6062.0,5YJYGDEE*MF,TESLA,Model Y,2021.0,Electric,BEV (Battery Electric Vehicle),2021
898,898,110_Plainville_MV_21.xlsx,BARTOLUCCI AMY M,570 CAMP ST,PLAINVILLE,CT,6062,2019,TESLA,MODEL 3,1,5YJ3E1EA7KF307627,,,,,,6062.0,5YJ3E1EA*KF,"TESLA, INC.",Model 3,2019.0,Electric,BEV (Battery Electric Vehicle),2021
1663,1663,110_Plainville_MV_21.xlsx,BROCHU COLLEEN M,170 ROCKWELL AVE,PLAINVILLE,CT,6062,2017,NISSA,LEAF SV,1,1N4BZ0CPXHC311197,,,,,,6062.0,1N4BZ0CP*HC,"NISSAN NORTH AMERICA, INC.",Leaf,2017.0,Electric,BEV (Battery Electric Vehicle),2021
1664,1664,110_Plainville_MV_21.xlsx,BROCHU COLLEEN M,170 ROCKWELL AVE,PLAINVILLE,CT,6062,2017,NISSA,LEAF SV,1,1N4BZ0CP7HC308161,,,,,,6062.0,1N4BZ0CP*HC,"NISSAN NORTH AMERICA, INC.",Leaf,2017.0,Electric,BEV (Battery Electric Vehicle),2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7018,2777018,015_Bridgeport_MV_21.xlsx,WHEELER FLORENCE R,539 OLD TOWN RD,BRIDGEPORT,CT,6606,2017.0,FORD,FUSION S,1.0,3FA6P0PU8HR390703,,,,,,6606.0,3FA6P0PU*HR,"FORD MOTOR COMPANY, MEXICO",Fusion,2017.0,Electric,PHEV (Plug-in Hybrid Electric Vehicle),2021
8121,2778121,015_Bridgeport_MV_21.xlsx,WINTERHALDER ROBERT W 2ND,114 STATE ST,BRIDGEPORT,CT,6604,2021.0,TESLA,MODEL 3,1.0,5YJ3E1EB7MF869491,,,,,,6604.0,5YJ3E1EB*MF,"TESLA, INC.",Model 3,2021.0,Electric,BEV (Battery Electric Vehicle),2021
8834,2778834,015_Bridgeport_MV_21.xlsx,YOUTE GUY B,43 JEWETT AVE,BRIDGEPORT,CT,6606,2013.0,TOYOT,PRIUS PL,1.0,JTDKN3DP8D3043881,,,,,,6606.0,JTDKN3DP*D3,TOYOTA MOTOR CORPORATION,Prius Plug-in,2013.0,Electric,PHEV (Plug-in Hybrid Electric Vehicle),2021
9003,2779003,015_Bridgeport_MV_21.xlsx,ZAROS-JUNIOR ADELIO,982 CAPITOL AVE,BRIDGEPORT,CT,6606,2018.0,TOYOT,PRIUS PR,1.0,JTDKARFP1J3088233,,,,,,6606.0,JTDKARFP*J3,TOYOTA MOTOR CORPORATION,Prius Prime,2018.0,Electric,PHEV (Plug-in Hybrid Electric Vehicle),2021


In [18]:
# Write the extract to data/municipal_dataset_extracts/all_evs.csv, two levels above `path`.
# NOTE(review): `output_df` is not assigned in any cell visible here, and this
# cell's execution count (18) is lower than the processing cell's (22), so it
# will fail on a fresh kernel. Confirm which frame is meant to be exported.
# NOTE(review): the input was read from `path / "data"`, but this writes under
# `path.parent.parent / "data"`. Confirm the different root is intended.
# Previously disabled clean-up step, kept for reference:
# output_df = output_df.drop("Unnamed: 0.1", axis = 1).reset_index(drop=True)
extract_dir = path.parent.parent / "data" / "municipal_dataset_extracts"
output_df.to_csv(extract_dir / "all_evs.csv")