### __Project: Impact of EV adoption on air pollution.__

In [31]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Directory that holds the input CSV files (a relative path).
data_dir = '../data'

# Input CSV files. Later cells pick a file by its position in this list,
# so the order of the entries matters.
file_names = [
    'electric_car_population.csv',  # index 0
    'CO.csv',                       # index 1
    'NO2.csv',                      # index 2
    'PM10_Speciation.csv',          # index 3
    'PM25_Speciation.csv',          # index 4
]

In [None]:

# Load the EV population table (first entry of file_names).
df_EV = pd.read_csv(os.path.join(data_dir, file_names[0]))

# Filter data for King county, WA
# (the filter itself matches on the county name and the vehicle use only).
typeV = 'Passenger'
df_king = df_EV[(df_EV['county'] == 'King') & (df_EV['vehicle_primary_use'] == typeV)].copy()

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
df_king.info()

# Convert date column to datetime and order the rows chronologically so the
# line plots below are drawn left to right in time.
df_king['date'] = pd.to_datetime(df_king['date'])
df_king.sort_values(by='date', inplace=True)

# Plot the vehicle counts as time series
plt.figure(figsize=(7,4))
plt.plot(df_king['date'], df_king['total_vehicles'], label='Total Vehicles')
plt.plot(df_king['date'], df_king['non_electric_vehicles'], label='Non-Electric Vehicles')
plt.plot(df_king['date'], df_king['electric_vehicle_ev_total'], label='Electric Vehicles')
plt.xlabel('Time')
plt.ylabel('Number of Vehicles')
plt.title(f'EV Adoption in King, WA - {typeV} Vehicles')
plt.grid(True)
plt.legend()
# Save the plot as an SVG file. The output directory is created first because
# savefig does not create missing directories.
os.makedirs('plots', exist_ok=True)
plt.savefig(f'plots/EV-adoption-{typeV}.svg', format='svg')
plt.show()

# Plot the EV share as a time series (shown only, not saved)
plt.figure(figsize=(7,2))
plt.plot(df_king['date'], df_king['percent_electric_vehicles'], label='Electric Vehicles %')
plt.xlabel('Time')
plt.ylabel('% electric Vehicles')
plt.title('EV Adoption in King, WA')
plt.grid(True)
plt.legend()
plt.show()





In [21]:
def df_remove_outlier_IQR(df, column):
    """Overwrite IQR outliers in ``df[column]`` with the column mean.

    A value counts as an outlier when it lies more than 1.5 * IQR below the
    first quartile or above the third quartile. Despite the name, no rows
    are dropped: outlying values are replaced by the mean of the column,
    computed before any replacement (so the outliers contribute to it).

    Args:
        df: DataFrame to clean. It is modified in place.
        column: Label of the column to clean.

    Returns:
        The same ``df`` object that was passed in.
    """
    values = df[column]

    # Tukey fences built from the quartiles.
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    # Replacement value and outlier mask are both taken from the untouched data.
    fill_value = values.mean()
    is_outlier = (values < low_fence) | (values > high_fence)

    df.loc[is_outlier, column] = fill_value
    return df

def df_remove_outlier_Z_Score(df, column, upperB, lowerB):
    """Return the rows of ``df`` whose z-score in ``column`` is within bounds.

    Args:
        df: DataFrame to filter. It is not modified.
        column: Label of the numeric column the z-scores are computed on.
        upperB: Largest z-score to keep (inclusive). Note that the upper
            bound comes before the lower bound in the signature.
        lowerB: Smallest z-score to keep (inclusive).

    Returns:
        A DataFrame with only the rows whose z-score lies in
        ``[lowerB, upperB]``. Rows whose value in ``column`` is NaN are
        dropped, because a NaN z-score fails both comparisons.
    """
    # Imported here because the notebook does not import scipy at the top.
    from scipy.stats import zscore

    # With the default nan_policy='propagate' a single NaN turns every z-score
    # into NaN and the filter would silently return an empty frame. 'omit'
    # computes mean/std from the non-NaN values only.
    z_scores = zscore(df[column].to_numpy(), nan_policy='omit')
    return df[(z_scores >= lowerB) & (z_scores <= upperB)]

In [None]:
# NOTE(review): file_names[3] is 'PM10_Speciation.csv', while the variable
# names and plot labels in this cell say CO - confirm which dataset is meant.
df_CO = pd.read_csv(os.path.join(data_dir, file_names[3]))

# Parse the measurement dates and order the rows chronologically.
df_CO['Date Local'] = pd.to_datetime(df_CO['Date Local'])
df_CO = df_CO.sort_values(by='Date Local')

# Keep a single monitoring site and a single measurement method.
method = 'R & P Partisol 2025 Teflon - ICPMS'
is_site_80 = df_CO['Site Num'] == 80
uses_method = df_CO['Method Name'] == method
df_CO_1 = df_CO[is_site_80 & uses_method].copy()

# Replace IQR outliers in the daily mean with the column mean.
# (Variants tried earlier and left out here: Site Num 30, and z-score
# filtering with bounds 3 / -3.)
df_CO_1 = df_remove_outlier_IQR(df_CO_1, 'Arithmetic Mean')

# Centered rolling mean over 30 consecutive rows. The window is a row count,
# so it only spans 30 days if there is exactly one row per day - TODO confirm.
df_CO_1['Mean_MA'] = df_CO_1['Arithmetic Mean'].rolling(window=30, center=True).mean()

# Draw the raw and the smoothed series on one very wide, short axis.
fig, ax = plt.subplots(figsize=(40, 2))
ax.plot(df_CO_1['Date Local'], df_CO_1['Arithmetic Mean'], label='CO Daily Mean')
ax.plot(df_CO_1['Date Local'], df_CO_1['Mean_MA'], label='CO Daily Mean: Moving Average 30')
ax.set_xlabel('Time')
# Restrict the x-axis to 2016-01-01 .. 2025-01-01.
ax.set_xlim(pd.Timestamp('2016-01-01'), pd.Timestamp('2025-01-01'))
ax.set_ylabel('CO')
ax.set_title('CO levels: Seattle Site Num. 80 (after removing outliers with IQR)')
ax.grid(True)
ax.legend()
#plt.savefig(f'plots/CO-IQR-levels-Site-80.svg', format='svg')
plt.show()


In [None]:
# NOTE(review): file_names[3] is 'PM10_Speciation.csv', while the variable
# names and plot labels in this cell say CO - confirm which dataset is meant.
df_CO = pd.read_csv(os.path.join(data_dir, file_names[3]))

# Convert date column to datetime and order the rows chronologically
# (the rolling means below run over rows in this order).
df_CO['Date Local'] = pd.to_datetime(df_CO['Date Local'])
df_CO.sort_values(by='Date Local', inplace=True)

site = 80
# Defined in this cell so it can run on its own; previously the name was only
# available if the earlier plotting cell had been executed first.
method = 'R & P Partisol 2025 Teflon - ICPMS'
df_CO_1 = df_CO[(df_CO['Site Num'] == site) & (df_CO['Method Name'] == method)].copy()
#df_CO_1 = df_remove_outlier_IQR(df_CO_1, 'Arithmetic Mean')

# Two smoothing passes: a centered 120-row rolling mean, then a centered
# 30-row rolling mean of that result. Windows are row counts, not day counts.
df_CO_1['Mean_MA'] = df_CO_1['Arithmetic Mean'].rolling(window=120, center=True).mean()
df_CO_1['Mean_MA'] = df_CO_1['Mean_MA'].rolling(window=30, center=True).mean()
#df_CO_1['Mean_MA'] = df_CO_1['Mean_MA'].rolling(window=60, center=True).mean()

# Add calendar year and day-of-year columns so every year can be drawn
# against the same x-axis.
df_CO_1.loc[:, 'Year'] = df_CO_1['Date Local'].dt.year
df_CO_1.loc[:, 'DayOfYear'] = df_CO_1['Date Local'].dt.dayofyear

# Overlay one smoothed curve per year on a single figure
plt.figure(figsize=(8, 6))

for year in df_CO_1['Year'].unique():
    df_year = df_CO_1[df_CO_1['Year'] == year]
    plt.plot(df_year['DayOfYear'], df_year['Mean_MA'], label=f'CO {year}')

    # The printed yearly mean uses the raw values, not the smoothed curve.
    mean_value = df_year['Arithmetic Mean'].mean()
    print(f'Year: {year} - Mean: {mean_value}')

plt.xlabel('Day of Year')
plt.ylabel('CO Levels')
plt.title(f'CO Emissions by Year: Seattle Site Num. {site}')
plt.legend()
plt.grid(True)
#plt.savefig(f'plots/CO-levels-Site-{site}.svg', format='svg')
plt.show()

In [None]:
# Print how many distinct 'Method Name' values occur in file_names[4]
# ('PM25_Speciation.csv').
df = pd.read_csv(os.path.join(data_dir, file_names[4]))
method_names = df['Method Name'].unique()
print(len(method_names))