In [None]:
# Importing modules
import pandas as pd
from tqdm.notebook import tqdm, trange
import time
import geopandas as gpd
import requests
import matplotlib.pyplot as plt 
import io
import os
import math
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the two air-quality extracts that the next cell stacks together.
# Both paths are relative to the working directory and use Windows separators.
dat_completed = pd.read_csv(
    filepath_or_buffer=r"..\..\PQHS_427_Proj_Data\02_air_dat_merged.csv"
)
dat_remain = pd.read_csv(
    filepath_or_buffer=r"..\..\PQHS_427_Proj_Data\02_air_dat_remaining.csv"
)

In [None]:
# Stack both extracts into one table with a fresh 0..n-1 index, then drop
# rows that are identical across every column.
dat_2_merged = pd.concat(
    objs=[dat_completed, dat_remain],
    ignore_index=True,
).drop_duplicates()
dat_2_merged

In [None]:
# Load the polygons for all the zip code areas within the United States and
# reproject them to EPSG:26917 before deriving any geometry from them.
zips = gpd.read_file(
    r"..\..\PQHS_427_Proj_Data\US Zip Codes\tl_2020_us_zcta520.shp"
).to_crs(crs='EPSG:26917')
# One centroid point per zip code polygon, computed in the reprojected CRS
zipCentroids = zips.geometry.centroid

In [None]:
# Geocoding the East Palestine train derailment as a one-row site table
crash_site_name = ["east_palestine"]
crash_site_lat = [40.836]
crash_site_long = [-80.5227]
crash_df = pd.DataFrame(
    {
        'site_name': crash_site_name,
        'latitude': crash_site_lat,
        'longitude': crash_site_long,
    }
)
# Guarantee float coordinates before they are turned into point geometry
crash_df = crash_df.astype({'longitude': float, 'latitude': float})
# Point geometry for each site: x is the longitude, y is the latitude
geometry = gpd.points_from_xy(x=crash_df['longitude'], y=crash_df['latitude'])
# GeoDataFrame holding the site data plus its geometry; CRS is set to EPSG:4269
crash_gdf = gpd.GeoDataFrame(crash_df, geometry=geometry, crs="EPSG:4269")

In [None]:
# Bring the crash site, the zip code polygons and the zip code centroids into
# the same CRS (EPSG:26917) so the spatial operations in later cells share one
# coordinate system.
crash_gdf, zips, zipCentroids = (
    layer.to_crs('EPSG:26917') for layer in (crash_gdf, zips, zipCentroids)
)


In [None]:
# 30-mile buffer around the crash site; 1609.34 converts miles to metres
# (assumes the layer's CRS units are metres — confirm for EPSG:26917).
crashBuffer = crash_gdf.buffer(distance=30 * 1609.34)
# Keep the zip code polygons whose centroid falls inside the first (and only)
# buffer polygon. The centroid mask lines up with `zips` by index.
crash_zips = zips.loc[zipCentroids.within(crashBuffer.iloc[0])]

In [None]:
# Keep only the zip code identifier, its interior-point coordinates and the
# polygon geometry, and expose the identifier as 'Zipcode' (the key the
# air-quality readings are merged on later).
# The rename is chained rather than done with inplace=True so the freshly
# selected subset is never mutated in place (avoids the view-vs-copy hazard).
crash_zips = crash_zips[
    ['ZCTA5CE20', 'INTPTLAT20', 'INTPTLON20', 'geometry']
].rename(columns={'ZCTA5CE20': 'Zipcode'})
crash_zips

In [None]:
# Order the combined readings by the DateObserved column (ascending)
dat_2_merged = dat_2_merged.sort_values(by=['DateObserved'], ascending=True)

In [None]:
# Persist the combined, sorted readings back to the merged CSV.
# This is the same file the notebook loads as `dat_completed`, so the row
# index must NOT be written: otherwise every run adds another "Unnamed: 0"
# column on re-read, and those extra values stop drop_duplicates() from
# matching rows between the merged and the remaining extracts.
dat_2_merged.to_csv(
    r"..\..\PQHS_427_Proj_Data\02_air_dat_merged.csv",
    index=False,
)

In [None]:
# Split the combined readings into one table per pollutant
pm25_2 = dat_2_merged.loc[dat_2_merged['ParameterName'].eq('PM2.5')]
ozone_2 = dat_2_merged.loc[dat_2_merged['ParameterName'].eq('OZONE')]

In [None]:
# Per-date groupings of each pollutant's readings
pm25_2_grouped, ozone_2_grouped = (
    readings.groupby(by='DateObserved') for readings in (pm25_2, ozone_2)
)
# Distinct observation dates, taken from the PM2.5 readings only
date_list = pm25_2['DateObserved'].unique()

In [None]:
# Attach zip code geometry to the PM2.5 readings, one observation date at a time.
# The per-date pieces are collected in a list and concatenated once at the end:
# growing a DataFrame with concat inside the loop is quadratic, and seeding it
# with an empty object-dtype frame can alter the resulting column dtypes.
pm25_df = pd.DataFrame(columns=['DateObserved','Zipcode','ParameterName', 'AQI'])

pm25_parts = []
for date in date_list:
    pm25_date = (
        pm25_2_grouped.get_group(date)[['DateObserved', 'Zipcode', 'ParameterName', 'AQI']]
        # assign() returns a new frame, so the grouped data is never written to;
        # Zipcode is cast to str so it can be matched against crash_zips' key
        .assign(Zipcode=lambda day: day['Zipcode'].astype(str))
        # left join: readings without a matching zip code polygon are kept
        # with missing geometry
        .merge(crash_zips, on='Zipcode', how='left')
    )
    pm25_parts.append(pm25_date)

# With no dates at all, pm25_df stays the empty placeholder defined above
if pm25_parts:
    pm25_df = pd.concat(pm25_parts, ignore_index=True).drop_duplicates()

In [None]:
# Round-trip DateObserved through datetime and back to text, so the dates are
# stored as uniformly formatted strings.
pm25_df['DateObserved'] = pd.to_datetime(pm25_df['DateObserved']).astype(str)
# Promote the merged table to a GeoDataFrame using the zip code polygons
pm25_gdf = gpd.GeoDataFrame(data=pm25_df, geometry='geometry', crs='EPSG:26917')
pm25_gdf

In [None]:
# Export the PM2.5 layer as a shapefile (absolute, machine-specific path)
pm25_gdf.to_file(
    filename=r"D:\PQHS_427_Final_Project\Final_Dashboard\PM2.5_grouped_by_date\pm2.5_grouped_jan.shp"
)

In [None]:
# Attach zip code geometry to the ozone readings, one observation date at a time,
# then build the ozone GeoDataFrame.
# The original loop concatenated the PM2.5 frames here (copy-paste slip), so the
# "ozone" output actually contained PM2.5 data; the ozone pieces are used now.
ozone_df = pd.DataFrame(columns=['DateObserved','Zipcode','ParameterName', 'AQI'])

ozone_parts = []
for date in date_list:
    # date_list is derived from the PM2.5 readings; a date with no ozone rows
    # would make get_group raise KeyError, so such dates are skipped.
    # NOTE(review): dates that only have ozone readings are still not visited —
    # confirm whether the loop should iterate over the ozone dates instead.
    if date not in ozone_2_grouped.groups:
        continue
    ozone_date = (
        ozone_2_grouped.get_group(date)[['DateObserved', 'Zipcode', 'ParameterName', 'AQI']]
        # assign() returns a new frame, so the grouped data is never written to;
        # Zipcode is cast to str so it can be matched against crash_zips' key
        .assign(Zipcode=lambda day: day['Zipcode'].astype(str))
        # left join: readings without a matching zip code polygon are kept
        # with missing geometry
        .merge(crash_zips, on='Zipcode', how='left')
    )
    ozone_parts.append(ozone_date)

# Single concat instead of growing the frame inside the loop; with no pieces,
# ozone_df stays the empty placeholder defined above
if ozone_parts:
    ozone_df = pd.concat(ozone_parts, ignore_index=True).drop_duplicates()

# Round-trip DateObserved through datetime and back to text, so the dates are
# stored as uniformly formatted strings
ozone_df['DateObserved'] = pd.to_datetime(ozone_df['DateObserved'])
ozone_df['DateObserved'] = ozone_df['DateObserved'].astype(str)
ozone_gdf = gpd.GeoDataFrame(ozone_df, geometry = 'geometry',crs = 'EPSG:26917')
ozone_gdf

In [None]:
# Export the ozone layer as a shapefile (absolute, machine-specific path)
ozone_gdf.to_file(
    filename=r"D:\PQHS_427_Final_Project\Final_Dashboard\Ozone_grouped_by_date\ozone_grouped_jan.shp"
)