In [1]:
import pandas as pd

## Weather data annual aggregations

In [2]:
# Load the merged weather data from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer="weather_data.csv")

In [3]:
# Parse 'Date Local' once and reuse the parsed series for both columns below
local_dates = pd.to_datetime(df['Date Local'])
df['Date Local'] = local_dates
# Calendar year of each row; used as a grouping key by the annual aggregation
df['Year'] = local_dates.dt.year

In [4]:
# Annual aggregation: one row per (Year, State Code, State Name) holding the
# mean of every measurement column listed below.
weather_group_keys = ['Year', 'State Code', 'State Name']
weather_measures = [
    'RH(%)',
    'Temperature(C)',
    'Temperature(C) Max Value',
    'Barometric pressure(mb)',
    'Wind Speed(kn)',
    'Wind Direction(deg)',
]
# NOTE(review): 'Wind Direction(deg)' is an angle, and a plain arithmetic mean
# of angles can be misleading (e.g. 350 and 10 average to 180) - confirm this
# is the intended statistic.
weather_yearly = (
    df.groupby(weather_group_keys)
      .agg(dict.fromkeys(weather_measures, 'mean'))
      .reset_index()
)

In [5]:
# Build a "<State Code>_<Year>" key for every row (e.g. "6_2004")
weather_state_part = weather_yearly['State Code'].astype(str)
weather_year_part = weather_yearly['Year'].astype(str)
weather_yearly['year_meas_id'] = weather_state_part + '_' + weather_year_part

In [6]:
# Preview the first five rows of the yearly weather table
weather_yearly.head(5)

Unnamed: 0,Year,State Code,State Name,RH(%),Temperature(C),Temperature(C) Max Value,Barometric pressure(mb),Wind Speed(kn),Wind Direction(deg),year_meas_id
0,2004,1,Alabama,72.174107,,,,,,1_2004
1,2004,2,Alaska,73.875565,0.086509,4.0486,,1.964803,176.855277,2_2004
2,2004,4,Arizona,39.861618,18.867207,25.458893,,5.613356,169.300873,4_2004
3,2004,5,Arkansas,70.713115,,,,,,5_2004
4,2004,6,California,61.169253,15.986932,21.966234,987.820343,3.877942,189.534512,6_2004


In [None]:
#Export to csv (currently disabled; uncomment the line below to write the yearly weather table to disk)
#weather_yearly.to_csv("weather_data_yearly.csv", index=False)

## AQI annual aggregations

In [7]:
# Load the merged AQI data from disk into a DataFrame
aqi = pd.read_csv(filepath_or_buffer="daily_AQI.csv")

In [11]:
# Normalise the identifier columns: cast to int first, then to str
for code_col in ('State Code', 'County Code', 'Site Num'):
    aqi[code_col] = aqi[code_col].astype(int).astype(str)

# Parse 'Date Local' once and derive the calendar year from the parsed series
aqi_dates = pd.to_datetime(aqi['Date Local'])
aqi['Date Local'] = aqi_dates
aqi['Year'] = aqi_dates.dt.year

In [13]:
# Keys for the yearly roll-up: one group per (Year, State Code, State Name)
group_cols = ['Year', 'State Code', 'State Name']

# Named aggregation stores the mean of 'max_value' directly under 'AQI_mean',
# so no separate renaming step is needed.
aqi_yearly = (
    aqi.groupby(group_cols)
       .agg(AQI_mean=('max_value', 'mean'))
       .reset_index()
)

In [14]:
# Build the "<State Code>_<Year>" key, in the same format as the weather table's
# 'year_meas_id', so the two tables can be joined on it
aqi_state_part = aqi_yearly['State Code'].astype(str)
aqi_year_part = aqi_yearly['Year'].astype(str)
aqi_yearly['year_meas_id'] = aqi_state_part + '_' + aqi_year_part

## Merging AQI and weather yearly data

In [15]:
# Outer-join the yearly AQI and weather tables on the shared "<State Code>_<Year>" key.
# Both frames also carry 'Year', 'State Code' and 'State Name'; because they are not
# join keys they come back suffixed '_x' (AQI side) and '_y' (weather side).
#
# An outer join inserts missing values into those columns for unmatched rows. With
# plain integer dtypes that upcasts the column to float, so the later '_x'/'_y'
# fill would produce values such as 2004.0 or 1.0 mixed with string codes like "10".
# Casting 'Year' to the nullable 'Int64' dtype and the weather 'State Code' to str
# (the AQI side is already str) keeps the values intact through the join and fill.
weather_aqi_yearly = pd.merge(
    aqi_yearly.astype({'Year': 'Int64'}),
    weather_yearly.astype({'Year': 'Int64', 'State Code': str}),
    how='outer',
    on='year_meas_id',
)

In [16]:
# Column labels of the merged frame; the '_x'/'_y' fill loop iterates over these
df_cols = weather_aqi_yearly.columns

In [17]:
# Coalesce the duplicated join columns: wherever a '<name>_x' value is missing,
# fill it from the matching '<name>_y' column (when such a column exists).
left_suffix, right_suffix = '_x', '_y'
for left_col in df_cols:
    if not left_col.endswith(left_suffix):
        continue
    right_col = left_col[:-len(left_suffix)] + right_suffix
    if right_col not in weather_aqi_yearly.columns:
        continue
    weather_aqi_yearly[left_col] = weather_aqi_yearly[left_col].fillna(weather_aqi_yearly[right_col])

In [18]:
# Remove every column whose label ends in '_y'; their values have already been
# used to fill the corresponding '_x' columns
right_side_cols = weather_aqi_yearly.filter(regex='_y$').columns
weather_aqi_yearly = weather_aqi_yearly.drop(columns=right_side_cols)

In [19]:
# Inspect the remaining column labels (the key columns still carry the '_x' suffix here)
weather_aqi_yearly.columns

Index(['Year_x', 'State Code_x', 'State Name_x', 'AQI_mean', 'year_meas_id',
       'RH(%)', 'Temperature(C)', 'Temperature(C) Max Value',
       'Barometric pressure(mb)', 'Wind Speed(kn)', 'Wind Direction(deg)'],
      dtype='object')

In [20]:
# Strip the '_x' suffix from the three key columns
suffix_renames = {f'{key}_x': key for key in ('Year', 'State Code', 'State Name')}
weather_aqi_yearly = weather_aqi_yearly.rename(columns=suffix_renames)

In [21]:
# Preview the first five rows of the combined yearly table
weather_aqi_yearly.head(5)

Unnamed: 0,Year,State Code,State Name,AQI_mean,year_meas_id,RH(%),Temperature(C),Temperature(C) Max Value,Barometric pressure(mb),Wind Speed(kn),Wind Direction(deg)
0,2004,10,Delaware,45.115455,10_2004,,,,,,
1,2005,10,Delaware,49.378854,10_2005,,,,,,
2,2006,10,Delaware,46.456673,10_2006,,,,,,
3,2007,10,Delaware,45.317059,10_2007,,,,,,
4,2008,10,Delaware,43.456153,10_2008,,,,,,


In [None]:
# Write the combined yearly weather + AQI table to a UTF-8 CSV, without the row index
weather_aqi_yearly.to_csv(
    path_or_buf="yearly_state_data.csv",
    encoding='utf-8',
    index=False,
)