In [10]:
import pandas as pd
import numpy as np
import datetime
from shapely.geometry import Point

In [7]:
def weather_clean(weather_2019, year=2019):
    """Clean raw station observations and aggregate them onto an hourly grid.

    Processing steps:
      1. Build a 'Datetime' column from 'Date' + 'Time' and drop 'Date',
         'Time' and 'Wind Gust'.
      2. Treat rows whose 'Temperature' equals 0 as erroneous and blank all
         of their measurements (temperature, dew point, humidity, wind speed,
         pressure, precipitation).
      3. Convert 'Temperature' and 'Dew Point' from Fahrenheit to Celsius.
      4. Replace compass wind directions by degrees.
      5. Aggregate per hour: mean of the measurements, first reported wind
         direction and condition. Hours without any observation are NaN.

    Each observation belongs to exactly one hour, the half-open interval
    [hour, hour + 1h), so a reading taken exactly on the hour is not counted
    twice.

    Parameters
    ----------
    weather_2019 : pandas.DataFrame
        Raw observations with the columns 'Date', 'Time', 'Temperature',
        'Dew Point', 'Humidity', 'Wind', 'Wind Speed', 'Wind Gust',
        'Pressure', 'Precip.' and 'Condition'. The frame is not modified.
    year : int, default 2019
        Calendar year whose hours form the output grid (leap years give
        8784 rows instead of 8760).

    Returns
    -------
    pandas.DataFrame
        One row per hour of ``year`` with the columns 'Datetime', 'Temp',
        'Dew_Point', 'Humidity', 'WD', 'WS', 'Pressure', 'Precipitation'
        and 'Condition'.
    """
    measured_cols = ['Temperature', 'Dew Point', 'Humidity',
                     'Wind Speed', 'Pressure', 'Precip.']

    # drop() returns a new frame, so the caller's data is never touched
    obs = weather_2019.drop(columns=['Date', 'Time', 'Wind Gust'])
    obs['Datetime'] = pd.to_datetime(weather_2019['Date'] + " " + weather_2019['Time'])

    # process error data, i.e., temp = 0, convert it into nan
    # The mask is evaluated once, BEFORE Temperature itself is blanked;
    # otherwise the remaining columns would never match the condition.
    faulty = obs['Temperature'] == 0
    for col in measured_cols:
        # mask() upcasts integer columns to float instead of failing on NaN
        obs[col] = obs[col].mask(faulty)

    # process temperature data from F to C
    obs['Temperature'] = (obs['Temperature'] - 32) / 1.8
    obs['Dew Point'] = (obs['Dew Point'] - 32) / 1.8

    # assign numeric value to wind direction (16-point compass, 22.5 degree steps)
    compass = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE',
               'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
    wind_degrees = {name: 22.5 * step for step, name in enumerate(compass)}
    # NOTE(review): variable and calm wind are coded 0, i.e. indistinguishable
    # from north -- kept as in the original, confirm this is intended.
    wind_degrees.update({'VAR': 0.0, 'CALM': 0.0})
    # labels outside the table (including missing values) are left unchanged
    obs['Wind'] = obs['Wind'].map(lambda label: wind_degrees.get(label, label))

    # standard time data for each hour
    hours = pd.date_range(start=datetime.datetime(year, 1, 1),
                          end=datetime.datetime(year, 12, 31, 23),
                          freq='h')
    hour_start = obs['Datetime'].dt.floor('h')

    # mean of every measurement per hour (NaN values are ignored by mean)
    hourly_means = obs.groupby(hour_start)[measured_cols].mean()

    # wind direction / condition of the first row (in file order) of each hour
    opens_hour = ~hour_start.duplicated(keep='first')
    hourly_first = obs.loc[opens_hour, ['Wind', 'Condition']]
    hourly_first.index = pd.DatetimeIndex(hour_start[opens_hour])

    # align on the full hourly grid; hours without observations become NaN
    hourly = hourly_means.join(hourly_first).reindex(hours)

    # output final dataframe
    return pd.DataFrame({'Datetime': hours,
                         'Temp': hourly['Temperature'].to_numpy(),
                         'Dew_Point': hourly['Dew Point'].to_numpy(),
                         'Humidity': hourly['Humidity'].to_numpy(),
                         'WD': hourly['Wind'].to_numpy(),
                         'WS': hourly['Wind Speed'].to_numpy(),
                         'Pressure': hourly['Pressure'].to_numpy(),
                         'Precipitation': hourly['Precip.'].to_numpy(),
                         'Condition': hourly['Condition'].to_numpy()})

In [9]:
def _clean_station_csv(csv_path):
    """Read one station's raw CSV file and pass it through weather_clean."""
    station_raw = pd.read_csv(csv_path)
    return weather_clean(station_raw)


# Cleaned weather table for each of the four station files
weather_DCA_2019 = _clean_station_csv("Data/Weather_DCA_2019.csv")
weather_CGS_2019 = _clean_station_csv("Data/Weather_CGS_2019.csv")
weather_ADW_2019 = _clean_station_csv("Data/Weather_ADW_2019.csv")
weather_IAD_2019 = _clean_station_csv("Data/Weather_IAD_2019.csv")

In [12]:
# Station table: one (name, latitude, longitude) record per weather station
weather_station_info = pd.DataFrame(
    [('DCA', 38.8512, -77.0402),
     ('CGS', 38.9805, -76.9223),
     ('ADW', 38.8108, -76.8664),
     ('IAD', 38.9531, -77.4565)],
    columns=['Name', 'lat', 'lon'],
)
# Point takes x (longitude) first, then y (latitude)
weather_station_info['geometry'] = [
    Point(station_lon, station_lat)
    for station_lon, station_lat in zip(weather_station_info['lon'].tolist(),
                                        weather_station_info['lat'].tolist())
]
weather_station_info

Unnamed: 0,Name,lat,lon,geometry
0,DCA,38.8512,-77.0402,POINT (-77.0402 38.8512)
1,CGS,38.9805,-76.9223,POINT (-76.9223 38.9805)
2,ADW,38.8108,-76.8664,POINT (-76.8664 38.8108)
3,IAD,38.9531,-77.4565,POINT (-77.4565 38.9531)
