# Weather Data

## 1. Import Libraries

In [125]:
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

%matplotlib inline
sns.set_style('ticks')

## 2. Import Weather Data & Create Table

In [126]:
# Read the weather CSV into a DataFrame. The path is relative, so it resolves
# against the notebook's current working directory.
df_weather = pd.read_csv('./data/weather_data.csv')

In [127]:
df_weather.shape

(56454, 90)

In [128]:
df_weather.columns

Index([u'STATION', u'STATION_NAME', u'ELEVATION', u'LATITUDE', u'LONGITUDE',
       u'DATE', u'REPORTTPYE', u'HOURLYSKYCONDITIONS', u'HOURLYVISIBILITY',
       u'HOURLYPRSENTWEATHERTYPE', u'HOURLYDRYBULBTEMPF',
       u'HOURLYDRYBULBTEMPC', u'HOURLYWETBULBTEMPF', u'HOURLYWETBULBTEMPC',
       u'HOURLYDewPointTempF', u'HOURLYDewPointTempC',
       u'HOURLYRelativeHumidity', u'HOURLYWindSpeed', u'HOURLYWindDirection',
       u'HOURLYWindGustSpeed', u'HOURLYStationPressure',
       u'HOURLYPressureTendency', u'HOURLYPressureChange',
       u'HOURLYSeaLevelPressure', u'HOURLYPrecip', u'HOURLYAltimeterSetting',
       u'DAILYMaximumDryBulbTemp', u'DAILYMinimumDryBulbTemp',
       u'DAILYAverageDryBulbTemp', u'DAILYDeptFromNormalAverageTemp',
       u'DAILYAverageRelativeHumidity', u'DAILYAverageDewPointTemp',
       u'DAILYAverageWetBulbTemp', u'DAILYHeatingDegreeDays',
       u'DAILYCoolingDegreeDays', u'DAILYSunrise', u'DAILYSunset',
       u'DAILYWeather', u'DAILYPrecip', u'DAILYSnowfall

##### Create a function that strips non-numeric characters from the precipitation column and converts the values to floats

In [129]:
def remove_char(l):
    """Strip non-numeric characters from each value and convert it to a float.

    Every element is converted with ``str()``, all characters other than the
    digits 0-9 and ``.`` are discarded, and the remainder is parsed as a float.
    For example ``'0.02s'`` becomes ``0.02``.

    Caveats:
      * Minus signs and exponent markers are discarded as well, so ``'-0.5'``
        becomes ``0.5`` and ``'1e-05'`` becomes ``105.0``.
      * A value that contains no digits at all (``''``, ``'T'``, ``None``,
        ``nan``) yields ``nan`` instead of raising ``ValueError``.

    Parameters
    ----------
    l : iterable
        Values to clean (strings, numbers, or a mix).

    Returns
    -------
    list of float
        One float per input element, in the same order.

    Raises
    ------
    ValueError
        If the kept characters contain digits but still do not form a valid
        float (for example ``'1.2.3'``).
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    numeric_chars = re.compile(r'\d|[.]')
    new_l = []
    for x in l:
        kept = ''.join(numeric_chars.findall(str(x)))
        if kept.strip('.'):
            new_l.append(float(kept))
        else:
            # Nothing but (possibly) dots survived, so there is no number to parse.
            new_l.append(float('nan'))
    return new_l

In [130]:
# Step 1: rows holding the literal marker 'T' get a fixed value of 0.05.
# NOTE(review): 'T' presumably denotes trace precipitation; confirm against
# the data dictionary and that 0.05 is the intended stand-in.
trace_rows = df_weather['HOURLYPrecip'] == 'T'
df_weather.loc[trace_rows, 'HOURLYPrecip'] = 0.05

# Step 2: missing readings are treated as zero precipitation.
missing_rows = df_weather['HOURLYPrecip'].isnull()
df_weather.loc[missing_rows, 'HOURLYPrecip'] = 0

# Step 3: strip any leftover non-numeric characters and store floats back
# into the same column.
precip_values = df_weather['HOURLYPrecip'].values
df_weather['HOURLYPrecip'] = remove_char(precip_values)

In [131]:
# Parse the DATE strings into a new DATETIME column using an explicit
# 'YYYY-MM-DD HH:MM' format; the original DATE column is left unchanged.
df_weather['DATETIME'] = pd.to_datetime(df_weather['DATE'], format='%Y-%m-%d %H:%M')

In [132]:
# Sanity check: number of rows with a strictly positive HOURLYPrecip value.
# The column list does not affect the count; only the row filter does.
len(df_weather.loc[df_weather['HOURLYPrecip'] > 0, ['STATION', 'STATION_NAME', 'LATITUDE', 'LONGITUDE', 'DATETIME', 'HOURLYPrecip']])

7930

##### Create a new dataframe with only the columns needed

In [133]:
# Keep all rows but only the station identity, location, timestamp and hourly
# precipitation columns; the other columns of df_weather are not carried over.
df = df_weather.loc[:, ['STATION', 'STATION_NAME', 'LATITUDE', 'LONGITUDE', 'DATETIME', 'HOURLYPrecip']]

In [134]:
df.sample(5)

Unnamed: 0,STATION,STATION_NAME,LATITUDE,LONGITUDE,DATETIME,HOURLYPrecip
27449,WBAN:94728,NY CITY CENTRAL PARK NY US,40.7889,-73.9669,2016-06-11 22:51:00,0.0
48929,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2016-09-18 23:51:00,0.0
27987,WBAN:94728,NY CITY CENTRAL PARK NY US,40.7889,-73.9669,2016-07-01 03:51:00,0.0
46095,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2016-06-30 04:51:00,0.0
38469,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2015-12-02 19:51:00,0.01


In [135]:
# Keep readings from 2017-01-01 00:00 onward. The comparison is inclusive so a
# reading stamped exactly at midnight on 1 January is not dropped.
# No upper bound is applied, so any rows dated after 2017 are kept as well.
df_2017 = df.loc[df['DATETIME'] >= pd.to_datetime('2017-01-01')]

In [136]:
df_2017.sample(10)

Unnamed: 0,STATION,STATION_NAME,LATITUDE,LONGITUDE,DATETIME,HOURLYPrecip
19768,WBAN:14732,LA GUARDIA AIRPORT NY US,40.7792,-73.88,2017-04-03 08:51:00,0.0
18472,WBAN:14732,LA GUARDIA AIRPORT NY US,40.7792,-73.88,2017-03-01 01:51:00,0.0
53887,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2017-02-02 19:00:00,0.0
54195,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2017-02-11 08:51:00,0.0
35998,WBAN:94728,NY CITY CENTRAL PARK NY US,40.7889,-73.9669,2017-03-19 09:51:00,0.0
17529,WBAN:14732,LA GUARDIA AIRPORT NY US,40.7792,-73.88,2017-02-04 08:51:00,0.0
53365,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2017-01-20 00:51:00,0.0
53085,WBAN:94789,JFK INTERNATIONAL AIRPORT NY US,40.6386,-73.7622,2017-01-12 14:51:00,0.0
36832,WBAN:94728,NY CITY CENTRAL PARK NY US,40.7889,-73.9669,2017-04-13 21:51:00,0.0
36544,WBAN:94728,NY CITY CENTRAL PARK NY US,40.7889,-73.9669,2017-04-04 11:51:00,0.0


In [137]:
# Write the filtered frame to disk. With to_csv's defaults the DataFrame index
# is written as an unnamed first column.
# NOTE(review): the ./clean_data directory must already exist; to_csv does not
# create missing directories.
df_2017.to_csv('./clean_data/weather_2017.csv')