-
Notifications
You must be signed in to change notification settings - Fork 0
/
join_weather.py
70 lines (52 loc) · 3.27 KB
/
join_weather.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import pandas as pd
import numpy as np
# os.chdir('/work/project2/')
airport_info = pd.read_csv('airports.dat', header=None, names=['id', 'name', 'city', 'country',
'iata', 'icao', 'lat', 'lot',
'altitude', 'timezone', 'dst', 'tz'])
airport_info = airport_info[['city','iata']]
airport_info = airport_info.set_index('iata')
airport_info.index.name = None
ontime = pd.read_csv('joined_holiday_2015.csv')
ontime = ontime.join(airport_info, on=['origin'])
###Uncomment to check how many cities have different names
#ontime.loc[ontime['origin_city_name'] != ontime['city']].groupby('city').count()
ontime['origin_city_name'] = ontime['city']
ontime = ontime.drop('city', 1)
ontime = ontime.join(airport_info, on=['dest'])
ontime['dest_city_name'] = ontime['city']
ontime = ontime.drop('city', 1)
weather = pd.read_csv('weather-by-year/weather_2015.csv')
weather = weather.rename(columns = {' wind_speed_mph':'wind_speed_mph','city':'original_city'})
weather = weather[['original_city','state','zip','airport_code','temperature_f','wind_speed_mph','precipitation_in','events','date','time_blk']]
airport_info = pd.read_csv('airports.dat', header=None, names=['id', 'name', 'city', 'country',
'iata', 'icao', 'lat', 'lot',
'altitude', 'timezone', 'dst', 'tz'])
airport_info = airport_info[['city','icao']]
airport_info = airport_info.set_index('icao')
airport_info.index.name = None
weather = weather.join(airport_info, on=['airport_code'])
weather = weather.drop('original_city', 1)
weather.precipitation_in.fillna(0,inplace=True)
weather.events.fillna("Good", inplace=True)
weather.wind_speed_mph.fillna(0,inplace=True)
weather.wind_speed_mph.replace(to_replace='Calm',value = 0,inplace=True)
weather['wind_speed_mph'] = pd.to_numeric(weather['wind_speed_mph'])
weather.drop_duplicates(['date','city', 'state', 'airport_code', 'time_blk'],keep= 'last', inplace=True)
ontime = pd.merge(ontime,weather, how = 'left',left_on=['origin_city_name','fl_date','dep_time_blk','origin_state_abr'],
right_on=['city','date','time_blk','state'])
ontime = ontime.drop(['city','date','time_blk','state','zip'],axis =1)
ontime.rename(columns= {'temperature_f':'dep_temp_f',
'wind_speed_mph':'dep_wind_speed_mph',
'precipitation_in':'dep_precipitation_in',
'events':'dep_conditions'},inplace=True)
ontime = pd.merge(ontime,weather, how = 'left',left_on=['dest_city_name','fl_date','arr_time_blk','dest_state_abr'],
right_on=['city','date','time_blk','state'])
ontime.drop(['city','date','time_blk','state','zip'],axis = 1, inplace= True)
ontime.rename(columns= {'temperature_f':'arr_temp_f',
'wind_speed_mph':'arr_wind_speed_mph',
'precipitation_in':'arr_precipitation_in',
'events':'arr_conditions'},inplace=True)
ontime = ontime[(pd.notnull(ontime.airport_code_x)) & (pd.notnull(ontime.airport_code_y))]
ontime.to_csv('joined_weather_2015.csv', index=False)