-
Notifications
You must be signed in to change notification settings - Fork 0
/
fillMissingStops.py
44 lines (38 loc) · 1.77 KB
/
fillMissingStops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 4 13:34:55 2018
@author: Tahereh
"""
import pandas as pd
from math import radians, cos, sin, asin, sqrt
def within(lon1, lat1, lon2, lat2, *, radius_km=6371.0):
    """
    Return the great-circle distance between two points on a sphere.

    The distance is computed with the haversine formula.

    Args:
        lon1: Longitude of the first point, in decimal degrees.
        lat1: Latitude of the first point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        radius_km: Radius of the sphere. Defaults to 6371 (Earth, in
            kilometers); pass 3956 to get miles instead.

    Returns:
        The distance along the sphere's surface, in the unit of
        ``radius_km``. NaN coordinates produce a NaN distance.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` a hair above 1 for (nearly)
    # antipodal points, which would make asin() raise a domain error.
    # An explicit comparison (rather than min()) leaves NaN untouched.
    if a > 1.0:
        a = 1.0

    c = 2 * asin(sqrt(a))
    return c * radius_km
# Stop_Name value that marks a record whose stop still has to be imputed.
MISSING_STOP_NAME = 'Not Identified - Cal'

# Largest distance, in kilometers, at which a stop is accepted (100 m).
MAX_MATCH_KM = 0.1


def _nearest_stop(stop_records, direction, lon, lat, max_km):
    """Return (stop_id, stop_name) of the closest same-direction stop.

    Args:
        stop_records: Iterable of
            (direction_id, stop_id, stop_name, stop_lon, stop_lat) tuples.
        direction: Direction value the stop's direction_id must equal.
        lon: Longitude of the record, in decimal degrees.
        lat: Latitude of the record, in decimal degrees.
        max_km: Stops farther away than this are ignored.

    Returns:
        The (stop_id, stop_name) pair of the nearest stop within
        ``max_km``, or None when no stop qualifies.
    """
    best = None
    best_km = max_km
    for stop_direction, stop_id, stop_name, stop_lon, stop_lat in stop_records:
        if stop_direction != direction:
            continue
        distance = within(stop_lon, stop_lat, lon, lat)
        # Keep the closest candidate instead of whichever qualifying stop
        # happens to come last in the stops file. A NaN distance never
        # satisfies the comparison, so records without coordinates are
        # left unmatched.
        if distance <= best_km:
            best_km = distance
            best = (stop_id, stop_name)
    return best


def _fill_missing_stops(df, stops, max_km=MAX_MATCH_KM):
    """Impute unidentified stops in ``df`` in place from ``stops``.

    Adds an ``imputedStop_flag`` column (0 by default) and, for every row
    whose ``Stop_Name`` equals MISSING_STOP_NAME, writes the id and name of
    the nearest same-direction stop within ``max_km`` into ``Uniqu`` and
    ``Stop_Name`` and sets the flag to 1.

    Returns:
        The number of rows that were imputed.
    """
    df['imputedStop_flag'] = 0

    # Column positions do not change while iterating; look them up once.
    name_col = df.columns.get_loc('Stop_Name')
    id_col = df.columns.get_loc('Uniqu')
    flag_col = df.columns.get_loc('imputedStop_flag')
    direction_col = df.columns.get_loc('Direction')
    lon_col = df.columns.get_loc('Longitude')
    lat_col = df.columns.get_loc('Latitude')

    stop_records = list(zip(
        stops['direction_id'],
        stops['stop_id'],
        stops['stop_name'],
        stops['stop_lon'],
        stops['stop_lat'],
    ))

    filled = 0
    # Iterate over a snapshot of the names: rows are rewritten as we go.
    for row, name in enumerate(df['Stop_Name'].tolist()):
        if name != MISSING_STOP_NAME:
            continue
        match = _nearest_stop(
            stop_records,
            df.iloc[row, direction_col],
            # The longitude is negated before being compared with stop_lon.
            # NOTE(review): presumably the input stores western longitudes
            # as positive magnitudes - confirm against the data.
            -1 * df.iloc[row, lon_col],
            df.iloc[row, lat_col],
            max_km,
        )
        if match is None:
            continue
        stop_id, stop_name = match
        df.iloc[row, id_col] = stop_id
        df.iloc[row, name_col] = stop_name
        df.iloc[row, flag_col] = 1
        filled += 1
    return filled


def main():
    """Read the April weekday records, impute missing stops, write a CSV."""
    df = pd.read_csv('../2-CSV/91/91_AprilWeekday.csv', sep=",")
    stops = pd.read_csv('../2-CSV/91/91_stops_locations.csv', sep=",")

    _fill_missing_stops(df, stops)

    # NOTE(review): df[1:] drops the first data row from the export, as the
    # original script did - confirm this is intentional.
    df[1:].to_csv('../2-CSV/91/91_AprilWeekday_clean_filledmissings_withflag_only_within100.csv', sep=',')


if __name__ == '__main__':
    main()