In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the raw 2017 parking-meter transaction file into a DataFrame.
# The displayed columns are pole_id, meter_type, month, day, sum_trans_amt and num_trans.
raw_trans = pd.read_csv(
    filepath_or_buffer='../raw_data/treas_meters_2017_pole_by_mo_day_datasd.csv'
)
raw_trans

Unnamed: 0,pole_id,meter_type,month,day,sum_trans_amt,num_trans
0,1-1004,SS,1,2,390,6
1,1-1004,SS,1,3,1055,14
2,1-1004,SS,1,4,725,7
3,1-1004,SS,1,5,1140,7
4,1-1004,SS,1,6,925,6
5,1-1004,SS,1,7,1184,23
6,1-1004,SS,1,9,345,5
7,1-1004,SS,1,10,500,2
8,1-1004,SS,1,11,1000,5
9,1-1004,SS,1,12,895,11


In [3]:
# Total transaction amount per pole_id, written back onto every row of that pole.
# transform('sum') keeps the original row count, so all rows of one pole share the same total.
raw_trans['combined_parking_meter_data'] = (
    raw_trans
    .groupby('pole_id')['sum_trans_amt']
    .transform('sum')
)
raw_trans


Unnamed: 0,pole_id,meter_type,month,day,sum_trans_amt,num_trans,combined_parking_meter_data
0,1-1004,SS,1,2,390,6,95394
1,1-1004,SS,1,3,1055,14,95394
2,1-1004,SS,1,4,725,7,95394
3,1-1004,SS,1,5,1140,7,95394
4,1-1004,SS,1,6,925,6,95394
5,1-1004,SS,1,7,1184,23,95394
6,1-1004,SS,1,9,345,5,95394
7,1-1004,SS,1,10,500,2,95394
8,1-1004,SS,1,11,1000,5,95394
9,1-1004,SS,1,12,895,11,95394


In [4]:
# Build clean_trans without the 'meter_type', 'month', 'day' and 'num_trans' columns;
# raw_trans itself is left untouched because drop returns a new frame.
unused_columns = ['meter_type', 'month', 'day', 'num_trans']
clean_trans = raw_trans.drop(unused_columns, axis='columns')
clean_trans

Unnamed: 0,pole_id,sum_trans_amt,combined_parking_meter_data
0,1-1004,390,95394
1,1-1004,1055,95394
2,1-1004,725,95394
3,1-1004,1140,95394
4,1-1004,925,95394
5,1-1004,1184,95394
6,1-1004,345,95394
7,1-1004,500,95394
8,1-1004,1000,95394
9,1-1004,895,95394


In [5]:
# Reduce to one row per pole_id by keeping each pole's first occurrence.
# The per-pole total is identical on every row of a pole, so nothing is lost from
# combined_parking_meter_data; sum_trans_amt on the kept row is only that row's own amount.
clean_trans = clean_trans[~clean_trans.duplicated(subset=['pole_id'], keep='first')]
clean_trans

Unnamed: 0,pole_id,sum_trans_amt,combined_parking_meter_data
0,1-1004,390,95394
114,1-1006,205,100581
227,1-1008,405,98064
340,1-1020,230,39725
453,1-1310,50,96666
569,1-1312,325,94493
684,1-1313,940,91660
797,1-1314,360,96900
912,1-1315,250,89538
1027,1-1317,55,86209


In [6]:
# Export the cleaned per-pole data to 'clean_combined_parking_meter_data.csv'.
# to_csv also writes the DataFrame index by default, so the file gets an unnamed leading column.
clean_trans.to_csv(path_or_buf='../clean_data/clean_combined_parking_meter_data.csv')
clean_trans

Unnamed: 0,pole_id,sum_trans_amt,combined_parking_meter_data
0,1-1004,390,95394
114,1-1006,205,100581
227,1-1008,405,98064
340,1-1020,230,39725
453,1-1310,50,96666
569,1-1312,325,94493
684,1-1313,940,91660
797,1-1314,360,96900
912,1-1315,250,89538
1027,1-1317,55,86209


In [11]:
# Load the cleaned parking-meter location file (pole, longitude, latitude).
clean_parking_loc = pd.read_csv(
    filepath_or_buffer='../clean_data/parking_meter_location.csv'
)
clean_parking_loc

Unnamed: 0.1,Unnamed: 0,pole,longitude,latitude
0,0,CC-1003,-117.145178,32.700353
1,1,CC-1005,-117.145178,32.700352
2,2,CC-1011,-117.145349,32.700155
3,3,CC-1013,-117.145405,32.700107
4,4,CC-1015,-117.145539,32.699987
5,5,CC-1017,-117.145540,32.699985
6,6,CC-1019,-117.145545,32.699981
7,7,CC-1103,-117.145973,32.699544
8,8,CC-1105,-117.145973,32.699545
9,9,CC-1107,-117.146065,32.699490


In [12]:
# Drop rows whose longitude OR latitude is exactly zero.
# Both coordinates are tested (previously only longitude was), so a row with a zero
# latitude cannot slip through into the distance calculations.
# NOTE(review): 0.0 is presumably a missing-value placeholder in this file -- confirm.
# NaN coordinates compare unequal to 0 and are therefore kept, as before.
zero_coord = (clean_parking_loc.longitude == 0) | (clean_parking_loc.latitude == 0)
clean_parking_loc = clean_parking_loc.drop(clean_parking_loc.index[zero_coord])
clean_parking_loc


Unnamed: 0.1,Unnamed: 0,pole,longitude,latitude
0,0,CC-1003,-117.145178,32.700353
1,1,CC-1005,-117.145178,32.700352
2,2,CC-1011,-117.145349,32.700155
3,3,CC-1013,-117.145405,32.700107
4,4,CC-1015,-117.145539,32.699987
5,5,CC-1017,-117.145540,32.699985
6,6,CC-1019,-117.145545,32.699981
7,7,CC-1103,-117.145973,32.699544
8,8,CC-1105,-117.145973,32.699545
9,9,CC-1107,-117.146065,32.699490


In [13]:
# Relabel the four columns by position. The purpose is to turn 'pole' into 'pole_id'
# so it matches the key column of the clean_combined_parking_meter_data dataset;
# the first column is relabelled 'index' along the way.
# The assignment is positional, so it relies on the frame having exactly these four columns in this order.
location_headers = ['index', 'pole_id', 'longitude', 'latitude']
clean_parking_loc.columns = location_headers
clean_parking_loc


Unnamed: 0,index,pole_id,longitude,latitude
0,0,CC-1003,-117.145178,32.700353
1,1,CC-1005,-117.145178,32.700352
2,2,CC-1011,-117.145349,32.700155
3,3,CC-1013,-117.145405,32.700107
4,4,CC-1015,-117.145539,32.699987
5,5,CC-1017,-117.145540,32.699985
6,6,CC-1019,-117.145545,32.699981
7,7,CC-1103,-117.145973,32.699544
8,8,CC-1105,-117.145973,32.699545
9,9,CC-1107,-117.146065,32.699490


In [14]:
# Remove the 'index' column, leaving pole_id, longitude and latitude.
clean_parking_loc = clean_parking_loc.drop('index', axis='columns')
clean_parking_loc

Unnamed: 0,pole_id,longitude,latitude
0,CC-1003,-117.145178,32.700353
1,CC-1005,-117.145178,32.700352
2,CC-1011,-117.145349,32.700155
3,CC-1013,-117.145405,32.700107
4,CC-1015,-117.145539,32.699987
5,CC-1017,-117.145540,32.699985
6,CC-1019,-117.145545,32.699981
7,CC-1103,-117.145973,32.699544
8,CC-1105,-117.145973,32.699545
9,CC-1107,-117.146065,32.699490


In [15]:
# Join meter locations with per-pole transaction totals on the shared 'pole_id' key.
# merge defaults to an inner join, so only poles present in both tables are kept.
merged_id = clean_parking_loc.merge(clean_trans, on='pole_id')
merged_id

Unnamed: 0,pole_id,longitude,latitude,sum_trans_amt,combined_parking_meter_data
0,CC-1003,-117.145178,32.700353,85,27724
1,CC-1005,-117.145178,32.700352,250,31869
2,CC-1011,-117.145349,32.700155,515,57232
3,CC-1013,-117.145405,32.700107,345,37887
4,CC-1015,-117.145539,32.699987,390,25950
5,CC-1017,-117.145540,32.699985,50,30021
6,CC-1019,-117.145545,32.699981,70,30962
7,CC-1103,-117.145973,32.699544,110,47437
8,CC-1105,-117.145973,32.699545,360,49463
9,CC-1107,-117.146065,32.699490,425,45072


In [22]:
# Export the merged location + transaction data to 'merged_parking_meters.csv'
# (the DataFrame index is written as well, which is the to_csv default).
merged_id.to_csv(path_or_buf='../clean_data/merged_parking_meters.csv')


In [43]:
# Load the two cleaned inputs for the distance analysis: bus stops and open pothole reports.
bus_stops = pd.read_csv(filepath_or_buffer='../clean_data/clean_bus_stops.csv')
open_pot = pd.read_csv(filepath_or_buffer='../clean_data/cleaned_get_it_done_open_potholes.csv')

open_pot

Unnamed: 0.1,Unnamed: 0,status_description,mobile_web_status,duplicate_verified,override_duplicate,requested_datetime,closed_date,open,closed,service_name,coordinates,lat,long,sap_problem_type
0,1,Acknowledged,In process,0,0,2017-05-23T18:06:00,,1,0,Pothole,32.79153673521502-117.1544678834988,32.791537,-117.154468,Pothole
1,54,New,New,0,0,2017-05-23T15:31:00,,1,0,Pothole,32.724165-117.05552499999999,32.724165,-117.055525,Pothole
2,60,Acknowledged,In process,0,0,2017-05-23T15:25:00,,1,0,Pothole,32.79042917715904-117.24192641030152,32.790429,-117.241926,Pothole
3,78,Acknowledged,In process,0,0,2017-05-23T14:43:00,,1,0,Pothole,32.760451-117.064381,32.760451,-117.064381,Pothole
4,81,New,New,0,0,2017-05-23T14:40:00,,1,0,Pothole,32.758759-117.063192,32.758759,-117.063192,Pothole
5,88,Acknowledged,In process,0,0,2017-05-23T14:17:00,,1,0,Pothole,32.748611-117.079278,32.748611,-117.079278,Pothole
6,116,Acknowledged,In process,0,1,2017-05-23T12:52:00,,1,0,Pothole,32.844727311545895-117.2670286848815,32.844727,-117.267029,Pothole
7,121,Acknowledged,In process,0,0,2017-05-23T12:45:00,,1,0,Pothole,32.74642288029995-117.1482656743188,32.746423,-117.148266,Pothole
8,125,Acknowledged,In process,0,1,2017-05-23T12:42:00,,1,0,Pothole,32.74642295486662-117.1493714079478,32.746423,-117.149371,Pothole
9,127,Acknowledged,In process,0,0,2017-05-23T12:38:00,,1,0,Pothole,32.74513698504784-117.1605940669536,32.745137,-117.160594,Pothole


In [23]:
def haversine_np(lon1, lat1, lon2, lat2, radius=None):
    """
    Haversine separation between two points on a sphere, with coordinates
    given in decimal degrees.

    Inputs may be scalars or NumPy arrays; array arguments must have equal
    length or be broadcastable against each other.

    Parameters
    ----------
    lon1, lat1 : longitude / latitude of the first point(s), in degrees.
    lon2, lat2 : longitude / latitude of the second point(s), in degrees.
    radius : optional sphere radius (for example 6371.0 for Earth in km).

    Returns
    -------
    If ``radius`` is None (default): ``arcsin(sqrt(a))``, which is HALF the
    central angle in radians. It is not a physical distance, but it grows
    monotonically with the great-circle distance, so it is sufficient for
    nearest-neighbour comparisons.
    If ``radius`` is given: ``2 * radius * arcsin(sqrt(a))``, the great-circle
    distance in the units of ``radius``.
    NaN inputs propagate to NaN outputs.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal points),
    # which would make sqrt/arcsin return NaN; clamp it. NaN values stay NaN.
    a = np.clip(a, 0.0, 1.0)

    half_angle = np.arcsin(np.sqrt(a))
    if radius is None:
        return half_angle
    return 2.0 * radius * half_angle

In [33]:
# Pre-allocate the distance matrices: one row per bus stop / parking meter,
# one column per open pothole.
# meterResults is sized by merged_id (the meter table), not bus_stops, because
# its rows are indexed by meter.
busResults = np.zeros((len(bus_stops), len(open_pot)))
meterResults = np.zeros((len(merged_id), len(open_pot)))


array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [34]:
# Separation of every bus stop from every open pothole, computed in one broadcasted
# call instead of a Python double loop: bus-stop coordinates are shaped as a column
# (n, 1) and pothole coordinates as a row (1, m), giving an (n, m) matrix with
# rows = bus stops and columns = potholes.
# Values are in whatever unit haversine_np returns; missing coordinates yield NaN.
busResults = haversine_np(
    np.asarray(bus_stops['LONGITUDE'], dtype=float)[:, None],
    np.asarray(bus_stops['LATITUDE'], dtype=float)[:, None],
    np.asarray(open_pot['long'], dtype=float)[None, :],
    np.asarray(open_pot['lat'], dtype=float)[None, :]
)
np.save('busDistances.npy',busResults)

In [30]:
# Separation of every parking meter from every open pothole, computed in one
# broadcasted call instead of a Python double loop: meter coordinates are shaped as a
# column (n, 1) and pothole coordinates as a row (1, m), giving an (n, m) matrix with
# rows = meters and columns = potholes.
# Assigning the result directly guarantees one row per meter, whatever shape
# meterResults was pre-allocated with.
meterResults = haversine_np(
    np.asarray(merged_id['longitude'], dtype=float)[:, None],
    np.asarray(merged_id['latitude'], dtype=float)[:, None],
    np.asarray(open_pot['long'], dtype=float)[None, :],
    np.asarray(open_pot['lat'], dtype=float)[None, :]
)
np.save('meterDistances.npy',meterResults)

In [36]:
# Recompute the bus-stop -> pothole matrix (rows = bus stops, columns = potholes)
# with a single broadcasted haversine_np call, print it once for inspection, then save it.
# The print used to sit inside the outer loop and dumped the full matrix once per bus stop.
busResults = haversine_np(
    np.asarray(bus_stops['LONGITUDE'], dtype=float)[:, None],
    np.asarray(bus_stops['LATITUDE'], dtype=float)[:, None],
    np.asarray(open_pot['long'], dtype=float)[None, :],
    np.asarray(open_pot['lat'], dtype=float)[None, :]
)
print (busResults)
np.save('busDistances.npy',busResults)


[[ 0.00120084  0.00142537  0.00101214 ...,  0.00207357  0.00219888
          nan]
 [ 0.00098614  0.00190658  0.00049701 ...,  0.0010194   0.00076846
          nan]
 [ 0.00097247  0.00189435  0.00049265 ...,  0.00100212  0.00076064
          nan]
 ..., 
 [ 0.0031686   0.00369702  0.00336195 ...,  0.00224049  0.00236664
          nan]
 [ 0.00315195  0.00368499  0.00334207 ...,  0.00222211  0.00234378
          nan]
 [ 0.0032538   0.00406697  0.00312851 ...,  0.00234244  0.00194056
          nan]]
[[ 0.00120084  0.00142537  0.00101214 ...,  0.00207357  0.00219888
          nan]
 [ 0.00098614  0.00190658  0.00049701 ...,  0.0010194   0.00076846
          nan]
 [ 0.00097247  0.00189435  0.00049265 ...,  0.00100212  0.00076064
          nan]
 ..., 
 [ 0.0031686   0.00369702  0.00336195 ...,  0.00224049  0.00236664
          nan]
 [ 0.00315195  0.00368499  0.00334207 ...,  0.00222211  0.00234378
          nan]
 [ 0.0032538   0.00406697  0.00312851 ...,  0.00234244  0.00194056
          nan]]


In [31]:
import numpy as np
# Reload the bus-stop/pothole matrix from 'busDistances.npy', the file written by
# the np.save calls earlier in this notebook.
bus_data = np.load(file='busDistances.npy')

In [33]:
# NaN entries (pairs with a missing coordinate) would be picked by argmin, so
# overwrite them in place with +inf. Unlike a fixed number such as 1, infinity can
# never beat a real distance, whatever unit the matrix is expressed in.
bus_data[np.isnan(bus_data)] = np.inf

In [36]:
# For every row of bus_data, the column index of its smallest entry
# (i.e. the position of the closest pothole for that bus stop).
# argmin along axis 1 replaces the explicit row loop and returns an integer array,
# so the result can be used directly for positional indexing (the previous
# np.zeros buffer stored the indices as floats).
min_dist_indices = np.argmin(bus_data, axis=1)

In [44]:
#Use min_dist_indices to pair up bus stops and potholes

(6638, 6)


In [49]:
# Candidate columns for pairing bus stops with potholes:
#   bus_stops['OBJECTID'], bus_stops['LONGITUDE'], bus_stops['LATITUDE']
#   open_pot['lat'], open_pot['long'], open_pot['Unnamed: 0']
# NOTE(review): the bare expression below only references the np.zeros function
# without calling it, so it allocates nothing -- looks like an unfinished stub.
np.zeros

Unnamed: 0.1,Unnamed: 0,status_description,mobile_web_status,duplicate_verified,override_duplicate,requested_datetime,closed_date,open,closed,service_name,coordinates,lat,long,sap_problem_type
0,1,Acknowledged,In process,0,0,2017-05-23T18:06:00,,1,0,Pothole,32.79153673521502-117.1544678834988,32.791537,-117.154468,Pothole
1,54,New,New,0,0,2017-05-23T15:31:00,,1,0,Pothole,32.724165-117.05552499999999,32.724165,-117.055525,Pothole
2,60,Acknowledged,In process,0,0,2017-05-23T15:25:00,,1,0,Pothole,32.79042917715904-117.24192641030152,32.790429,-117.241926,Pothole
3,78,Acknowledged,In process,0,0,2017-05-23T14:43:00,,1,0,Pothole,32.760451-117.064381,32.760451,-117.064381,Pothole
4,81,New,New,0,0,2017-05-23T14:40:00,,1,0,Pothole,32.758759-117.063192,32.758759,-117.063192,Pothole
5,88,Acknowledged,In process,0,0,2017-05-23T14:17:00,,1,0,Pothole,32.748611-117.079278,32.748611,-117.079278,Pothole
6,116,Acknowledged,In process,0,1,2017-05-23T12:52:00,,1,0,Pothole,32.844727311545895-117.2670286848815,32.844727,-117.267029,Pothole
7,121,Acknowledged,In process,0,0,2017-05-23T12:45:00,,1,0,Pothole,32.74642288029995-117.1482656743188,32.746423,-117.148266,Pothole
8,125,Acknowledged,In process,0,1,2017-05-23T12:42:00,,1,0,Pothole,32.74642295486662-117.1493714079478,32.746423,-117.149371,Pothole
9,127,Acknowledged,In process,0,0,2017-05-23T12:38:00,,1,0,Pothole,32.74513698504784-117.1605940669536,32.745137,-117.160594,Pothole


In [52]:
# Show the 'Unnamed: 0' column of the pothole table
# (presumably row ids carried over from an earlier CSV export -- confirm).
open_pot.loc[:, 'Unnamed: 0']

0            1
1           54
2           60
3           78
4           81
5           88
6          116
7          121
8          125
9          127
10         129
11         132
12         162
13         171
14         177
15         184
16         189
17         271
18         287
19         290
20         365
21         391
22         405
23         421
24         424
25         433
26         436
27         450
28         553
29         578
         ...  
2544     63080
2545     63316
2546     64295
2547     64520
2548     65147
2549     65154
2550     65539
2551     66330
2552     66822
2553     67165
2554     67186
2555     69840
2556     70921
2557     71891
2558     72478
2559     73676
2560     73685
2561     74205
2562     76245
2563     76258
2564     77885
2565     78187
2566     79147
2567     83117
2568     85987
2569     91511
2570     92067
2571     92995
2572     94233
2573    100727
Name: Unnamed: 0, dtype: int64