# San Diego Parking Meter Day Revenue Prediction & Visualization

Data Files: treas_parking_meters_loc_datasd (meter location), treas_parking_payments_2017_datasd (meter payment transactions)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#enable IPython to display matplotlib graphs
%matplotlib inline
import csv

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.cross_validation import cross_val_score

In [14]:
# Load the 2017 meter payment transactions (comma separator and inferred header are the pandas defaults).
file = pd.read_csv("treas_parking_payments_2017_datasd.csv")
file.tail(3)  # not the last expression of the cell, so nothing is rendered for it
file.shape  # (6143097, 7)

(6143097, 7)

In [30]:
# Load the meter location table (comma separator and inferred header are the pandas defaults).
loc = pd.read_csv("treas_parking_meters_loc_datasd.csv")
loc.shape  # 4931 meters
loc.columns

Index(['zone', 'area', 'sub_area', 'pole', 'config_id', 'config_name',
       'longitude', 'latitude'],
      dtype='object')

In [15]:
# EDA showed that the data records transactions up to 08/14/2017.
# We will subset records from 07/01/2017 up to the latest one for prediction and visualization.
# To do this, 'trans_start' (and, in the next cell, 'meter_expire') is first split into
# year, month, day and a 24-hour time-of-day.

# Kept even though this cell no longer calls it: later cells use datetime.strptime.
from datetime import datetime

# Parse every timestamp string exactly once (vectorized) and derive all four fields from that
# single result, instead of re-parsing each string four times through a row-wise apply.
trans_start_parsed = pd.to_datetime(file['trans_start'], format='%Y-%m-%d %H:%M:%S')

# Cast to int64 so the dtypes match what the row-wise version produced on every pandas version.
file['trans_start_year'] = trans_start_parsed.dt.year.astype('int64')
file['trans_start_month'] = trans_start_parsed.dt.month.astype('int64')
file['trans_start_day'] = trans_start_parsed.dt.day.astype('int64')
# Object column of datetime.time values, as before.
file['trans_start_time'] = trans_start_parsed.dt.time

In [16]:
# Split 'meter_expire' into year, month, day and a 24-hour time-of-day.
# Each timestamp string is parsed exactly once (vectorized) and all four fields are derived from
# that single result, instead of re-parsing each string four times through a row-wise apply.
meter_expire_parsed = pd.to_datetime(file['meter_expire'], format='%Y-%m-%d %H:%M:%S')

# Cast to int64 so the dtypes match what the row-wise version produced on every pandas version.
file['meter_expire_year'] = meter_expire_parsed.dt.year.astype('int64')
file['meter_expire_month'] = meter_expire_parsed.dt.month.astype('int64')
file['meter_expire_day'] = meter_expire_parsed.dt.day.astype('int64')
# Object column of datetime.time values, as before.
file['meter_expire_time'] = meter_expire_parsed.dt.time

In [17]:
# Back up the parsed transactions (comma separator and header row are the to_csv defaults; row index is omitted).
file.to_csv("treas_parking_payments_2017_datasd_parsed.csv", index=False)

In [18]:
# Inspect the frame after parsing. Only the last expression of a cell is rendered,
# so the output of this cell is the dtypes listing.
file.shape
file.columns
file.dtypes

uuid                  object
meter_type            object
pole_id               object
trans_amt              int64
pay_method            object
trans_start           object
meter_expire          object
trans_start_year       int64
trans_start_month      int64
trans_start_day        int64
trans_start_time      object
meter_expire_year      int64
meter_expire_month     int64
meter_expire_day       int64
meter_expire_time     object
dtype: object

In [22]:
# Keep only transactions whose start month is July (7) or August (8).
started_in_july = file['trans_start_month'] == 7
started_in_august = file['trans_start_month'] == 8
file2 = file[started_in_july | started_in_august]
file2.head(5)
file2.shape  # (1288611, 15)

(1288611, 15)

In [24]:
# Sanity check for NaNs.
def nan_rows(df):
    """Return the rows of ``df`` that contain at least one null value."""
    row_has_null = df.isnull().any(axis=1)
    return df[row_has_null]


def nan_cols(df):
    """Return the labels of the columns of ``df`` that contain at least one null value, as a list."""
    col_has_null = df.isnull().any()
    return df.columns[col_has_null].tolist()


nan_rows(file2)  # cleared
nan_cols(file2)  # cleared

[]

In [27]:
# Paid parking is only valid from 8am-6pm. When someone pays the meter outside that range,
# the record shows `meter_expire` equal to `trans_start`. Those records are excluded from the analysis.
zero_length_payment = file2['trans_start'] == file2['meter_expire']
file2[zero_length_payment].shape  # (10481, 15) rows omitted because of this
file2 = file2[~zero_length_payment]
file2.shape  # (1278130, 15) left after removing transactions outside of meter operating hours

(1278130, 15)

In [31]:
# Attach the meter location attributes to every transaction.
# Left join: transactions are kept even when their pole id has no row in the location table.
data = pd.merge(
    file2,
    loc,
    how='left',
    left_on='pole_id',
    right_on='pole',
)
data.columns

Index(['uuid', 'meter_type', 'pole_id', 'trans_amt', 'pay_method',
       'trans_start', 'meter_expire', 'trans_start_year', 'trans_start_month',
       'trans_start_day', 'trans_start_time', 'meter_expire_year',
       'meter_expire_month', 'meter_expire_day', 'meter_expire_time', 'zone',
       'area', 'sub_area', 'pole', 'config_id', 'config_name', 'longitude',
       'latitude'],
      dtype='object')

In [34]:
# Sanity check for NaNs after the left join: rows with any null value are the transactions
# that found no match in the meter location file.
nan_rows(data).shape #(9402, 23) rows unmatched from meter location file
# Columns containing at least one null (rendered as the cell output).
nan_cols(data)

['zone',
 'area',
 'sub_area',
 'pole',
 'config_id',
 'config_name',
 'longitude',
 'latitude']

In [39]:
# Drop every transaction that has a null in any of the columns currently containing nulls,
# i.e. the rows that were not matched in the meter location file.
columns_with_nulls = nan_cols(data)
unmatched = data[columns_with_nulls].isnull().any(axis=1)
data = data.loc[~unmatched, :]
data.shape  # (1268728, 23) records left

(1268728, 23)

In [50]:
# Preview the first three rows of the merged frame.
data.head(3)

Unnamed: 0,uuid,meter_type,pole_id,trans_amt,pay_method,trans_start,meter_expire,trans_start_year,trans_start_month,trans_start_day,...,meter_expire_day,meter_expire_time,zone,area,sub_area,pole,config_id,config_name,longitude,latitude
0,SSWG800N170701060131375,SS,WG-800N,375,CASH,2017-07-01 06:01:31,2017-07-01 13:00:00,2017,7,1,...,1,13:00:00,Downtown,Marina,800 WEST G ST,WG-800N,13177.0,MSPM 9 Hour Max $0.75 HR 8am-6pm Mon-Sat,-117.170293,32.71238
1,SSST800E170701060146150,SS,ST-800E,150,CREDIT CARD,2017-07-01 06:01:46,2017-07-01 10:00:00,2017,7,1,...,1,10:00:00,Downtown,Marina,800 STATE ST,ST-800E,13181.0,MSPM 2 Hour Max $1.25 HR 8am-6pm Mon-Fri,-117.166454,32.713993
2,SSWF800N170701060237450,SS,WF-800N,450,CREDIT CARD,2017-07-01 06:02:37,2017-07-01 14:00:00,2017,7,1,...,1,14:00:00,Downtown,Marina,800 WEST F ST,WF-800N,13177.0,MSPM 9 Hour Max $0.75 HR 8am-6pm Mon-Sat,-117.170502,32.713672


## 1. Occupancy by time interval

1) Generate a file that includes: pole id, sub-area, year, month, day, time (the 5-minute interval, e.g. 08:05:00, 08:10:00, 08:15:00), the % of time during that interval that was paid for, the count of transactions that started in that interval, and the count of transactions that expired during that interval.

In [74]:
# Build the list of 5-minute boundaries 08:00:00, 08:05:00, ..., 18:00:00.
from datetime import date, timedelta

start = datetime.strptime('07:55:00', '%H:%M:%S').time()
end = datetime.strptime('18:00:00', '%H:%M:%S').time()
step = timedelta(minutes=5)

intervals = []
curr = start
while curr < end:
    # A bare time cannot be added to a timedelta, so the time is attached to the dummy
    # date 0001-01-01, advanced by one step, and then reduced to a time again.
    anchored = datetime.combine(date(1, 1, 1), curr)
    curr = (anchored + step).time()
    intervals.append(curr)

intervals

#ref.:https://stackoverflow.com/questions/37779269/python-typeerror-adding-time-using-timedelta/37779368#37779368

[datetime.time(8, 0),
 datetime.time(8, 5),
 datetime.time(8, 10),
 datetime.time(8, 15),
 datetime.time(8, 20),
 datetime.time(8, 25),
 datetime.time(8, 30),
 datetime.time(8, 35),
 datetime.time(8, 40),
 datetime.time(8, 45),
 datetime.time(8, 50),
 datetime.time(8, 55),
 datetime.time(9, 0),
 datetime.time(9, 5),
 datetime.time(9, 10),
 datetime.time(9, 15),
 datetime.time(9, 20),
 datetime.time(9, 25),
 datetime.time(9, 30),
 datetime.time(9, 35),
 datetime.time(9, 40),
 datetime.time(9, 45),
 datetime.time(9, 50),
 datetime.time(9, 55),
 datetime.time(10, 0),
 datetime.time(10, 5),
 datetime.time(10, 10),
 datetime.time(10, 15),
 datetime.time(10, 20),
 datetime.time(10, 25),
 datetime.time(10, 30),
 datetime.time(10, 35),
 datetime.time(10, 40),
 datetime.time(10, 45),
 datetime.time(10, 50),
 datetime.time(10, 55),
 datetime.time(11, 0),
 datetime.time(11, 5),
 datetime.time(11, 10),
 datetime.time(11, 15),
 datetime.time(11, 20),
 datetime.time(11, 25),
 datetime.time(11, 30),


In [75]:
# Render every interval boundary as an 'HH:MM:SS' string; these strings become column names.
list_of_string_intervals = [boundary.strftime('%H:%M:%S') for boundary in intervals]
list_of_string_intervals

['08:00:00',
 '08:05:00',
 '08:10:00',
 '08:15:00',
 '08:20:00',
 '08:25:00',
 '08:30:00',
 '08:35:00',
 '08:40:00',
 '08:45:00',
 '08:50:00',
 '08:55:00',
 '09:00:00',
 '09:05:00',
 '09:10:00',
 '09:15:00',
 '09:20:00',
 '09:25:00',
 '09:30:00',
 '09:35:00',
 '09:40:00',
 '09:45:00',
 '09:50:00',
 '09:55:00',
 '10:00:00',
 '10:05:00',
 '10:10:00',
 '10:15:00',
 '10:20:00',
 '10:25:00',
 '10:30:00',
 '10:35:00',
 '10:40:00',
 '10:45:00',
 '10:50:00',
 '10:55:00',
 '11:00:00',
 '11:05:00',
 '11:10:00',
 '11:15:00',
 '11:20:00',
 '11:25:00',
 '11:30:00',
 '11:35:00',
 '11:40:00',
 '11:45:00',
 '11:50:00',
 '11:55:00',
 '12:00:00',
 '12:05:00',
 '12:10:00',
 '12:15:00',
 '12:20:00',
 '12:25:00',
 '12:30:00',
 '12:35:00',
 '12:40:00',
 '12:45:00',
 '12:50:00',
 '12:55:00',
 '13:00:00',
 '13:05:00',
 '13:10:00',
 '13:15:00',
 '13:20:00',
 '13:25:00',
 '13:30:00',
 '13:35:00',
 '13:40:00',
 '13:45:00',
 '13:50:00',
 '13:55:00',
 '14:00:00',
 '14:05:00',
 '14:10:00',
 '14:15:00',
 '14:20:00',

In [102]:
# Add one 0/1 column per 5-minute interval marking the interval in which each transaction STARTED.
# A column is named after the interval's end boundary and is 1 when
# previous boundary <= trans_start_time < that boundary; starts before 08:00:00 or
# at/after 18:00:00 therefore stay 0 in every column.
for i in range(1, len(intervals)):
    start_flag_col = list_of_string_intervals[i]
    lower_bound, upper_bound = intervals[i-1], intervals[i]
    started_in_interval = (data['trans_start_time'] >= lower_bound) & (data['trans_start_time'] < upper_bound)
    data[start_flag_col] = 0.0
    data.loc[started_in_interval, start_flag_col] = 1

data[list_of_string_intervals[1:]].head(5)

Unnamed: 0,08:05:00,08:10:00,08:15:00,08:20:00,08:25:00,08:30:00,08:35:00,08:40:00,08:45:00,08:50:00,...,17:15:00,17:20:00,17:25:00,17:30:00,17:35:00,17:40:00,17:45:00,17:50:00,17:55:00,18:00:00
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [105]:
# Column names for the per-interval "transaction expired" flags:
# the 'HH:MM:SS' boundary followed by a space and the suffix '2'.
list_of_string_intervals2 = [
    ' '.join((boundary.strftime('%H:%M:%S'), '2')) for boundary in intervals
]
list_of_string_intervals2

['08:00:00 2',
 '08:05:00 2',
 '08:10:00 2',
 '08:15:00 2',
 '08:20:00 2',
 '08:25:00 2',
 '08:30:00 2',
 '08:35:00 2',
 '08:40:00 2',
 '08:45:00 2',
 '08:50:00 2',
 '08:55:00 2',
 '09:00:00 2',
 '09:05:00 2',
 '09:10:00 2',
 '09:15:00 2',
 '09:20:00 2',
 '09:25:00 2',
 '09:30:00 2',
 '09:35:00 2',
 '09:40:00 2',
 '09:45:00 2',
 '09:50:00 2',
 '09:55:00 2',
 '10:00:00 2',
 '10:05:00 2',
 '10:10:00 2',
 '10:15:00 2',
 '10:20:00 2',
 '10:25:00 2',
 '10:30:00 2',
 '10:35:00 2',
 '10:40:00 2',
 '10:45:00 2',
 '10:50:00 2',
 '10:55:00 2',
 '11:00:00 2',
 '11:05:00 2',
 '11:10:00 2',
 '11:15:00 2',
 '11:20:00 2',
 '11:25:00 2',
 '11:30:00 2',
 '11:35:00 2',
 '11:40:00 2',
 '11:45:00 2',
 '11:50:00 2',
 '11:55:00 2',
 '12:00:00 2',
 '12:05:00 2',
 '12:10:00 2',
 '12:15:00 2',
 '12:20:00 2',
 '12:25:00 2',
 '12:30:00 2',
 '12:35:00 2',
 '12:40:00 2',
 '12:45:00 2',
 '12:50:00 2',
 '12:55:00 2',
 '13:00:00 2',
 '13:05:00 2',
 '13:10:00 2',
 '13:15:00 2',
 '13:20:00 2',
 '13:25:00 2',
 '13:30:00

In [106]:
# Add one 0/1 column per 5-minute interval marking the interval in which each transaction EXPIRED.
# A column is named after the interval's end boundary (plus the ' 2' suffix) and is 1 when
# previous boundary <= meter_expire_time < that boundary; expiries before 08:00:00 or
# at/after 18:00:00 therefore stay 0 in every column.
for i in range(1, len(intervals)):
    expire_flag_col = list_of_string_intervals2[i]
    lower_bound, upper_bound = intervals[i-1], intervals[i]
    expired_in_interval = (data['meter_expire_time'] >= lower_bound) & (data['meter_expire_time'] < upper_bound)
    data[expire_flag_col] = 0.0
    data.loc[expired_in_interval, expire_flag_col] = 1

data[list_of_string_intervals2[1:]].head(5)

Unnamed: 0,08:05:00 2,08:10:00 2,08:15:00 2,08:20:00 2,08:25:00 2,08:30:00 2,08:35:00 2,08:40:00 2,08:45:00 2,08:50:00 2,...,17:15:00 2,17:20:00 2,17:25:00 2,17:30:00 2,17:35:00 2,17:40:00 2,17:45:00 2,17:50:00 2,17:55:00 2,18:00:00 2
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Then we create a new variable for each time interval giving the fraction of that interval that is paid for, for each meter transaction.
Our research shows that the minimum purchase at a San Diego parking meter is 6.66 minutes (at $2.25/hour), which is longer than one time interval (5 minutes, i.e. 300 seconds).
Hence, with `interval` denoting the start of a 5-minute interval and all differences measured in minutes, we calculate the fraction of time occupied in each interval as:
 - 1 if `meter_expire_time` - `interval` >= 5
 - (`meter_expire_time` - `interval`)/5 if 0 < `meter_expire_time` - `interval` < 5
 - 0 if `meter_expire_time` - `interval` <= 0

In [119]:
# Column names for the per-interval occupancy fractions:
# the 'HH:MM:SS' boundary followed by a space and the word 'Occupancy'.
list_of_string_intervals3 = [
    ' '.join((boundary.strftime('%H:%M:%S'), 'Occupancy')) for boundary in intervals
]
list_of_string_intervals3

['08:00:00 Occupancy',
 '08:05:00 Occupancy',
 '08:10:00 Occupancy',
 '08:15:00 Occupancy',
 '08:20:00 Occupancy',
 '08:25:00 Occupancy',
 '08:30:00 Occupancy',
 '08:35:00 Occupancy',
 '08:40:00 Occupancy',
 '08:45:00 Occupancy',
 '08:50:00 Occupancy',
 '08:55:00 Occupancy',
 '09:00:00 Occupancy',
 '09:05:00 Occupancy',
 '09:10:00 Occupancy',
 '09:15:00 Occupancy',
 '09:20:00 Occupancy',
 '09:25:00 Occupancy',
 '09:30:00 Occupancy',
 '09:35:00 Occupancy',
 '09:40:00 Occupancy',
 '09:45:00 Occupancy',
 '09:50:00 Occupancy',
 '09:55:00 Occupancy',
 '10:00:00 Occupancy',
 '10:05:00 Occupancy',
 '10:10:00 Occupancy',
 '10:15:00 Occupancy',
 '10:20:00 Occupancy',
 '10:25:00 Occupancy',
 '10:30:00 Occupancy',
 '10:35:00 Occupancy',
 '10:40:00 Occupancy',
 '10:45:00 Occupancy',
 '10:50:00 Occupancy',
 '10:55:00 Occupancy',
 '11:00:00 Occupancy',
 '11:05:00 Occupancy',
 '11:10:00 Occupancy',
 '11:15:00 Occupancy',
 '11:20:00 Occupancy',
 '11:25:00 Occupancy',
 '11:30:00 Occupancy',
 '11:35:00 

In [180]:
# Calculate, for every transaction, the fraction of each 5-minute interval that lies before
# the meter expiry:
#   1                      if at least 300 s remain at the start of the interval
#   remaining seconds/300  if between 0 and 300 s remain
#   0                      if the meter has already expired
#
# NOTE(review): only 'meter_expire_time' is used. The transaction start time and the calendar
# date of the expiry are ignored, so a payment made later in the day is still counted as
# covering earlier intervals -- confirm this matches the intended definition.

# Clock times cannot be subtracted directly, so both sides are anchored on the same dummy date
# 2000-01-01. The expiry side does not depend on the interval, so it is built and parsed once
# here rather than on every pass of the loop.
data['meter_expire_time_fake'] = '2000-01-01 ' + data['meter_expire_time'].astype(str)
expire_anchored = pd.to_datetime(data['meter_expire_time_fake'])

for i in range(1, len(intervals)):
    # Column i is named after the interval's end boundary; the interval starts at boundary i-1.
    interval_start = '2000-01-01 ' + list_of_string_intervals[i-1]
    # Scratch column retained so the frame (and its CSV backup) keeps the same columns as before.
    data['intervals_fake'] = interval_start

    # Seconds of paid time left at the start of the interval. total_seconds() is used instead of
    # casting the timedelta to int64: that cast yields the count in the timedelta's storage unit
    # (nanoseconds on pandas versions that only store ns), which made every positive difference
    # exceed the 300 "seconds" threshold and the fractional case unreachable.
    timediff = (expire_anchored - pd.Timestamp(interval_start)).dt.total_seconds()

    # 5 mins = 300 seconds; clipping to [0, 1] implements the three cases listed above.
    data[list_of_string_intervals3[i]] = (timediff / 300).clip(lower=0, upper=1)

In [181]:
# Preview the occupancy columns (the first name, '08:00:00 Occupancy', is never created, so it is skipped).
occupancy_cols = list_of_string_intervals3[1:]
data[occupancy_cols].head(5)

Unnamed: 0,08:05:00 Occupancy,08:10:00 Occupancy,08:15:00 Occupancy,08:20:00 Occupancy,08:25:00 Occupancy,08:30:00 Occupancy,08:35:00 Occupancy,08:40:00 Occupancy,08:45:00 Occupancy,08:50:00 Occupancy,...,17:15:00 Occupancy,17:20:00 Occupancy,17:25:00 Occupancy,17:30:00 Occupancy,17:35:00 Occupancy,17:40:00 Occupancy,17:45:00 Occupancy,17:50:00 Occupancy,17:55:00 Occupancy,18:00:00 Occupancy
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [185]:
# Progress backup of the per-transaction frame
# (comma separator and header row are the to_csv defaults; row index is omitted).
data.to_csv("SanDiegoMeterRevenuePrediction_data.csv", index=False)

In [182]:
# List every column name of the per-transaction frame as a NumPy array.
data.columns.values

array(['uuid', 'meter_type', 'pole_id', 'trans_amt', 'pay_method',
       'trans_start', 'meter_expire', 'trans_start_year',
       'trans_start_month', 'trans_start_day', 'trans_start_time',
       'meter_expire_year', 'meter_expire_month', 'meter_expire_day',
       'meter_expire_time', 'zone', 'area', 'sub_area', 'pole',
       'config_id', 'config_name', 'longitude', 'latitude', '08:05:00',
       '08:10:00', '08:15:00', '08:20:00', '08:25:00', '08:30:00',
       '08:35:00', '08:40:00', '08:45:00', '08:50:00', '08:55:00',
       '09:00:00', '09:05:00', '09:10:00', '09:15:00', '09:20:00',
       '09:25:00', '09:30:00', '09:35:00', '09:40:00', '09:45:00',
       '09:50:00', '09:55:00', '10:00:00', '10:05:00', '10:10:00',
       '10:15:00', '10:20:00', '10:25:00', '10:30:00', '10:35:00',
       '10:40:00', '10:45:00', '10:50:00', '10:55:00', '11:00:00',
       '11:05:00', '11:10:00', '11:15:00', '11:20:00', '11:25:00',
       '11:30:00', '11:35:00', '11:40:00', '11:45:00', '11:50:00',

In [183]:
# Aggregate per {pole id, sub-area, year, month, day of the transaction start}: summing the
# per-transaction columns gives, for every 5-minute interval, the count of transactions that
# started in it, the count that expired in it, and the summed occupancy fraction.
# The first name of each list (the 08:00:00 boundary) has no column, so it is skipped.
started_cols = list_of_string_intervals[1:]
expired_cols = list_of_string_intervals2[1:]
occupancy_cols = list_of_string_intervals3[1:]
list_to_sum = started_cols + expired_cols + occupancy_cols

group_keys = ['pole_id', 'sub_area', 'trans_start_year', 'trans_start_month', 'trans_start_day']
data2 = data.groupby(group_keys, as_index=False)[list_to_sum].sum()

In [184]:
# Display the aggregated frame (the notebook truncates the rendering of large frames).
data2

Unnamed: 0,pole_id,sub_area,trans_start_year,trans_start_month,trans_start_day,08:05:00,08:10:00,08:15:00,08:20:00,08:25:00,...,17:15:00 Occupancy,17:20:00 Occupancy,17:25:00 Occupancy,17:30:00 Occupancy,17:35:00 Occupancy,17:40:00 Occupancy,17:45:00 Occupancy,17:50:00 Occupancy,17:55:00 Occupancy,18:00:00 Occupancy
0,1-1004,1000 FIRST AVE,2017,7,1,1.0,0.0,0.0,0.0,0.0,...,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0
1,1-1004,1000 FIRST AVE,2017,7,3,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1-1004,1000 FIRST AVE,2017,7,5,0.0,0.0,1.0,0.0,0.0,...,3.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0
3,1-1004,1000 FIRST AVE,2017,7,6,2.0,0.0,0.0,1.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1-1004,1000 FIRST AVE,2017,7,7,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1-1004,1000 FIRST AVE,2017,7,8,0.0,0.0,0.0,0.0,0.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0
6,1-1004,1000 FIRST AVE,2017,7,10,1.0,0.0,0.0,0.0,5.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1-1004,1000 FIRST AVE,2017,7,11,1.0,0.0,0.0,0.0,0.0,...,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0
8,1-1004,1000 FIRST AVE,2017,7,12,0.0,0.0,0.0,0.0,0.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.0
9,1-1004,1000 FIRST AVE,2017,7,13,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [186]:
# Progress backup of the aggregated frame
# (comma separator and header row are the to_csv defaults; row index is omitted).
data2.to_csv("SanDiegoMeterRevenuePrediction_data2.csv", index=False)