# Parsing
## Capacity

In [None]:
#Formatter for capacity analysis

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
car_park={
         "Paseo":0,
         "Marengo":1,
         "Los Robles":2,
         "Holly":3,
         "Del Mar":4}

#column dict
cols=["Park Trans #",
          "Car Park #",
          "Entry Time",
          "Exit Time",
          "Net Price",
          "Net Turnover",
         ]
col_use=cols[1:]

#get rid of parking transaction number

path="D:\Datasets\Del Mar Goldline Station\parsed_mar\\"
all_ent_ppa='all_ENT_PPA.csv'

data=pd.read_csv(path+all_ent_ppa,header=None,names=cols,usecols=col_use)

In [None]:
#convert Nans to -1
data.fillna(-1,inplace=True)

#unknown transaction
data['Unknown'] = [1 if x==-1 else 0 for x in data['Net Price']]

#free
data['Free'] = [1 if x==0 else 0 for x in data['Net Price']]

# add new column -> 0 if full price, 1 if discounted
# no overlap with unknown
# no overlap with free
data['Discounted'] = data.apply(lambda x : 1 if x['Net Price'] != x['Net Turnover'] and x['Unknown']==0 and x['Free']==0 else 0, axis=1)

#full price
data['Full Price'] = (data['Discounted']+data['Free']+data['Unknown']+1)%2

#to datetime
data['Entry Time']=pd.to_datetime(data['Entry Time'],format='%Y-%m-%d %H:%M:%S')
data['Exit Time']=pd.to_datetime(data['Exit Time'],format='%Y-%m-%d %H:%M:%S')

#remove nonsensical entires with 1994 in Entry or Exit Time
data=data[data['Entry Time']!=pd.to_datetime('1994-01-01 00:00:00')]
data=data[data['Exit Time']!=pd.to_datetime('1994-01-01 00:00:00')]

In [None]:
park=[]
for i in car_park.iterkeys():
    print "Starting "+i
    #separate columns
    temp_entry=data[data['Car Park #']==car_park[i]][["Entry Time","Unknown","Free","Discounted","Full Price"]]
    temp_exit=data[data['Car Park #']==car_park[i]][["Exit Time","Unknown","Free","Discounted","Full Price"]]
    #add a 1 for entry and -1 for exit
    temp_entry.insert(1,'Entry/Exit',pd.Series([1]*temp_entry.shape[0], index=temp_entry.index))
    temp_exit.insert(1,'Entry/Exit',pd.Series([-1]*temp_exit.shape[0], index=temp_entry.index))
    #rename
    temp_entry.columns=['Time','Entry/Exit',"Unknown","Free","Discounted","Full Price"]
    temp_exit.columns=['Time','Entry/Exit',"Unknown","Free","Discounted","Full Price"]
    #concat
    temp=pd.concat([temp_entry,temp_exit])
    temp['Entry/Exit'].astype(int)
    temp.sort_values(['Time'],inplace=True)
    park.append(temp)
    #writeout
    temp.to_csv(path+i+'_inout.csv',index=False)
    print i+" done."

# Plotting

In [4]:
#TODO: Try removing circling people from data

#plotting
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import datetime
matplotlib.style.use('ggplot')

path="D:\Datasets\Del Mar Goldline Station\parsed_mar\\"

capacity=600

parking_name='del_mar'

car_types=['All Cars','Unknown','Free','Discounted','Full Price']
colnames=['datetime']+car_types

#import
df=pd.read_csv(path+parking_name+'_flow.csv',header=None,names=colnames)
df['datetime']=pd.to_datetime(df['datetime'],format='%Y-%m-%d %H:%M:%S')

#remove march and june from 2016
df=df.loc[(df['datetime'].dt.year==2015) | ((df['datetime'].dt.month!=3) & (df['datetime'].dt.month!=4))]

In [5]:
matplotlib.rcParams.update({'axes.titlesize': 24})
matplotlib.rcParams.update({'axes.labelsize': 22})
matplotlib.rcParams.update({'xtick.labelsize': 16})
matplotlib.rcParams.update({'ytick.labelsize': 16})
matplotlib.rcParams.update({'legend.fontsize': 16})
matplotlib.rcParams.update({'savefig.dpi': 220})

In [12]:
mini=df.loc[(df['datetime'].dt.year==2015) & ((df['datetime'].dt.month>7) & (df['datetime'].dt.day>0))]
min(mini['All Cars'])

179

## Mean Usage By Month

In [None]:
#separate out month
by_month=df[car_types].copy()
temp = pd.DatetimeIndex(df['datetime'])
by_month['Month'] = temp.month

#group by month
grouped = by_month.groupby('Month')
bar_data = grouped.aggregate(np.mean)/capacity*100

#month names for axis
months = []
for i in range(1,13):
    months.append(datetime.date(2008, i, 1).strftime('%B'))
bar_data.set_index(pd.Series(months),inplace=True)
    
    
bar_data.plot.bar(stacked=True,y=car_types[2:])
plt.title("Mean Parking Structure Usage By Month")
plt.xlabel("")
plt.gcf().subplots_adjust(bottom=0.15)
plt.ylabel("Percent Usage (%)")
plt.show()

## Plots of Daily Capacity

In [6]:
#separate out time
day_data=df[car_types].copy()
temp = pd.DatetimeIndex(df['datetime'])
day_data['time'] = temp.time
day_data['date'] = temp.date
day_data['month'] = temp.month

scatter_all=pd.pivot_table(day_data, index='time', columns='date',values='All Cars')
scatter_full=pd.pivot_table(day_data, index='time', columns='date',values='Full Price')
scatter_disc=pd.pivot_table(day_data, index='time', columns='date',values='Discounted')

#sample every nth
n=10

scatter_all.iloc[:,::n].plot(legend=False)
plt.show()

In [9]:
grouped = day_data.groupby(['time','month'])
day_month = grouped.aggregate(np.mean)/capacity*100

#unstack
day_month=day_month.unstack()
day_month_all=day_month['All Cars']
day_month_all.columns=months

day_month_all.plot(linewidth=3)
plt.title("Mean Parking Structure Usage During the Day")
plt.xlabel("Time")
plt.ylabel("Percent Usage (%)")
plt.legend(loc=3)
plt.show()

NameError: name 'months' is not defined

In [8]:
grouped = day_data[['time']+car_types].groupby(['time'])
day_avg = grouped.aggregate(np.mean)/capacity*100

day_avg.plot.area(y=car_types[2:])
plt.title("Mean Parking Structure Usage by Price Type During the Day")
plt.xlabel("Time")
plt.ylabel("Percent Usage (%)")
plt.show()

In [None]:
day_month_all.iloc[50:100]

## Plots of Weekly Capacity

In [None]:
#weekday name list
wkdy_name=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

#separate out time
week_data=df[car_types].copy()
temp = pd.DatetimeIndex(df['datetime'])
week_data['time'] = temp.time
week_data['date'] = temp.date
week_data['weekday'] = df['datetime'].apply(lambda x: wkdy_name[x.weekday()])

grouped = week_data.groupby(['weekday'])
week_avg = grouped.aggregate(np.mean)/capacity*100
week_avg=week_avg.reindex(wkdy_name) #get right order

week_avg.plot.bar(stacked=True,y=car_types[2:])
plt.title("Mean Parking Structure Usage by Weekday")
plt.xlabel("")
plt.gcf().subplots_adjust(bottom=0.15)
plt.ylabel("Percent Usage (%)")
plt.show()

In [None]:
week_data=df[car_types].copy()
temp = pd.DatetimeIndex(df['datetime'])
week_data['time'] = temp.time
week_data['date'] = temp.date
week_data['weekday'] = df['datetime'].apply(lambda x: x.weekday())

grouped=week_data.groupby(['weekday','time'])
week_avg=grouped.aggregate(np.mean)/capacity*100
week_avg=week_avg.reset_index()
week_avg=week_avg.sort_values(['weekday','time'])
week_avg['weekday'] = week_avg['weekday'].apply(lambda x: wkdy_name[x])

ax=week_avg.plot.area(x=['time','weekday'],y=car_types[2:])
#ax.set_xticks(ax.get_xticks()[::1])
plt.title("Mean Parking Structure Usage in a Week")
plt.xlabel("(Time,Day)")
plt.ylabel("Percent Usage (%)")
plt.show()

In [None]:
week_avg[week_avg['weekday']=='Sunday'].plot.area(x=['time'],y=car_types[2:])
plt.show()

## Plots of Monthly Capacity

In [None]:
#separate out time
month_data=df[car_types].copy()
temp = pd.DatetimeIndex(df['datetime'])
month_data['day'] = temp.day
month_data['month'] = temp.month

grouped = month_data.groupby(['day'])
in_month_avg = grouped.aggregate(np.mean)/capacity*100

in_month_avg.plot.area(stacked=True,y=car_types[2:])
plt.title("Mean Parking Structure Usage in a Month")
plt.xlabel("Day")
plt.ylabel("Percent Usage (%)")
plt.show()

In [None]:
#Gross

grouped = month_data.groupby(['day','month'])
each_month = grouped.aggregate(np.mean)

#unstack
each_month=each_month.unstack()
each_month=each_month['All Cars']
each_month.columns=months

each_month.plot()
plt.title("Mean Parking Structure Usage in a Month")
plt.xlabel("Day")
plt.ylabel("Number of Cars")
plt.show()

## Histogram of Entry Frequency

In [None]:
hist_data=df[car_types[2:]]

hist_data[car_types[2:]].plot.hist(alpha=0.8,bins=50)
plt.title("Entry Frequency")
plt.xlabel("Number of Cars/10 Minutes")
plt.ylabel("Frequency")
plt.show()

In [None]:
hist_data

In [None]:
#looking only at September
#del_mar=del_mar.loc[(del_mar['datetime'].dt.month==12) & (del_mar['datetime'].dt.day==25),]

#split date time
#temp = pd.DatetimeIndex(del_mar['datetime'])
#del_mar['Date'] = temp.date
#del_mar['Time'] = temp.time

#group by time and average
#grouped = del_mar.groupby('Date')
#time=grouped.aggregate(np.mean)

del_mar.plot(x=del_mar['datetime'])
plt.show()

aug=del_mar()
#aug.plot(x=aug['datetime'])
#plt.show()

#del_mar.loc[(del_mar['datetime'].dt.month==12) & (del_mar['datetime'].dt.day==25),]