# Quartier des Spectacles

---

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# The export is semicolon-delimited and uses a comma as the decimal mark.
qds1516 = pd.read_csv(
    '../TxOccupation_QDS_2015et16.csv',
    sep=';',
    decimal=',',
)

# Get a first look

In [3]:
# preview the first five rows of the raw table
qds1516.head(n=5)

Unnamed: 0,NoTroncon,DescriptionTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation
0,1,Bleury entre Sherbrooke et Président-Kennedy -...,2015-01-01,9,0,0.0
1,1,Bleury entre Sherbrooke et Président-Kennedy -...,2015-01-01,10,0,0.0
2,1,Bleury entre Sherbrooke et Président-Kennedy -...,2015-01-01,11,0,0.0
3,1,Bleury entre Sherbrooke et Président-Kennedy -...,2015-01-01,12,0,0.0
4,1,Bleury entre Sherbrooke et Président-Kennedy -...,2015-01-01,13,0,0.0


In [4]:
# row count of the loaded table
qds1516.shape[0]

1186356

# Remove columns, change data types

In [5]:
# Drop the free-text description column; NoTroncon already identifies the section.
# `axis` is passed by keyword: the positional form was deprecated in pandas 1.3 and
# raises TypeError from pandas 2.0. Rebinding the name (instead of inplace=True)
# keeps the step explicit.
qds1516 = qds1516.drop('DescriptionTroncon', axis='columns')

In [6]:
# NoHeure and NoTroncon are identifiers rather than quantities:
# store both as pandas categoricals instead of plain integers.
for id_column in ('NoHeure', 'NoTroncon'):
    qds1516[id_column] = qds1516[id_column].astype('category')

In [7]:
# parse the DateCreation strings into real datetime64 values
creation_dates = pd.to_datetime(qds1516['DateCreation'])
qds1516['DateCreation'] = creation_dates

---

In [8]:
# Inspect column dtypes, non-null counts and memory usage after the conversions above.
qds1516.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1186356 entries, 0 to 1186355
Data columns (total 5 columns):
NoTroncon       1186356 non-null category
DateCreation    1186356 non-null datetime64[ns]
NoHeure         1186356 non-null category
NbMinDispo      1186356 non-null int64
TxOccupation    1186356 non-null float64
dtypes: category(2), datetime64[ns](1), float64(1), int64(1)
memory usage: 30.6 MB


# Removing rows with 0 occupancy rate
These rows are probably errors, or the street was closed.

In [9]:
# Filter currently disabled (commented out), so the full table is kept.
# NOTE(review): the draft of this line read from `troncon402`, which is only
# defined further down and holds a single section; if the filter is re-enabled
# it should be applied to the full frame instead:
# qds1516 = qds1516[qds1516['TxOccupation'] > 0]

In [9]:
# Re-inspect the frame; the zero-occupancy filter above is commented out,
# so the row count is expected to be unchanged.
qds1516.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1186356 entries, 0 to 1186355
Data columns (total 5 columns):
NoTroncon       1186356 non-null category
DateCreation    1186356 non-null datetime64[ns]
NoHeure         1186356 non-null category
NbMinDispo      1186356 non-null int64
TxOccupation    1186356 non-null float64
dtypes: category(2), datetime64[ns](1), float64(1), int64(1)
memory usage: 30.6 MB


---

In [11]:
# Summary statistics; with default arguments describe() reports only the
# numeric columns (here NbMinDispo and TxOccupation).
qds1516.describe()

Unnamed: 0,NbMinDispo,TxOccupation
count,1186356.0,1186356.0
mean,594.476,0.4145478
std,395.7266,0.3555239
min,0.0,0.0
25%,300.0,0.0167
50%,480.0,0.3762
75%,840.0,0.7333
max,2280.0,1.0


In [12]:
# last five rows, to check the end of the date range
qds1516.tail(n=5)

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation
1186351,2919,2016-12-31,13,120,0.0
1186352,2919,2016-12-31,14,120,0.0
1186353,2919,2016-12-31,15,120,0.0
1186354,2919,2016-12-31,16,120,0.0
1186355,2919,2016-12-31,17,120,0.0


---

# Add a column: Day of Week

In [13]:
# weekday number of each record's date (Monday=0 ... Sunday=6)
qds1516['DayOfWeek'] = qds1516.DateCreation.dt.weekday

In [14]:
# Lookup from pandas' weekday numbers (Monday=0 ... Sunday=6) to English day names.
days = {0:'Monday', 1:'Tuesday', 2:'Wednesday', 3:'Thursday', 4:'Friday', 5:'Saturday', 6:'Sunday'}

# Derive the names directly from DateCreation instead of from the current
# DayOfWeek values: the cell can then be re-run safely (looking up the column's
# own values raised KeyError on a second run, once it already held names), and
# the vectorised .map() avoids calling a Python lambda for every row.
qds1516['DayOfWeek'] = qds1516['DateCreation'].dt.dayofweek.map(days)

In [15]:
# spot-check two rows from the middle of the table (positional slice)
qds1516.iloc[100000:100002]

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation,DayOfWeek
100000,21,2016-08-18,10,480,0.8333,Thursday
100001,21,2016-08-18,11,480,0.7979,Thursday


---

# Add a column: Day of Year

In [22]:
# ordinal day within the year (1 = January 1st) of each record's date
qds1516['DayOfYear'] = qds1516.DateCreation.dt.dayofyear

In [24]:
# first two rows: 2015-01-01 should map to DayOfYear 1
qds1516.iloc[:2]

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation,DayOfWeek,DayOfYear
0,1,2015-01-01,9,0,0.0,Thursday,1
1,1,2015-01-01,10,0,0.0,Thursday,1


In [23]:
# same two mid-table rows as before, now including DayOfYear
qds1516.iloc[100000:100002]

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation,DayOfWeek,DayOfYear
100000,21,2016-08-18,10,480,0.8333,Thursday,231
100001,21,2016-08-18,11,480,0.7979,Thursday,231


---

# 25 November 2015

In [20]:
# keep only the records created on the selected date
OneDay = qds1516.loc[qds1516.DateCreation == '2015-11-25']

In [23]:
# Show only a preview: OneDay holds every section's records for that date,
# so displaying the whole frame would flood the output.
OneDay.head(10)

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation,DayOfWeek
3513,1,2015-11-25,9,1020,0.6824,Wednesday
3514,1,2015-11-25,10,1020,0.9627,Wednesday
3515,1,2015-11-25,11,1020,1.0000,Wednesday
3516,1,2015-11-25,12,1020,0.7304,Wednesday
3517,1,2015-11-25,13,1020,0.8765,Wednesday
3518,1,2015-11-25,14,1020,0.7971,Wednesday
3519,1,2015-11-25,15,1020,0.7098,Wednesday
3520,1,2015-11-25,16,1020,0.5588,Wednesday
3521,1,2015-11-25,17,1020,0.3873,Wednesday
3522,1,2015-11-25,18,1020,0.8873,Wednesday


---

# Print the number of unique tronçons (street sections)

In [26]:
# distinct section identifiers present in the data
no_section_list = qds1516['NoTroncon'].unique()

In [27]:
# number of distinct NoTroncon values
len(no_section_list)

161

In [28]:
# show the distinct NoTroncon values (plain-text repr, abbreviated by pandas)
print(no_section_list)

[1, 2, 3, 4, 5, ..., 2781, 2821, 2874, 2918, 2919]
Length: 161
Categories (161, int64): [1, 2, 3, 4, ..., 2821, 2874, 2918, 2919]


---

# Extract a specific section

In [34]:
# all records belonging to section number 402
troncon402 = qds1516.loc[qds1516.NoTroncon == 402]

In [35]:
# first five records of section 402
troncon402.head(n=5)

Unnamed: 0,NoTroncon,DateCreation,NoHeure,NbMinDispo,TxOccupation,DayOfWeek
274112,402,2015-01-01,15,0,0.0,Thursday
274113,402,2015-01-01,16,0,0.0,Thursday
274114,402,2015-01-01,17,0,0.0,Thursday
274115,402,2015-01-01,18,0,0.0,Thursday
274116,402,2015-01-01,19,0,0.0,Thursday


In [36]:
# Column dtypes, non-null counts and memory usage of the section-402 subset.
troncon402.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7916 entries, 274112 to 282027
Data columns (total 6 columns):
NoTroncon       7916 non-null category
DateCreation    7916 non-null datetime64[ns]
NoHeure         7916 non-null category
NbMinDispo      7916 non-null int64
TxOccupation    7916 non-null float64
DayOfWeek       7916 non-null object
dtypes: category(2), datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 339.1+ KB
