In [202]:
# Pedestrian Counting System
## Dependency modules: Pandas and NumPy
## Source data is expected at 'data/data.csv', relative to the working directory

## Data profiling has already been done (ref. Pedestrian_Counting_System_DataProfiling.ipynb), so going straight into the analysis

In [246]:
import numpy as np
import pandas as pd

In [247]:
# Load the source pedestrian count data (path is relative to the working directory)
# into the dataframe that the rest of the notebook analyses
df = pd.read_csv('data/data.csv')

In [248]:
# Review dataframe - preview the first rows of the loaded data
df.head()

Unnamed: 0,ID,Date_Time,Year,Month,Mdate,Day,Time,Sensor_ID,Sensor_Name,Hourly_Counts
0,2887628,"November 01, 2019 05:00:00 PM",2019,November,1,Friday,17,34,Flinders St-Spark La,300
1,2887629,"November 01, 2019 05:00:00 PM",2019,November,1,Friday,17,39,Alfred Place,604
2,2887630,"November 01, 2019 05:00:00 PM",2019,November,1,Friday,17,37,Lygon St (East),216
3,2887631,"November 01, 2019 05:00:00 PM",2019,November,1,Friday,17,40,Lonsdale St-Spring St (West),627
4,2887632,"November 01, 2019 05:00:00 PM",2019,November,1,Friday,17,36,Queen St (West),774


In [249]:
# Review dataframe - total row count (first element of the shape tuple)
df.shape[0]

4415574

In [250]:
# Review data - count the distinct values in each column to check uniqueness
df.nunique()

ID               4415574
Date_Time         115465
Year                  14
Month                 12
Mdate                 31
Day                    7
Time                  24
Sensor_ID             82
Sensor_Name           94
Hourly_Counts       6410
dtype: int64

In [251]:
#Earlier data profiling identified a data quality (DQ) issue in the sensor names; applying the fix before conducting the analysis

In [252]:
#To fix the DQ issue quickly, standardise the values by finding and replacing only the specific sensor names
## 12 sensor ids in total - 46, 54, 64, 60, 69, 68, 67, 66, 72, 77, 76, 75

In [253]:
#DQ fix - build one boolean row mask per sensor id whose name needs to be fixed
sensor_id_column = df['Sensor_ID']

dq_sid_46 = sensor_id_column.eq(46)
dq_sid_54 = sensor_id_column.eq(54)
dq_sid_64 = sensor_id_column.eq(64)
dq_sid_60 = sensor_id_column.eq(60)
dq_sid_69 = sensor_id_column.eq(69)
dq_sid_68 = sensor_id_column.eq(68)
dq_sid_67 = sensor_id_column.eq(67)
dq_sid_66 = sensor_id_column.eq(66)
dq_sid_72 = sensor_id_column.eq(72)
dq_sid_77 = sensor_id_column.eq(77)
dq_sid_76 = sensor_id_column.eq(76)
dq_sid_75 = sensor_id_column.eq(75)

In [254]:
#DQ fix - overwrite the sensor name on every row selected by each sensor-id mask
for row_mask, standard_name in (
    (dq_sid_46, 'Pelham St (South)'),
    (dq_sid_54, 'Lincoln-Swanston (West)'),
    (dq_sid_64, 'Royal Pde - Grattan St'),
    (dq_sid_60, 'Flinders La - Swanston St (West) Temp'),
    (dq_sid_69, 'Flinders Ln - Degraves St (Crossing)'),
    (dq_sid_68, 'Flinders Ln - Degraves St (North)'),
    (dq_sid_67, 'Flinders Ln - Degraves St (South)'),
    (dq_sid_66, 'State Library - New'),
    (dq_sid_72, 'Flinders St - ACMI'),
    (dq_sid_77, 'Harbour Esplanade (West) - Pedestrian Pa'),
    (dq_sid_76, 'Macaulay Rd - Bellair St'),
    (dq_sid_75, 'Spring St - Flinders St (West)'),
):
    # One standard name per sensor id, applied in the same order as the masks were built
    df.loc[row_mask, 'Sensor_Name'] = standard_name

In [255]:
#DQ fix - check that the distinct Sensor_ID and Sensor_Name counts match after the fix
df.nunique()

ID               4415574
Date_Time         115465
Year                  14
Month                 12
Mdate                 31
Day                    7
Time                  24
Sensor_ID             82
Sensor_Name           82
Hourly_Counts       6410
dtype: int64

In [256]:
#From earlier data investigation, identified that duplicate rows exist for a given date time, sensor id and name

## Not enough information to handle these duplicates, so keeping these rows and proceeding with the analysis


In [285]:
# Analysis start

# 1

## Top 10 (most pedestrians) locations by day

##   Sum 'Hourly_Counts' for each day of the week (Sunday, Monday and so on) per location (Sensor Name)
##   Get the top 10 largest values in hourly count

##     To repeat the above for each day of the week, a function will be implemented for reusability

In [265]:
#function to return top ten pedestrian loc by day

def top_pedestrain_loc_by_day(dataframe, day, top_n=10):
    """Return the locations with the highest total pedestrian count on one day of the week.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Readings with at least the ``Day``, ``Sensor_Name`` and
        ``Hourly_Counts`` columns.
    day : str
        Value matched against the ``Day`` column with ``==``.
    top_n : int, default 10
        Number of locations to return. The default keeps the original
        top-ten behaviour.

    Returns
    -------
    pandas.Series
        Summed ``Hourly_Counts`` indexed by ``Sensor_Name``, largest first,
        limited to the first ``top_n`` entries. Empty when no row matches
        ``day``.
    """
    # Bracket access is used so the column lookup can never be confused with a
    # DataFrame attribute or method of the same name.
    df_day = dataframe[dataframe['Day'] == day]

    # Total the counts per location, then rank from busiest to quietest.
    totals = df_day.groupby('Sensor_Name')['Hourly_Counts'].sum()
    return totals.sort_values(ascending=False).head(top_n)
    


In [282]:
from IPython.display import display

In [283]:
#Top 10 (most pedestrians) locations for each day of the week, starting with Sunday
days = [
    'Sunday', 'Monday', 'Tuesday', 'Wednesday',
    'Thursday', 'Friday', 'Saturday',
]
for day in days:
    # Heading first, then the ranked totals for that day
    print(f'Top 10 pedestrian count by days: For - {day}')
    display(top_pedestrain_loc_by_day(dataframe=df, day=day))

Top 10 pedestrian count by days: For - Sunday


Sensor_Name
Town Hall (West)                     18172940
Melbourne Central                    15659936
Princes Bridge                       15588544
Bourke Street Mall (North)           13719617
Bourke Street Mall (South)           13474209
Flinders Street Station Underpass    13016524
Flinders St-Elizabeth St (East)       9225154
Southbank                             9199517
The Arts Centre                       8963423
State Library                         8931922
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Monday


Sensor_Name
Town Hall (West)                     19272251
Flinders Street Station Underpass    17820508
Melbourne Central                    15661642
Bourke Street Mall (South)           14738561
Princes Bridge                       14581093
Bourke Street Mall (North)           14281681
Spencer St-Collins St (North)        11849061
Flinders St-Elizabeth St (East)      11537518
State Library                        10339353
Flagstaff Station                     9654896
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Tuesday


Sensor_Name
Town Hall (West)                     19457168
Flinders Street Station Underpass    18537064
Melbourne Central                    16161824
Bourke Street Mall (South)           14480606
Princes Bridge                       14460721
Bourke Street Mall (North)           14337781
Spencer St-Collins St (North)        12623319
Flinders St-Elizabeth St (East)      11864474
State Library                        10617852
Flagstaff Station                    10430342
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Wednesday


Sensor_Name
Town Hall (West)                     20360823
Flinders Street Station Underpass    19083684
Melbourne Central                    16753294
Princes Bridge                       15665642
Bourke Street Mall (South)           15396248
Bourke Street Mall (North)           15112531
Spencer St-Collins St (North)        13004087
Flinders St-Elizabeth St (East)      12031778
State Library                        11000867
Flagstaff Station                    10553562
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Thursday


Sensor_Name
Town Hall (West)                     21187600
Flinders Street Station Underpass    19496066
Melbourne Central                    17114784
Bourke Street Mall (South)           16378446
Bourke Street Mall (North)           16037716
Princes Bridge                       15776348
Spencer St-Collins St (North)        13007689
Flinders St-Elizabeth St (East)      12088848
State Library                        11091657
Southern Cross Station               10465545
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Friday


Sensor_Name
Town Hall (West)                     24393876
Flinders Street Station Underpass    21320727
Melbourne Central                    19993700
Bourke Street Mall (South)           18881248
Princes Bridge                       18484231
Bourke Street Mall (North)           18445974
Spencer St-Collins St (North)        13058283
Flinders St-Elizabeth St (East)      12619633
State Library                        12265360
Southbank                            11080180
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by days: For - Saturday


Sensor_Name
Town Hall (West)                     22499236
Melbourne Central                    19171957
Princes Bridge                       18369336
Bourke Street Mall (South)           17095304
Bourke Street Mall (North)           16862095
Flinders Street Station Underpass    15417211
Southbank                            11062405
State Library                        10673040
Flinders St-Elizabeth St (East)      10583641
The Arts Centre                       9883781
Name: Hourly_Counts, dtype: int64

In [284]:
# 2

## Top 10 (most pedestrians) locations by month

##   Sum 'Hourly_Counts' for each month (January, February and so on) per location (Sensor Name)
##   Get the top 10 largest values in hourly count

##     To repeat the above for each month, a function will be implemented for reusability

In [286]:
#function to return top ten pedestrian loc by month

def top_pedestrain_loc_by_month(dataframe, month, top_n=10):
    """Return the locations with the highest total pedestrian count in one month.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Readings with at least the ``Month``, ``Sensor_Name`` and
        ``Hourly_Counts`` columns.
    month : str
        Value matched against the ``Month`` column with ``==``.
    top_n : int, default 10
        Number of locations to return. The default keeps the original
        top-ten behaviour.

    Returns
    -------
    pandas.Series
        Summed ``Hourly_Counts`` indexed by ``Sensor_Name``, largest first,
        limited to the first ``top_n`` entries. Empty when no row matches
        ``month``.
    """
    # Bracket access is used so the column lookup can never be confused with a
    # DataFrame attribute or method of the same name.
    df_month = dataframe[dataframe['Month'] == month]

    # Total the counts per location, then rank from busiest to quietest.
    totals = df_month.groupby('Sensor_Name')['Hourly_Counts'].sum()
    return totals.sort_values(ascending=False).head(top_n)
    


In [287]:
#Top 10 (most pedestrians) locations for each calendar month, January through December
months = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]
for month in months:
    # Heading first, then the ranked totals for that month
    print(f'Top 10 pedestrian count by months: For - {month}')
    display(top_pedestrain_loc_by_month(dataframe=df, month=month))

Top 10 pedestrian count by months: For - January


Sensor_Name
Town Hall (West)                     11967639
Princes Bridge                       10796073
Flinders Street Station Underpass    10279628
Bourke Street Mall (South)            9106886
Melbourne Central                     8433264
Bourke Street Mall (North)            8136291
Flinders St-Elizabeth St (East)       6307184
Southbank                             6279974
The Arts Centre                       6214671
Spencer St-Collins St (North)         6096117
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - February


Sensor_Name
Town Hall (West)                     11156945
Flinders Street Station Underpass    10292516
Princes Bridge                        9493960
Melbourne Central                     9004266
Bourke Street Mall (North)            8335328
Bourke Street Mall (South)            8134134
Spencer St-Collins St (North)         6591467
Flinders St-Elizabeth St (East)       6547085
State Library                         5708978
The Arts Centre                       5657723
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - March


Sensor_Name
Town Hall (West)                     12655259
Melbourne Central                    11737486
Flinders Street Station Underpass    11419121
Princes Bridge                       11001403
Bourke Street Mall (North)            9643824
Bourke Street Mall (South)            9017586
Flinders St-Elizabeth St (East)       7566831
Spencer St-Collins St (North)         7149300
State Library                         7091544
Southbank                             5930487
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - April


Sensor_Name
Town Hall (West)                     12463968
Melbourne Central                    10862976
Princes Bridge                       10660422
Flinders Street Station Underpass    10135936
Bourke Street Mall (North)            9194871
Bourke Street Mall (South)            8815472
Flinders St-Elizabeth St (East)       7561340
Spencer St-Collins St (North)         6215026
State Library                         6159571
The Arts Centre                       6090009
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - May


Sensor_Name
Town Hall (West)                     11591495
Flinders Street Station Underpass    10787077
Melbourne Central                    10615667
Bourke Street Mall (North)            9774547
Princes Bridge                        9561337
Bourke Street Mall (South)            8901477
Flinders St-Elizabeth St (East)       7592610
State Library                         6580684
Spencer St-Collins St (North)         6009446
Southbank                             5862252
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - June


Sensor_Name
Town Hall (West)                     11934368
Melbourne Central                    10287904
Flinders Street Station Underpass     9867379
Bourke Street Mall (North)            9455301
Princes Bridge                        8997795
Bourke Street Mall (South)            8818621
Flinders St-Elizabeth St (East)       6549459
Spencer St-Collins St (North)         6139724
State Library                         5463552
Southbank                             5242526
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - July


Sensor_Name
Town Hall (West)                     12555809
Melbourne Central                    11224490
Flinders Street Station Underpass    10808119
Bourke Street Mall (North)            9677482
Bourke Street Mall (South)            9499308
Princes Bridge                        9159764
Spencer St-Collins St (North)         6588137
State Library                         6434716
Southbank                             5903894
Flinders St-Elizabeth St (East)       5883997
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - August


Sensor_Name
Town Hall (West)                     11675587
Melbourne Central                    10477135
Flinders Street Station Underpass     9214815
Bourke Street Mall (North)            8299742
Bourke Street Mall (South)            8282631
Princes Bridge                        7861189
State Library                         6404288
Spencer St-Collins St (North)         5933432
Flinders St-Elizabeth St (East)       4805402
Flagstaff Station                     4648096
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - September


Sensor_Name
Town Hall (West)                     11475209
Melbourne Central                     9902555
Flinders Street Station Underpass     9602620
Bourke Street Mall (South)            8286561
Bourke Street Mall (North)            8192967
Princes Bridge                        7823066
Flinders St-Elizabeth St (East)       6435942
State Library                         6321620
Spencer St-Collins St (North)         6126071
The Arts Centre                       4742644
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - October


Sensor_Name
Town Hall (West)                     11152763
Flinders Street Station Underpass    10333881
Bourke Street Mall (North)            8737215
Bourke Street Mall (South)            8675275
Melbourne Central                     8260476
Princes Bridge                        7877358
State Library                         6942115
Spencer St-Collins St (North)         6728313
Flinders St-Elizabeth St (East)       5860962
Southbank                             5276274
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - November


Sensor_Name
Town Hall (West)                     12136661
Flinders Street Station Underpass    10598560
Bourke Street Mall (South)            9772438
Melbourne Central                     9752026
Princes Bridge                        8942629
Bourke Street Mall (North)            8871745
Flinders St-Elizabeth St (East)       7197048
Spencer St-Collins St (North)         6661345
State Library                         6630549
Southbank                             5660775
Name: Hourly_Counts, dtype: int64

Top 10 pedestrian count by months: For - December


Sensor_Name
Town Hall (West)                     14578191
Bourke Street Mall (South)           13134233
Flinders Street Station Underpass    11352132
Princes Bridge                       10750919
Bourke Street Mall (North)           10478082
Melbourne Central                     9958892
Flinders St-Elizabeth St (East)       7643186
Southbank                             7087552
State Library                         6079839
Spencer St-Collins St (North)         5946613
Name: Hourly_Counts, dtype: int64

In [288]:
# 3

## Location that has shown the most decline due to lockdowns in the last 2 years (comparing years 2020 and 2021)

## Approach:
##  1. A function will be implemented for reusability to filter the dataset by reading year
##  2. Since not all locations are common between the years, only locations common to both years will be used for this analysis
##  3. A new column with the change in pedestrian count for each location between the years will be created
##  4. From this new column, the location with the biggest drop in pedestrians for year 2021 will be identified


In [289]:
#function to return pedestrian count by loc for given year
def loc_pedestrian_count_year(dataframe, year):
    """Return the total pedestrian count per location for a single year.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Readings with at least the ``Year``, ``Sensor_Name`` and
        ``Hourly_Counts`` columns.
    year : int
        Value matched against the ``Year`` column with ``==``.

    Returns
    -------
    pandas.DataFrame
        One row per ``Sensor_Name`` (used as the index) with a single
        ``Hourly_Counts`` column holding the summed counts for ``year``.
    """
    # Keep the filtered rows under their own name instead of rebinding the argument.
    df_year = dataframe[dataframe['Year'] == year]

    # The aggregation is named by string: passing the builtin ``sum`` to ``agg``
    # raises a FutureWarning in recent pandas releases.
    return df_year.groupby('Sensor_Name').agg({'Hourly_Counts': 'sum'})

In [290]:
#pedestrian count per location for year 2020, with Sensor_Name moved from the index into a column
pd_count_2020 = loc_pedestrian_count_year(df, 2020).reset_index()

#names of the locations that appear in the 2020 totals
sensor_name_2020 = pd_count_2020['Sensor_Name'].to_list()

In [291]:
#pedestrian count per location for year 2021, with Sensor_Name moved from the index into a column
pd_count_2021 = loc_pedestrian_count_year(df, 2021).reset_index()

#names of the locations that appear in the 2021 totals
sensor_name_2021 = pd_count_2021['Sensor_Name'].to_list()

In [293]:
#restrict both yearly totals to the locations that appear in 2020 as well as in 2021
sensor_names_common = list(set(sensor_name_2020).intersection(sensor_name_2021))

#filter and renumber in a single expression so each frame gets a fresh 0..n-1 index
in_common_2020 = pd_count_2020['Sensor_Name'].isin(sensor_names_common)
pd_count_2020 = pd_count_2020[in_common_2020].reset_index(drop=True)

in_common_2021 = pd_count_2021['Sensor_Name'].isin(sensor_names_common)
pd_count_2021 = pd_count_2021[in_common_2021].reset_index(drop=True)

In [158]:
#confirming row count and columns are consistent between 2020 and 2021 dataframe

In [294]:
# Print the 2020 per-location totals to check the row and column counts
print(pd_count_2020)

                       Sensor_Name  Hourly_Counts
0                    231 Bourke St        1342643
1                     Alfred Place         659680
2                   Birrarung Marr        1502283
3   Bourke St - Spencer St (North)        2474396
4   Bourke St - Spencer St (South)          58787
..                             ...            ...
61                Town Hall (West)        5260684
62                  Victoria Point         528236
63                 Waterfront City         389700
64                     Webb Bridge        1107272
65                  Westwood Place          41643

[66 rows x 2 columns]


In [295]:
# Print the 2021 per-location totals to check the row and column counts
print(pd_count_2021)

                       Sensor_Name  Hourly_Counts
0                    231 Bourke St        2099566
1                     Alfred Place         461953
2                   Birrarung Marr        1272681
3   Bourke St - Spencer St (North)        2316080
4   Bourke St - Spencer St (South)         130735
..                             ...            ...
61                Town Hall (West)        6155040
62                  Victoria Point         309276
63                 Waterfront City         374339
64                     Webb Bridge        1149161
65                  Westwood Place         261172

[66 rows x 2 columns]


In [296]:
#creating new column in 2021 df with the change in pedestrian count between 2021 and 2020
# Rows of the two frames are paired by position, so first confirm that both list the
# same sensors in the same order; otherwise counts of different locations would be
# subtracted from each other.
assert np.array_equal(
    pd_count_2021['Sensor_Name'].to_numpy(), pd_count_2020['Sensor_Name'].to_numpy()
), 'pd_count_2020 and pd_count_2021 must list the same sensors in the same order'

# Subtracting already yields 0 where the two counts are equal, so no special case is needed.
pd_count_2021['diff'] = (
    pd_count_2021['Hourly_Counts'].to_numpy() - pd_count_2020['Hourly_Counts'].to_numpy()
)

In [297]:
# Sort ascending by 'diff' so the biggest drops in pedestrian count for year 2021 come first,
# then show the first rows
pd_count_2021.sort_values(by='diff').head()

Unnamed: 0,Sensor_Name,Hourly_Counts,diff
22,Flinders St-Elizabeth St (East),5852590,-1507594
34,Melbourne Central,3327672,-814767
51,Spencer St-Collins St (North),3163176,-785310
50,Southern Cross Station,1142488,-623496
28,Little Collins St-Swanston St (East),2396703,-515796


In [298]:
#4.

## Location that has the most growth in the last year (2021)
## Sorting descending by 'diff' puts the largest increases first

pd_count_2021.sort_values(by='diff', ascending=False).head()

Unnamed: 0,Sensor_Name,Hourly_Counts,diff
55,State Library - New,3994935,2188455
18,Flinders La-Swanston St (West),8187673,1489762
21,Flinders Ln - Degraves St (South),1928889,1486555
58,Swanston St - RMIT Building 80,2011218,1339490
56,Swanston St - City Square,2195660,1206102
