In [1]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pandas.plotting import scatter_matrix
from sklearn import cluster
from scipy import stats  ## for z-score. do we still need it?

In [2]:
# Read Scan On Data
scanOn_samp0 = pd.read_csv('all_scanOn_samp0.csv')

scanOn_samp0.head()

Unnamed: 0,Mode,BusinessDate,DateTime,CardID,CardType,VehicleID,ParentRoute,RouteID,StopID
0,1,2015-07-02,2015-07-02 13:08:13,1524480,9,1222,415,10883,15084
1,3,2015-07-02,2015-07-02 07:33:58,1756270,2,1091,24,15296,18566
2,1,2015-07-02,2015-07-02 16:42:38,10560630,2,1469,862,10227,19824
3,1,2015-07-01,2015-07-01 11:07:08,11812440,2,2886,458,8591,21184
4,1,2015-07-01,2015-07-01 17:28:14,12272500,2,2853,670,16447,21296


In [3]:
# Read card types
column_names = ("Card_SubType_ID","Card_SubType_Desc","Payment_Type","Fare_Type","Concession_Type","MI_Card_Group")
card_types = pd.read_csv('card_types.txt',sep="|", names=column_names)

card_types.head()

Unnamed: 0,Card_SubType_ID,Card_SubType_Desc,Payment_Type,Fare_Type,Concession_Type,MI_Card_Group
0,62,Pensioner Concession Card holder PC,Paid,Concession,Other Concession,Other Concession
1,29,Vic HCC - Sickness allowance,Paid,Concession,Other Concession,Other Concession
2,43,First Class Pass,Free,Concession,Free Pass,Other
3,34,PCC - Widow allowance,Paid,Concession,Other Concession,Other Concession
4,47,Victoria Police Travel Authority,Free,Concession,Free Pass,Other


In [4]:
# Read stop locations
column_names = ("StopLocationID","StopNameShort","StopNameLong","StopType","SuburbName","PostCode","RegionName","LocalGovernmentArea","StatDivision","GPSLat","GPSLong")
stop_locations = pd.read_csv('stop_locations.txt',sep="|", names=column_names)

stop_locations.head()

Unnamed: 0,StopLocationID,StopNameShort,StopNameLong,StopType,SuburbName,PostCode,RegionName,LocalGovernmentArea,StatDivision,GPSLat,GPSLong
0,867,Weemala Court,Weemala Ct/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088.0,Melbourne,Banyule,Greater Metro,-37.689596,145.105088
1,868,Crana Grove,Crana Gr/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088.0,Melbourne,Banyule,Greater Metro,-37.686742,145.105588
2,869,Punkerri Circuit,Punkerri Cct/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088.0,Melbourne,Banyule,Greater Metro,-37.683643,145.108743
3,870,Plenty River Drive,231 Plenty River Dr (Greensborough),Kerbside,Greensborough,3088.0,Melbourne,Banyule,Greater Metro,-37.682591,145.111331
4,875,Oldstead Rd,Oldstead Rd/Diamond Creek Rd (Greensborough),Kerbside,Greensborough,3088.0,Melbourne,Banyule,Greater Metro,-37.685336,145.117319


In [5]:
# Read calendar
column_names = ("Date","CalendarYear","FinancialYear","FinancialMonth","CalendarMonth","CalendarMonthSeq","CalendarQuarter","FinancialQuarter","CalendarWeek","FinancialWeek","DayType""DayTypeCategory","WeekdaySeq","WeekDay","FinancialMonthSeq","FinancialMonthName","MonthNumber","ABSWeek","WeekEnding","QuarterName")
calendar = pd.read_csv('calendar.txt',sep="|", names=column_names)

calendar.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Date,CalendarYear,FinancialYear,FinancialMonth,CalendarMonth,CalendarMonthSeq,CalendarQuarter,FinancialQuarter,CalendarWeek,FinancialWeek,DayTypeDayTypeCategory,WeekdaySeq,WeekDay,FinancialMonthSeq,FinancialMonthName,MonthNumber,ABSWeek,WeekEnding,QuarterName
20170930,2017-09-30,2017,FY2017 - 2018,9,September,201709,2017Q3,FY17-18Q1,39,13,Saturday,Weekend,Saturday,6,Saturday,201709,Sep 17/,9,1239,w/e 2017-09-30,September Qtr. 2017
20080615,2008-06-15,2008,FY2007 - 2008,18,June,200806,2008Q2,FY07-08Q4,24,50,Sunday,Weekend,Sunday,7,Sunday,200818,Jun 07/,6,755,w/e 2008-06-21,June Qtr. 2008
20040222,2004-02-22,2004,FY2003 - 2004,14,February,200402,2004Q1,FY03-04Q3,8,34,Sunday,Weekend,Normal Sunday,7,Sunday,200414,Feb 03/,2,530,w/e 2004-02-28,March Qtr. 2004
20190620,2019-06-20,2019,FY2018 - 2019,18,June,201906,2019Q2,FY18-19Q4,24,51,Weekday,Weekday,0,4,Thursday,201906,Jun 18/,6,1329,w/e 2019-06-22,June Qtr. 2019
20170831,2017-08-31,2017,FY2017 - 2018,8,August,201708,2017Q3,FY17-18Q1,35,9,Weekday,Weekday,Normal Weekday,4,Thursday,201708,Aug 17/,8,1235,w/e 2017-09-02,September Qtr. 2017
