In [1]:
import pandas as pd
import numpy as np
import datetime

In [2]:
# method returning graph showing how many journeys were started by day of the week

# method that takes a string name of a csv file in the same folder and
# returns a DF of the csv contents
def getDFfromCSV(csvFileName):
    weeklyDF = pd.read_csv(csvFileName)
    return weeklyDF

## Method returning a DF of journeys started in this week
### Columns ordered by day : "StartStation Id", "StartStation Name", "Start Date"

In [3]:
def toWeeklyStartedDF(weeklyDF):
    # New DataFrame from extracted Start Station Data from original data Set
    startedJourneys = pd.DataFrame(weeklyDF[["StartStation Id", "StartStation Name", "Start Date"]])
    
    # sort the bike rides in chronological order for the first week of January
    # format date column into datetime variable
    startedJourneys["Start Date"] = pd.to_datetime(startedJourneys["Start Date"], format='%d/%m/%Y %H:%M')
    startedJourneys.sort_values(by= "Start Date", inplace=True)
    
    return startedJourneys

In [4]:
def toWeeklyEndedDF(weeklyDF):
    # New DataFrame from extracted End Station Data from original data Set
    endedJourneys = pd.DataFrame(weeklyDF[["EndStation Id", "EndStation Name", "End Date"]])
    
    # sort the bike rides in chronological order for the first week of January
    # format date column into datetime variable
    endedJourneys["End Date"] = pd.to_datetime(endedJourneys["End Date"], format='%d/%m/%Y %H:%M')
    endedJourneys.sort_values(by= "End Date", inplace=True)
    
    return endedJourneys

In [5]:
# Create a day and date column
def giveDFDateDayNameColumns(startedJourneys, columnName):
     # Create a new date and Day Name column that separates data per day
    startedJourneys["Day/Month"] = startedJourneys[columnName].apply(lambda x: "%d/%d/%d" % (x.day, x.month,x.year))
    startedJourneys["Day/Month"] = startedJourneys[columnName].dt.date
    startedJourneys["Day Name"] = startedJourneys[columnName].apply(lambda x: "%a" % (x.day_name()))
    return startedJourneys

In [6]:
# demandByTime - bike borrow requests per day
def toGroupByDayDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    startedJourneys = giveDFDateDayNameColumns(startedJourneys, "Start Date")
    counterDailyJourneyStarts = startedJourneys.groupby(["Day/Month"], as_index=False).size()
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    return counterDailyJourneyStarts

In [7]:
# demandByTime - bike return requests per day
def toGroupByDayDFEnd(endedJourneys):
    # Create a new column that separeates data per day and counts how many bike return requests
    endedJourneys = giveDFDateDayNameColumns(endedJourneys, "End Date")
    counterDailyJourneyEnds = endedJourneys.groupby(["Day/Month"], as_index=False).size()
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    return counterDailyJourneyEnds

In [8]:
# Journeys started per hour per day starting Monday-Sunday
def toGroupByDayByHourDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    #startedJourneys["Day/Month"] = startedJourneys["Start Date"].apply(lambda x: "%d/%d %a" % (x.day, x.month, x.day_name()))
    hourlyCountsSeries = startedJourneys.groupby([startedJourneys["Start Date"].dt.weekday.rename("Day"), startedJourneys["Start Date"].dt.hour.rename("Hour")]).size()
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0).reset_index()                             
    hourlyCountsDF.columns = ["Day of Week", "Hour", "Number of Journeys Started"]                           
    
    #hourlyCountsDF =hourlyCountsSeries.to_frame('size')
    #hourlyCountsSeries = hourlyCountsSeries["Start Date", "Start Date", "StartStation Id"]
    #hourlyCountsDF.index.names = ["Start Date1", "Start Date2","size"]
    #hourlyCountsDF.columns = hourlyCountsDF.columns.droplevel(0)
    
    #hourlyCountsSeries = hourlyCountsSeries.reset_index(name="new name")
    #df = pd.DataFrame(hourlyCountsSeries).reset_index()
    #df.columns = ['Day', 'Hour', "Num Journeys Started"]
    return hourlyCountsDF

In [9]:
def createAllHoursTotalDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    #startedJourneys["Day/Month"] = startedJourneys["Start Date"].apply(lambda x: "%d/%d %a" % (x.day, x.month, x.day_name()))
    hourlyCountsSeries = startedJourneys.groupby([startedJourneys["Start Date"].dt.hour.rename("Hour")]).size()
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0)                             
    hourlyCountsDF.columns = ["Hour", "Total Number of Journeys Started"]                           
    return hourlyCountsDF

In [10]:
# Returns the mean demand for each hour Mon - Fri 
def createAllHoursMonToFriMeanDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    #startedJourneys["Day/Month"] = startedJourneys["Start Date"].apply(lambda x: "%d/%d %a" % (x.day, x.month, x.day_name()))
    startedJourneysMonToFri = startedJourneys.loc[startedJourneys["Day Name"].isin(["'Monday'","'Tuesday'","'Wednesday'","'Thursday'","'Friday'"])]
    #startedJourneysMonToFri = startedJourneys[(startedJourneys["Day Name"]== 'Monday') | (startedJourneys["Day Name"] == 'Tuesday')]
    
    hourlyCountsSeries = startedJourneysMonToFri.groupby([startedJourneysMonToFri["Start Date"].dt.hour.rename("Hour")]).size()/5
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0)                             
    hourlyCountsDF.columns = ["Hour", "Total Number of Journeys Started"]                           
    return hourlyCountsDF

In [11]:
# Returns the mean demand for each hour for the Weekend
def createAllHoursSatSunTotalDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    #startedJourneys["Day/Month"] = startedJourneys["Start Date"].apply(lambda x: "%d/%d %a" % (x.day, x.month, x.day_name()))
    startedJourneysMonToFri = startedJourneys.loc[~startedJourneys["Day Name"].isin(["'Monday'","'Tuesday'","'Wednesday'","'Thursday'","'Friday'"])]
    #startedJourneysMonToFri = startedJourneys[(startedJourneys["Day Name"]== 'Monday') | (startedJourneys["Day Name"] == 'Tuesday')]
    
    hourlyCountsSeries = startedJourneysMonToFri.groupby([startedJourneysMonToFri["Start Date"].dt.hour.rename("Hour")]).size()/2
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0)                             
    hourlyCountsDF.columns = ["Hour", "Total Number of Journeys Started"]                           
    return hourlyCountsDF

In [12]:
def createAllHoursMeanDF(startedJourneys):
    # Create a new column that separeates data per day and counts how many bike borrow requests
    #startedJourneys["Day/Month"] = startedJourneys["Start Date"].apply(lambda x: "%d/%d %a" % (x.day, x.month, x.day_name()))
    hourlyCountsSeries = startedJourneys.groupby([startedJourneys["Start Date"].dt.hour.rename("Hour")]).size()/7
    #counterDailyJourneyStarts.sort_values(by="Day/Month")
    
    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0)                             
    hourlyCountsDF.columns = ["Hour", "Average Number of Journeys Started"]                           
    return hourlyCountsDF

In [13]:
def numGainOrLossByStationStart(startedJourneys):
    startedDayStationDF = startedJourneys.groupby(["Day/Month", "StartStation Name","StartStation Id"], as_index=False).size()
    
    return startedDayStationDF

In [14]:
def numGainOrLossByStationEnd(endedJourneys):
    endedDayStationDF = endedJourneys.groupby(["Day/Month", "EndStation Name", "EndStation Id"], as_index=False).size()
    
    return endedDayStationDF

In [15]:
def preprocessData(weekDF):
    """
    preprocessData removes invalid rides and formats the Dates to the datetime type
    :param weekDF: a dataframe containg a single week of TfL bike ride data 
    :return: the same dataframe with invalid rides removed and Start_Date and End_Date columns formatted
    """ 
    # Type Format the Time Columns in the DataFrame
    weekDF["Start Date"] = pd.to_datetime(weekDF["Start Date"], format='%d/%m/%Y %H:%M')
    weekDF["End Date"] = pd.to_datetime(weekDF["End Date"], format='%d/%m/%Y %H:%M')
    weekDF["Hire Time"]= (weekDF["End Date"] - weekDF["Start Date"]).dt.total_seconds() / 60
    
    # Remove any rides that start and end at the same station and are less than 3 minutes long
    counter = 0
    for index,row in weekDF.iterrows():
        if (row["Hire Time"]<3) & (row["StartStation Name"]==row["EndStation Name"]):
            weekDF = weekDF.drop(labels=index, axis=0)
            counter+=1
    weekDF= weekDF.reset_index().drop(labels="index", axis=1)        
    print("Data Cleaned. Rides Removed =",counter)
    return weekDF

janWeek1 = '195JourneyDataExtract01Jan2020-07Jan2020.csv'
JanW1DF = preprocessData(getDFfromCSV(janWeek1))
JanW1DF

Data Cleaned. Rides Removed = 463


Unnamed: 0,Rental Id,Duration,Bike Id,End Date,EndStation Id,EndStation Name,Start Date,StartStation Id,StartStation Name,Hire Time
0,94113398,960,6800,2020-01-07 14:07:00,541,"Green Park Station, Mayfair",2020-01-07 13:51:00,164,"Cleveland Gardens, Bayswater",16.0
1,94117049,600,8691,2020-01-07 17:06:00,48,"Godliman Street, St. Paul's",2020-01-07 16:56:00,323,"Clifton Street, Shoreditch",10.0
2,94110497,540,531,2020-01-07 11:01:00,654,"Ashmole Estate, Oval",2020-01-07 10:52:00,624,"Courland Grove, Wandsworth Road",9.0
3,94050449,600,8150,2020-01-04 12:27:00,685,"Osiers Road, Wandsworth",2020-01-04 12:17:00,774,"Hurlingham Park, Parsons Green",10.0
4,94019122,1140,15515,2020-01-02 16:31:00,676,"Hartington Road, Stockwell",2020-01-02 16:12:00,83,"Panton Street, West End",19.0
...,...,...,...,...,...,...,...,...,...,...
126333,94067445,360,14461,2020-01-05 13:25:00,183,"Riverlight North, Nine Elms",2020-01-05 13:19:00,800,"Sopwith Way, Battersea Park",6.0
126334,94105080,1260,16363,2020-01-07 08:50:00,129,"Golden Square, Soho",2020-01-07 08:29:00,804,"Good's Way, King's Cross",21.0
126335,94056128,780,3351,2020-01-04 16:11:00,376,"Millbank Tower, Pimlico",2020-01-04 15:58:00,800,"Sopwith Way, Battersea Park",13.0
126336,94115074,420,16831,2020-01-07 15:40:00,695,"Islington Green, Angel",2020-01-07 15:33:00,804,"Good's Way, King's Cross",7.0


In [16]:
getDFfromCSV(janWeek1)

Unnamed: 0,Rental Id,Duration,Bike Id,End Date,EndStation Id,EndStation Name,Start Date,StartStation Id,StartStation Name
0,94113398,960,6800,07/01/2020 14:07,541,"Green Park Station, Mayfair",07/01/2020 13:51,164,"Cleveland Gardens, Bayswater"
1,94117049,600,8691,07/01/2020 17:06,48,"Godliman Street, St. Paul's",07/01/2020 16:56,323,"Clifton Street, Shoreditch"
2,94110497,540,531,07/01/2020 11:01,654,"Ashmole Estate, Oval",07/01/2020 10:52,624,"Courland Grove, Wandsworth Road"
3,94050449,600,8150,04/01/2020 12:27,685,"Osiers Road, Wandsworth",04/01/2020 12:17,774,"Hurlingham Park, Parsons Green"
4,94019122,1140,15515,02/01/2020 16:31,676,"Hartington Road, Stockwell",02/01/2020 16:12,83,"Panton Street, West End"
...,...,...,...,...,...,...,...,...,...
126796,94067445,360,14461,05/01/2020 13:25,183,"Riverlight North, Nine Elms",05/01/2020 13:19,800,"Sopwith Way, Battersea Park"
126797,94105080,1260,16363,07/01/2020 08:50,129,"Golden Square, Soho",07/01/2020 08:29,804,"Good's Way, King's Cross"
126798,94056128,780,3351,04/01/2020 16:11,376,"Millbank Tower, Pimlico",04/01/2020 15:58,800,"Sopwith Way, Battersea Park"
126799,94115074,420,16831,07/01/2020 15:40,695,"Islington Green, Angel",07/01/2020 15:33,804,"Good's Way, King's Cross"


## Main

In [17]:
#Main
janWeek1 = '195JourneyDataExtract01Jan2020-07Jan2020.csv'
JanW1DF = preprocessData(getDFfromCSV(janWeek1))

juneWeek1 = "217JourneyDataExtract03Jun2020-09Jun2020.csv"
juneW1DF = getDFfromCSV(juneWeek1)

juneWeek2 = "218JourneyDataExtract10Jun2020-16Jun2020.csv"
juneW2DF = getDFfromCSV(juneWeek2)

febW1 = "200JourneyDataExtract05Feb2020-11Feb2020.csv"
febW1DF = preprocessData(getDFfromCSV(febW1))

startedJourneys = toWeeklyStartedDF(febW1DF)

dayDF = toGroupByDayDF(startedJourneys)
hoursInDayDF = toGroupByDayByHourDF(startedJourneys)
allHoursTotalDF = createAllHoursTotalDF(startedJourneys)
allHoursMeanDF = createAllHoursMeanDF(startedJourneys)
JanW1DF

Data Cleaned. Rides Removed = 463


FileNotFoundError: [Errno 2] No such file or directory: '218JourneyDataExtract10Jun2020-16Jun2020.csv'

# Calculate the average Change in Bike inventory for each station per week

In [None]:
# Using First Week of February which is a regular pre-covid week
# End Table stores the number of journeys ended at each station for each day of this week in the "size" column
endedJourneys = toWeeklyEndedDF(febW1DF)
endByDay = toGroupByDayDFEnd(endedJourneys)
endTable = numGainOrLossByStationEnd(endedJourneys)
endedJourneys

In [None]:
startTable = numGainOrLossByStationStart(startedJourneys)
startTable

In [None]:
# startTable stores the change in the inventory of bikes for each station each day for this week 
# size here is the number of journeys started at the station or number of bikes borrowed from the station
startTable["Change in number of bikes"] = endTable["size"] - startTable["size"]
startTable

In [None]:
#not useful
#numGainOrLossByStationStart(startedJourneys).plot(kind="bar")

# Find the change in Bike inventory for each station after the AM and PM Peaks

## AM Peak pre-covid = 8AM, use data for 9AM
## PM Peak pre-covid = 5PM, use data for 7PM 

In [None]:
## For one days data
## Find journeys started for each station for each hour
## Sum Journeys started until 9am for each station, 
def journeysEndedBeforeTime(weekDF, timeUntil):
    """
    journeysEndedBeforeTime Sums the journeys ended until a given time, for each station
    :param weekDF: a dataframe containg a single week of TfL bike ride data 
    :param timeUntil: the hour inclusive you want the sum of the number of journeys ended until
    :return: a DF giving you the number of journeys ended at each station until the time
                [StationID, End Station Name, Total Number of journeys Ended]
    """ 
    ## Find journeys ended for each station for each hour
    endedJourneys = toWeeklyEndedDF(weekDF)
    giveDFDateDayNameColumns(endedJourneys, "End Date")

    endedJourneysWed = endedJourneys.loc[endedJourneys["Day Name"].isin(["'Wednesday'"])]

    hourlyCountsSeries = endedJourneysWed.groupby(["EndStation Name", "EndStation Id",endedJourneysWed["End Date"].dt.hour.rename("Hour")]).size()

    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0).reset_index()                             
    hourlyCountsDF.columns = [ "EndStation Id", "Hour", "EndStation Name","Total Number of Journeys Ended"] 

    ## Sum journeys ended for each station until 9AM and then do ended-started

    endedAt9AMEachStationDF = pd.DataFrame([], columns=["Station Name", "Journeys Ended by 9AM"])
    
    listUniqueStationNames = pd.unique(hourlyCountsDF['EndStation Name'])
    stationNamesDF = pd.DataFrame(listUniqueStationNames)
    
    before9AMDF = hourlyCountsDF.loc[(hourlyCountsDF['Hour'] <= timeUntil)]
    before9AMDF= before9AMDF.groupby(["EndStation Name","EndStation Id"]).sum()

    before9AMDF = before9AMDF.reset_index(level=0).reset_index()                             
    before9AMDF= before9AMDF.drop(["Hour"], axis="columns")
    return before9AMDF
endedBefore9AMDF = journeysEndedBeforeTime(febW1DF, 9)
endedBefore9AMDF

In [None]:
def journeysStartedBeforeTime(weekDF, timeUntil):
    """
    journeysStartedBeforeTime Sums the journeys started until a given time, for each station
    :param weekDF: a dataframe containg a single week of TfL bike ride data 
    :param timeUntil: the hour inclusive you want the sum of the number of journeys started until
    :return: a DF giving you the number of journeys started at each station until the time
                [StationID, Start Station Name, Total Number of journeys started]
    """ 
    ## Find journeys ended for each station for each hour
    startedJourneys = toWeeklyStartedDF(weekDF)
    giveDFDateDayNameColumns(startedJourneys, "Start Date")

    startedJourneysWed = startedJourneys.loc[startedJourneys["Day Name"].isin(["'Wednesday'"])]

    hourlyCountsSeries = startedJourneysWed.groupby(
        ["StartStation Name", "StartStation Id",startedJourneysWed["Start Date"].dt.hour.rename("Hour")]).size()

    hourlyCountsDF = pd.DataFrame(hourlyCountsSeries) 
    hourlyCountsDF = hourlyCountsDF.reset_index(level=0).reset_index()                             
    hourlyCountsDF.columns = [ "StartStation Id", "Hour", "StartStation Name","Total Number of Journeys Started"] 

    ## Sum journeys ended for each station until 9AM and then do ended-started

    startedAt9AMEachStationDF = pd.DataFrame([], columns=["Station Name", "Journeys Started by 9AM"])
    listUniqueStationNames = pd.unique(hourlyCountsDF['StartStation Name'])
    stationNamesDF = pd.DataFrame(listUniqueStationNames)

    before9AMDF = hourlyCountsDF.loc[(hourlyCountsDF['Hour'] <= timeUntil)]
    before9AMDF= before9AMDF.groupby(["StartStation Name","StartStation Id"]).sum()

    before9AMDF = before9AMDF.reset_index(level=0).reset_index()                             
    before9AMDF= before9AMDF.drop(["Hour"], axis="columns")
    return before9AMDF

startedBefore9AMDF = journeysStartedBeforeTime(febW1DF, 9)
startedBefore9AMDF

In [None]:

#joined9AMDF = pd.concat([startedBefore9AMDF, endedBefore9AMDF], axis=1)
def mergeEndedAndStartDFByTime(endedBefore9AMDF,startedBefore9AMDF, timeString ):
    joined9AMDF = pd.merge(startedBefore9AMDF, endedBefore9AMDF, how='outer', left_on = ['StartStation Name','StartStation Id'], right_on = ['EndStation Name','EndStation Id'])  
    joined9AMDF['StartStation Name'] = joined9AMDF['StartStation Name'].fillna(joined9AMDF['EndStation Name'])
    joined9AMDF['StartStation Id'] = joined9AMDF['StartStation Id'].fillna(joined9AMDF['EndStation Id'])
    
    #joined9AMDF= joined9AMDF.drop(['EndStation Name'], axis="columns")
    
    #joined9AMDF = pd.merge(startedBefore9AMDF, endedBefore9AMDF, how='outer', left_on = 'StartStation Id', right_on = 'EndStation Id')  
    
    
    joined9AMDF= joined9AMDF.drop(['EndStation Name', "EndStation Id"], axis="columns")
    
    joined9AMDF= joined9AMDF.fillna(0)
    joined9AMDF[("Change In Inventory by "+timeString)] = joined9AMDF["Total Number of Journeys Ended"] - joined9AMDF["Total Number of Journeys Started"]
    
    return joined9AMDF

def changeInBikeInventoryForTime(weekDF, timeInt, timeString):
    endedBefore9AMDF = journeysEndedBeforeTime(weekDF, timeInt)
    startedBefore9AMDF = journeysStartedBeforeTime(weekDF, timeInt)
    endAndStartedJourneys9AMDF = mergeEndedAndStartDFByTime(endedBefore9AMDF, startedBefore9AMDF, timeString)
    return endAndStartedJourneys9AMDF
endAndStartedJourneys9AMDF = changeInBikeInventoryForTime(febW1DF, 9, "9AM")
endAndStartedJourneys9AMDF

In [None]:

endAndStartedJourneys7PMDF = changeInBikeInventoryForTime(febW1DF, 19, "7PM")
endAndStartedJourneys7PMDF

In [None]:
#End of the day 
endAndStartedJourneys11PMDF = changeInBikeInventoryForTime(febW1DF, 23, "11PM")
endAndStartedJourneys11PMJUNEDF = changeInBikeInventoryForTime(juneW1DF, 23, "11PM")
endAndStartedJourneys11PMJUNEDF

In [None]:
#Min Activity after AM Peak
endAndStartedJourneys10AMDF = changeInBikeInventoryForTime(febW1DF, 10, "10AM")
endAndStartedJourneys10AMDF

# Total Activity per day for all stations in a week

In [None]:
totalActivityPerDayAllStationDF = pd.concat([dayDF,endByDay]).groupby(['Day/Month']).sum().reset_index()
totalActivityPerDayAllStationDF

In [None]:
totalActivityPerDayAllStationDF = totalActivityPerDayAllStationDF.sort_values(by = ['Day/Month'])
totalActivityPerDayAllStationDF

In [None]:
#totalActivityPerDayAllStationDF["Day/Month"] = pd.to_datetime(totalActivityPerDayAllStationDF["Day/Month"], format= '%d/%m/%Y')
#endedJourneys["End Date"] = pd.to_datetime(endedJourneys["End Date"], format='%d/%m/%Y %H:%M')


#totalActivityPerDayAllStationDF["Date"] = totalActivityPerDayAllStationDF["Day/Month"].apply(lambda x: "%d/%d" % (x.day, x.month))
totalActivityPerDayAllStationDF

# Graphs and Plots

In [None]:
allHoursTotalDFTemp = allHoursTotalDF.drop("Hour", axis = 'columns')
allHoursTotalDF

In [None]:
# The value for 0 is the number of rides between 00:00 and 01:00, the value for 1 is the number of rides between 
# 01:00 and 02:00 and so on
xaxisLabels =[]
for i in range(0,24):
    xaxisLabels.append(i)
allHoursTotalDFTemp.plot(kind="line", figsize=(15,10), 
                         xlabel= "Time in 24hr",
                         ylabel="Total Number of Journeys Started", 
                         xticks = xaxisLabels,
                         title = "Average no. journeys started every hour Monday to Sunday",
                         grid=True)

In [None]:
startedJourneysFeb = toWeeklyStartedDF(febW1DF)
startedJourneysFeb = giveDFDateDayNameColumns(startedJourneysFeb, "Start Date")
startedJourneysFeb.head()

## Average of Journeys started each hour Mon-Fri

In [None]:
monToFriHourly = createAllHoursMonToFriMeanDF(startedJourneysFeb)

startedJourneysJune1 = toWeeklyStartedDF(juneW1DF)
startedJourneysJune1 = giveDFDateDayNameColumns(startedJourneysJune1, "Start Date")
janFebJuneMonToFriTotalHourlyDF = monToFriHourly.drop("Total Number of Journeys Started", axis="columns")

janFebJuneMonToFriTotalHourlyDF["Pre-Covid Week (First Week of Feb)"] = createAllHoursMonToFriMeanDF(startedJourneysFeb)["Total Number of Journeys Started"]
janFebJuneMonToFriTotalHourlyDF["Lockdown Week (First Week of June)"] = createAllHoursMonToFriMeanDF(startedJourneysJune1)["Total Number of Journeys Started"]
janFebJuneMonToFriTotalHourlyDF

In [None]:
# The value for 0 is the number of rides between 00:00 and 01:00, the value for 1 is the number of rides between 
# 01:00 and 02:00 and so on
xaxisLabels =[]
for i in range(0,24):
    xaxisLabels.append(i)
janFebJuneMonToFriTotalHourlyDF = janFebJuneMonToFriTotalHourlyDF.drop("Hour", axis="columns")
janFebJuneMonToFriTotalHourlyDF.plot(kind="line", figsize=(15,10), 
                         xlabel= "Time in 24hr",
                         ylabel="Total Number of Journeys Started", 
                         xticks = xaxisLabels,
                         title = "Average no. of rides started for each hour across all stations Monday to Friday",
                         grid=True)

## Average journeys started each hour Sat and Sunday average

In [None]:
createAllHoursSatSunTotalDF

monToFriHourly = createAllHoursSatSunTotalDF(startedJourneysFeb)

startedJourneysJune1 = toWeeklyStartedDF(juneW1DF)
startedJourneysJune1 = giveDFDateDayNameColumns(startedJourneysJune1, "Start Date")
janFebJuneSatSunTotalHourlyDF = monToFriHourly.drop("Total Number of Journeys Started", axis="columns")

janFebJuneSatSunTotalHourlyDF["Pre-Covid Week (First Week of Feb)"] = createAllHoursSatSunTotalDF(startedJourneysFeb)["Total Number of Journeys Started"]
janFebJuneSatSunTotalHourlyDF["Lockdown Week (First Week of June)"] = createAllHoursSatSunTotalDF(startedJourneysJune1)["Total Number of Journeys Started"]
janFebJuneSatSunTotalHourlyDF

In [None]:
janFebJuneSatSunTotalHourlyDF = janFebJuneSatSunTotalHourlyDF.drop("Hour", axis="columns")
janFebJuneSatSunTotalHourlyDF.plot(kind="line", figsize=(15,10), 
                         xlabel= "Time in 24hr",
                         ylabel="Total Number of Journeys Started", 
                         xticks = xaxisLabels,
                         title = "Average no. of rides started for each hour across all stations for the weekend",
                         grid=True)

In [None]:
startedJourneys = toWeeklyStartedDF(febW1DF)
allHoursTotalDF = createAllHoursTotalDF(startedJourneys)


In [None]:
allHoursMeanDF.plot(kind="line", figsize=(15,10),xlabel= "Time in 24hr", ylabel="Mean Number of Journeys Started")

In [None]:
hoursInDayDF

In [None]:
import matplotlib.pyplot as plt
print("imported")

In [None]:
def plotHoursInDayForWeek(weekDF, graphTitle):
    startedJourneys = toWeeklyStartedDF(weekDF)
    hoursInDayDF = toGroupByDayByHourDF(startedJourneys)
    xaxisLabels =[]
    days=["Mon","Tue","Wed","Thur","Fri","Sat","Sun",""]
    xaxisLabels2=[]
    for i in range(0,8):
        #xaxisLabels2.append(days[i])
        xaxisLabels.append(i*24)
    hoursInDayDFTemp = hoursInDayDF.drop(["Day of Week", "Hour"], axis='columns')    
    ax= hoursInDayDFTemp.plot(kind="line", figsize=(15,10),
                      xticks=xaxisLabels,
                      xlabel= "Hour of the Week", 
                      ylabel="Total Number of Journeys Started",
                      title=graphTitle,
                      grid = True)
    ax.set_xticklabels(days)
    plt.show()   
    return hoursInDayDFTemp
plotHoursInDayForWeek(febW1DF, "Number of journeys started for each hour of the first week of February 2020")   
plotHoursInDayForWeek(juneW1DF, "Number of journeys started for each hour of the first week of June 2020")   
plotHoursInDayForWeek(JanW1DF, "Number of journeys started for each hour of the first week of January 2020")   

In [None]:
dayDF.plot(kind="line", figsize=(15,10), ylabel="Total Number of Journeys Started")

In [None]:
dayDF

## GeoPandas

In [None]:
import networkx as nx

In [None]:
#import geopandas as gpd
#from geopandas import GeoDataFrame
#from shapely.geometry import Point


print("imported")