## Data cleaning for TFL Data and Unix Timestamp Conversion

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import time
import datetime
import os

In [2]:
# List the working directory so the input CSV file names can be confirmed
files = os.listdir(".")
files

['.ipynb_checkpoints',
 'Create Buffer and Convert to GeoJSON.R',
 'FOI-1215.csv',
 'London Tubestations Coordinates.csv',
 'Merge Tube Data and Create List of Unique Unix Timestamps .ipynb',
 'TFL Data Cleaning and Unix Timestamp Conversion.ipynb']

In [3]:
# FOI-1215.csv holds the recorded entry and exit taps at each station
night_tube = pd.read_csv(filepath_or_buffer="FOI-1215.csv")

In [4]:
# Shorten the verbose source column headers
night_tube = night_tube.rename(
    columns={
        "Station Name": "Station",
        "Number of Entries": "Entries",
        "Number of Exits": "Exits",
    }
)

In [5]:
# Preview the first five rows
night_tube.head(n=5)

Unnamed: 0,Date,Station,Time,Entries,Exits
0,19/8/2016,Bank,0:30:00,228,77
1,19/8/2016,Bank,1:00:00,189,117
2,19/8/2016,Bank,1:30:00,184,104
3,19/8/2016,Bank,2:00:00,137,74
4,19/8/2016,Bank,2:30:00,84,37


Subset the night tube data to the relevant time window (02:00–04:30), the same window covered by the survey data

In [6]:
# Half-hour slots from 2:00:00 to 4:30:00, written the way the Time column spells them
lst = [str(hour) + suffix for hour in (2, 3, 4) for suffix in (":00:00", ":30:00")]

In [7]:
# Keep only the rows whose Time is one of the selected half-hour slots
in_window = night_tube["Time"].isin(lst)
night_tube = night_tube.loc[in_window]

In [8]:
# Cells holding the text "Fewer than 5" are set to 0 so the counts can be cast to integers
night_tube = night_tube.replace("Fewer than 5", 0)

In [9]:
# Cast the entry/exit counts to 64-bit integers.
# astype() converts each column in one vectorised step; the element-wise
# DataFrame.applymap used previously is deprecated in recent pandas releases.
cols = ["Entries", "Exits"]
night_tube[cols] = night_tube[cols].astype(np.int64)

In [10]:
# Renumber the rows from 0; the previous row labels are kept in a new "index" column
night_tube = night_tube.reset_index(drop=False)

In [11]:
# Discard the "index" column of old row labels, then preview the first row
night_tube = night_tube.drop(columns=["index"])
night_tube.head(n=1)

Unnamed: 0,Date,Station,Time,Entries,Exits
0,19/8/2016,Bank,2:00:00,137,74


In [12]:
# Join Date and Time into a single "D/M/YYYY H:MM:SS" string per row
dt = night_tube["Date"].str.cat(night_tube["Time"], sep=" ")
# Only the last expression of a cell is displayed, so this head() call shows nothing
dt.head()
# Number of combined date-time strings
dt.shape

(31960,)

Convert the combined date-time strings to Unix timestamps (seconds since 1970-01-01 00:00:00 UTC)

In [13]:
# Convert each "D/M/YYYY H:MM:SS" string to a Unix timestamp (float seconds).
#
# time.mktime() interprets a time tuple in the local timezone of whichever
# machine runs the notebook, so the same input produced different timestamps
# on differently configured machines. The timezone is now stated explicitly.
# NOTE(review): assumes the tap times are London local time. The value recorded
# for 19/8/2016 2:00:00 (about 1471568400) matches that reading - confirm
# against the data provider.
local_times = pd.to_datetime(dt, format="%d/%m/%Y %H:%M:%S").dt.tz_localize("Europe/London")
# Move to UTC and drop the tz so the epoch subtraction works on any pandas version
utc_naive = local_times.dt.tz_convert("UTC").dt.tz_localize(None)
seconds_since_epoch = (utc_naive - pd.Timestamp("1970-01-01")).dt.total_seconds()
# Assign by position (plain array), exactly as the original list assignment did
night_tube['unix'] = seconds_since_epoch.to_numpy()

In [14]:
night_tube.head(1)

Unnamed: 0,Date,Station,Time,Entries,Exits,unix
0,19/8/2016,Bank,2:00:00,137,74,1471568000.0


Save the new DataFrame as a CSV file

In [15]:
# Write the cleaned table, including the unix column, to disk.
# NOTE(review): with the default index=True the row index is written as an
# extra unnamed leading column - confirm the downstream reader expects it.
night_tube.to_csv(path_or_buf="night_unix.csv")