/
VariableCreation.R
94 lines (79 loc) 路 2.5 KB
/
VariableCreation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#' ---
#' title: "Variable Creation"
#' ---
#'
#' Functions to assist with typecasting variables from raw XML extraction
#' and creation of new calculated fields prior to upload to a DBMS
#'
#' [Source code](https://github.com/ruaridhw/london-tube/blob/master/2_analysis/r/VariableCreation.R)
library(data.table)
library(tibble)
library(magrittr)
library(lubridate)
#' This function relies on sp, rgdal, tibble and magrittr
# ---- modify_StopPoints
library(rgdal)
modify_StopPoints <- function(df){
# Define proj4 coordinate systems
bng <- "+init=epsg:27700"
latlon <- "+proj=longlat +datum=WGS84"
# Create spatial object using BNG coordinates
coord.bng <-
with(df,
SpatialPointsDataFrame(
cbind(Place_Location_Easting, Place_Location_Northing) %>%
as.integer() %>% matrix(ncol = 2),
data = tibble(AtcoCode,
Descriptor_CommonName,
Place_NptgLocalityRef),
proj4string = CRS(bng)))
# Cast as latlon coordinates
coord.latlon <- spTransform(coord.bng, CRS(latlon))
# Replace dataframe
df <- as_tibble(coord.latlon)
names(df) <- c("AtcoCode","CommonName","NptgLocalityRef","Longitude","Latitude")
df
}
# ---- modify_VehicleJourneys
modify_VehicleJourneys <- function(df){
within(df, {
DepartureTime %<>% as.POSIXct(format = "%T")
# Extract number of minutes since midnight from DepartureTime
DepartureMins <- DepartureTime %>%
{lubridate::minute(.) + 60 * lubridate::hour(.)}
})
}
# ---- modify_JourneyPatternTimingLinks
modify_JourneyPatternTimingLinks <- function(df){
within(df, {
From_SequenceNumber %<>% as.integer
To_SequenceNumber %<>% as.integer
# Extract timings from RunTime and WaitTime to create JourneyTime
RunTime %<>% substr(3, 3) %>% as.integer
WaitTime %<>% substr(3, 3) %>% as.integer
JourneyTime <- RunTime + ifelse(is.na(WaitTime), 0, WaitTime)
})
}
# ---- modify_RouteLinks
modify_RouteLinks <- function(df){
within(df, {
Distance %<>% as.integer
})
}
# ---- modify_Services
modify_Services <- function(df){
within(df, {
OpPeriod_StartDate %<>% as.Date
OpPeriod_EndDate %<>% as.Date
})
}
#' Convenience dispatch function
# ---- modify
modify_df <- function(tablename, df){
tables_to_modify <- c("StopPoints", "VehicleJourneys",
"JourneyPatternTimingLinks", "RouteLinks", "Services")
if (tablename %in% tables_to_modify) {
df %<>% list %>% do.call(what = paste0("modify_", tablename))
}
df
}