# Trip Data Generation

In this notebook we show to generate the trip data for maximum occupancy modeling and analysis. The initial step is to assign a zero to all the negative occupancies. Also, we set a maximum ocupancy value of 32. Then, we group by date and hour to obtain the maximum occupancy. Also, we average temperature and precipitation. 


In [1]:
library(tidyverse)
library(readr)

“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.2     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
carta = read_csv('data/jmartinez/Transit_Data/carta.csv', progress = F) 


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  .default = col_double(),
  scheduled_arrival_time = [34mcol_datetime(format = "")[39m,
  actual_arrival_time = [34mcol_datetime(format = "")[39m,
  direction_desc = [31mcol_character()[39m,
  service_period = [31mcol_character()[39m,
  date = [34mcol_date(format = "")[39m,
  scheduled_datetime = [34mcol_datetime(format = "")[39m,
  actual_arrival_datetime = [34mcol_datetime(format = "")[39m,
  trip_start_time = [34mcol_datetime(format = "")[39m,
  trip_date = [34mcol_date(format = "")[39m,
  service_kind = [31mcol_character()[39m
)
[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.


“1658648 parsing failures.
    row      col               expected actual                                    file
5820871 route_id no trailing characters    10A 'data/jmartinez/Transit_Data/carta.csv'
5820872 rou

In [3]:
carta$trip_id = as.character(carta$trip_id)
carta$stop_id = as.character(carta$stop_id)
carta$route_id = as.character(carta$route_id)
carta$direction_id = as.character(carta$direction_id)
carta$hour = factor(carta$hour)
carta$month = factor(carta$month)

# Route 1

## Pre-lockdown

### Direction 0

In [5]:
Pre_Trips_r1_d0 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '1', direction_id == '0') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r1_d0 <- unique(Pre_Trips_r1_d0)

Pre_Trips_r1_d0$trip_id = factor(Pre_Trips_r1_d0$trip_id)
Pre_Trips_r1_d0$hour = factor(Pre_Trips_r1_d0$hour)

Pre_Trips_r1_d0$mean_temp = (Pre_Trips_r1_d0$mean_temp - min(Pre_Trips_r1_d0$mean_temp))/(max(Pre_Trips_r1_d0$mean_temp) - min(Pre_Trips_r1_d0$mean_temp))
Pre_Trips_r1_d0$mean_precip = (Pre_Trips_r1_d0$mean_precip - min(Pre_Trips_r1_d0$mean_precip))/(max(Pre_Trips_r1_d0$mean_precip) - min(Pre_Trips_r1_d0$mean_precip))

head(Pre_Trips_r1_d0)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,132994,4,1,0.3288005,0
2019-01-02,132994,5,2,0.3283889,0
2019-01-02,132997,11,7,0.3535107,0
2019-01-02,132998,5,3,0.3283889,0
2019-01-02,133002,6,3,0.2918481,0
2019-01-02,133002,7,3,0.294022,0


In [None]:
#write.csv(Pre_Trips_r1_d0, 'data/jmartinez/Max_Occupancy_Trips/data/Pre_Trips_r1_d0.csv', row.names = F)

### Direction 1

In [6]:
Pre_Trips_r1_d1 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '1', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r1_d1 <- unique(Pre_Trips_r1_d1)

Pre_Trips_r1_d1$trip_id = factor(Pre_Trips_r1_d1$trip_id)
Pre_Trips_r1_d1$hour = factor(Pre_Trips_r1_d1$hour)

Pre_Trips_r1_d1$mean_temp = (Pre_Trips_r1_d1$mean_temp - min(Pre_Trips_r1_d1$mean_temp))/(max(Pre_Trips_r1_d1$mean_temp) - min(Pre_Trips_r1_d1$mean_temp))
Pre_Trips_r1_d1$mean_precip = (Pre_Trips_r1_d1$mean_precip - min(Pre_Trips_r1_d1$mean_precip))/(max(Pre_Trips_r1_d1$mean_precip) - min(Pre_Trips_r1_d1$mean_precip))

head(Pre_Trips_r1_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,133039,5,17,0.3279331,0
2019-01-02,133041,6,14,0.313805,0
2019-01-02,133045,7,11,0.3088654,0
2019-01-02,133045,8,10,0.3164356,0
2019-01-02,133046,8,4,0.3209698,0
2019-01-02,133048,9,6,0.3502474,0


In [None]:
#write.csv(Pre_Trips_r1_d0, 'data/jmartinez/Max_Occupancy_Trips/data/Pre_Trips_r1_d1.csv', row.names = F)

## Post-lockdown

### Direction 0

In [7]:
Post_Trips_r1_d0 <- carta %>%
  filter(trip_date >= '2020-03-05', route_id == '1', direction_id == '0') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r1_d0 <- unique(Post_Trips_r1_d0)

Post_Trips_r1_d0$trip_id = factor(Post_Trips_r1_d0$trip_id)
Post_Trips_r1_d0$hour = factor(Post_Trips_r1_d0$hour)

Post_Trips_r1_d0$mean_temp = (Post_Trips_r1_d0$mean_temp - min(Post_Trips_r1_d0$mean_temp))/(max(Post_Trips_r1_d0$mean_temp) - min(Post_Trips_r1_d0$mean_temp))
Post_Trips_r1_d0$mean_precip = (Post_Trips_r1_d0$mean_precip - min(Post_Trips_r1_d0$mean_precip))/(max(Post_Trips_r1_d0$mean_precip) - min(Post_Trips_r1_d0$mean_precip))

head(Post_Trips_r1_d0)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2020-03-05,138338,5,0,0.2916571,0
2020-03-05,138342,5,3,0.2916571,0
2020-03-05,138342,6,6,0.2898337,0
2020-03-05,138344,6,1,0.2761706,0
2020-03-05,138346,7,10,0.279635,0
2020-03-05,138347,7,2,0.2857127,0


In [None]:
#write.csv(Post_Trips_r1_d0, 'data/jmartinez/Max_Occupancy_Trips/data/Post_Trips_r1_d1.csv', row.names = F)

### Direction 1

In [8]:
Post_Trips_r1_d1 <- carta %>%
  filter(trip_date >= '2020-03-05', route_id == '1', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r1_d1 <- unique(Post_Trips_r1_d1)

Post_Trips_r1_d1$trip_id = factor(Post_Trips_r1_d1$trip_id)
Post_Trips_r1_d1$hour = factor(Post_Trips_r1_d1$hour)

Post_Trips_r1_d1$mean_temp = (Post_Trips_r1_d1$mean_temp - min(Post_Trips_r1_d1$mean_temp))/(max(Post_Trips_r1_d1$mean_temp) - min(Post_Trips_r1_d1$mean_temp))
Post_Trips_r1_d1$mean_precip = (Post_Trips_r1_d1$mean_precip - min(Post_Trips_r1_d1$mean_precip))/(max(Post_Trips_r1_d1$mean_precip) - min(Post_Trips_r1_d1$mean_precip))

head(Post_Trips_r1_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2020-03-05,138383,5,4,0.2906203,0
2020-03-05,138383,6,4,0.2873011,0
2020-03-05,138385,6,5,0.275792,0
2020-03-05,138385,7,5,0.2711697,0
2020-03-05,138387,7,5,0.2782163,0
2020-03-05,138389,8,6,0.2989043,0


In [None]:
#write.csv(Pre_Trips_r1_d0, 'data/jmartinez/Max_Occupancy_Trips/data/Post_Trips_r1_d1.csv', row.names = F)

# Route 4

## Pre-lockdown

### Direction 0

In [9]:
Pre_Trips_r4_d0 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '4', direction_id == '0') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r4_d0 <- unique(Pre_Trips_r4_d0)

Pre_Trips_r4_d0$trip_id = factor(Pre_Trips_r4_d0$trip_id)
Pre_Trips_r4_d0$hour = factor(Pre_Trips_r4_d0$hour)

Pre_Trips_r4_d0$mean_temp = (Pre_Trips_r4_d0$mean_temp - min(Pre_Trips_r4_d0$mean_temp))/(max(Pre_Trips_r4_d0$mean_temp) - min(Pre_Trips_r4_d0$mean_temp))
Pre_Trips_r4_d0$mean_precip = (Pre_Trips_r4_d0$mean_precip - min(Pre_Trips_r4_d0$mean_precip))/(max(Pre_Trips_r4_d0$mean_precip) - min(Pre_Trips_r4_d0$mean_precip))

head(Pre_Trips_r4_d0)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,134296,4,0,0.3292844,0
2019-01-02,134296,5,5,0.3287357,0
2019-01-02,134298,8,16,0.3243253,0
2019-01-02,134298,9,9,0.3316438,0
2019-01-02,134299,9,21,0.3423543,0
2019-01-02,134299,10,9,0.3538335,0


In [None]:
#write.csv(Pre_Trips_r4_d0, 'data/jmartinez/Max_Occupancy_Trips/data/Pre_Trips_r4_d0.csv', row.names = F)

### Direction 1

In [10]:
Pre_Trips_r4_d1 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '4', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r4_d1 <- unique(Pre_Trips_r4_d1)

Pre_Trips_r4_d1$trip_id = factor(Pre_Trips_r4_d1$trip_id)
Pre_Trips_r4_d1$hour = factor(Pre_Trips_r4_d1$hour)

Pre_Trips_r4_d1$mean_temp = (Pre_Trips_r4_d1$mean_temp - min(Pre_Trips_r4_d1$mean_temp))/(max(Pre_Trips_r4_d1$mean_temp) - min(Pre_Trips_r4_d1$mean_temp))
Pre_Trips_r4_d1$mean_precip = (Pre_Trips_r4_d1$mean_precip - min(Pre_Trips_r4_d1$mean_precip))/(max(Pre_Trips_r4_d1$mean_precip) - min(Pre_Trips_r4_d1$mean_precip))


head(Pre_Trips_r4_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,134364,6,11,0.3073824,0
2019-01-02,134365,9,17,0.3450123,0
2019-01-02,134365,10,17,0.3525516,0
2019-01-02,134366,9,8,0.3457845,0
2019-01-02,134366,10,13,0.3524518,0
2019-01-02,134368,7,24,0.3077242,0


In [None]:
#write.csv(Pre_Trips_r4_d1, 'data/jmartinez/Max_Occupancy_Trips/data/Pre_Trips_r4_d1.csv', row.names = F)

## Post-lockdown

### Direction 0

In [11]:
Post_Trips_r4_d0 <- carta %>%
  filter(trip_date >= '2020-03-05', route_id == '4', direction_id == '0') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r4_d0 <- unique(Post_Trips_r4_d0)

Post_Trips_r4_d0$trip_id = factor(Post_Trips_r4_d0$trip_id)
Post_Trips_r4_d0$hour = factor(Post_Trips_r4_d0$hour)

Post_Trips_r4_d0$mean_temp = (Post_Trips_r4_d0$mean_temp - min(Post_Trips_r4_d0$mean_temp))/(max(Post_Trips_r4_d0$mean_temp) - min(Post_Trips_r4_d0$mean_temp))
Post_Trips_r4_d0$mean_precip = (Post_Trips_r4_d0$mean_precip - min(Post_Trips_r4_d0$mean_precip))/(max(Post_Trips_r4_d0$mean_precip) - min(Post_Trips_r4_d0$mean_precip))

head(Post_Trips_r4_d0)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2020-03-05,139636,4,0,0.3121739,0
2020-03-05,139636,5,3,0.3121739,0
2020-03-05,139642,9,14,0.3476467,0
2020-03-05,139647,7,13,0.3005615,0
2020-03-05,139654,5,0,0.3121739,0
2020-03-05,139654,6,1,0.3067751,0


In [None]:
#write.csv(Post_Trips_r4_d0, 'Post_Trips_r4_d0.csv', row.names = F)

### Direction 1

In [13]:
Post_Trips_r4_d1 <- carta %>%
  filter(trip_date >= '2020-03-05', route_id == '4', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r4_d1 <- unique(Post_Trips_r4_d1)

Post_Trips_r4_d1$trip_id = factor(Post_Trips_r4_d1$trip_id)
Post_Trips_r4_d1$hour = factor(Post_Trips_r4_d1$hour)

Post_Trips_r4_d1$mean_temp = (Post_Trips_r4_d1$mean_temp - min(Post_Trips_r4_d1$mean_temp))/(max(Post_Trips_r4_d1$mean_temp) - min(Post_Trips_r4_d1$mean_temp))
Post_Trips_r4_d1$mean_precip = (Post_Trips_r4_d1$mean_precip - min(Post_Trips_r4_d1$mean_precip))/(max(Post_Trips_r4_d1$mean_precip) - min(Post_Trips_r4_d1$mean_precip))

head(Post_Trips_r4_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2020-03-05,139698,6,24,0.2631616,0
2020-03-05,139698,7,5,0.253516,0
2020-03-05,139704,9,8,0.3345024,0
2020-03-05,139704,10,11,0.3345024,0
2020-03-05,139708,6,0,0.2556453,0
2020-03-05,139708,7,10,0.2637675,0


In [None]:
#write.csv(Post_Trips_r4_d1, 'Post_Trips_r4_d1.csv', row.names = F)

# Route 9

## Pre-lockdown

### Direction 0

In [14]:
Pre_Trips_r9_d0 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '9', direction_id == '0') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r9_d0 = unique(Pre_Trips_r9_d0)

Pre_Trips_r9_d0$trip_id = factor(Pre_Trips_r9_d0$trip_id)
Pre_Trips_r9_d0$hour = factor(Pre_Trips_r9_d0$hour)

Pre_Trips_r9_d0$mean_temp = (Pre_Trips_r9_d0$mean_temp - min(Pre_Trips_r9_d0$mean_temp))/(max(Pre_Trips_r9_d0$mean_temp) - min(Pre_Trips_r9_d0$mean_temp))
Pre_Trips_r9_d0$mean_precip = (Pre_Trips_r9_d0$mean_precip - min(Pre_Trips_r9_d0$mean_precip))/(max(Pre_Trips_r9_d0$mean_precip) - min(Pre_Trips_r9_d0$mean_precip))

head(Pre_Trips_r9_d0)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,134767,4,0,0.3286791,0
2019-01-02,134767,5,0,0.3281117,0
2019-01-02,134768,5,1,0.3281117,0
2019-01-02,134769,7,7,0.3110232,0
2019-01-02,134769,8,4,0.3170004,0
2019-01-02,134770,8,3,0.3272536,0


In [None]:
write.csv(Pre_Trips_r9_d0, 'Pre_Trips_r9_d0.csv', row.names = F)

In [None]:
### Direction 1

In [15]:
Pre_Trips_r9_d1 <- carta %>%
  filter(trip_date < '2020-03-05', route_id == '9', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Pre_Trips_r9_d1 <- unique(Pre_Trips_r9_d1)

Pre_Trips_r9_d1$trip_id = factor(Pre_Trips_r9_d1$trip_id)
Pre_Trips_r9_d1$hour = factor(Pre_Trips_r9_d1$hour)

Pre_Trips_r9_d1$mean_temp = (Pre_Trips_r9_d1$mean_temp - min(Pre_Trips_r9_d1$mean_temp))/(max(Pre_Trips_r9_d1$mean_temp) - min(Pre_Trips_r9_d1$mean_temp))
Pre_Trips_r9_d1$mean_precip = (Pre_Trips_r9_d1$mean_precip - min(Pre_Trips_r9_d1$mean_precip))/(max(Pre_Trips_r9_d1$mean_precip) - min(Pre_Trips_r9_d1$mean_precip))

head(Pre_Trips_r9_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2019-01-02,134796,5,12,0.328462,0
2019-01-02,134796,6,5,0.328462,0
2019-01-02,134797,6,10,0.3208408,0
2019-01-02,134798,10,6,0.3535623,0
2019-01-02,134799,11,7,0.3535623,0
2019-01-02,134799,12,5,0.3535623,0


In [None]:
write.csv(Pre_Trips_r9_d1, 'Pre_Trips_r9_d1.csv', row.names = F)

## Post-lockdown

### Direction 0

In [None]:
Post_Trips_r9_d0 <- carta_r9 %>%
  filter(trip_date >= '2020-03-05', direction_id == 0) %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r9_d0 <- unique(Post_Trips_r9_d0)

Post_Trips_r9_d0$trip_id = factor(Post_Trips_r9_d0$trip_id)
Post_Trips_r9_d0$hour = factor(Post_Trips_r9_d0$hour)

Post_Trips_r9_d0$mean_temp = (Post_Trips_r9_d0$mean_temp - min(Post_Trips_r9_d0$mean_temp))/(max(Post_Trips_r9_d0$mean_temp) - min(Post_Trips_r9_d0$mean_temp))
Post_Trips_r9_d0$mean_precip = (Post_Trips_r9_d0$mean_precip - min(Post_Trips_r9_d0$mean_precip))/(max(Post_Trips_r9_d0$mean_precip) - min(Post_Trips_r9_d0$mean_precip))

head(Post_Trips_r9_d0)

In [None]:
#write.csv(Post_Trips_r9_d0, 'Post_Trips_r9_d0.csv', row.names = F)

### Direction 1

In [17]:
Post_Trips_r9_d1 <- carta %>%
  filter(trip_date >= '2020-03-05', route_id == '1', direction_id == '1') %>%
  group_by(trip_date, trip_id, hour) %>%
  mutate(max_occupancy = max(occupancy),
         mean_temp = mean(Estimated_Temp),
         mean_precip = mean(Estimated_Precip),
         max_occupancy = if_else(max_occupancy < 0, 0, max_occupancy),
         max_occupancy = if_else(max_occupancy > 32, 32, max_occupancy)) %>%
  summarise(max_occupancy, mean_temp, mean_precip)

Post_Trips_r9_d1 <- unique(Post_Trips_r9_d1)

Post_Trips_r9_d1$trip_id = factor(Post_Trips_r9_d1$trip_id)
Post_Trips_r9_d1$hour = factor(Post_Trips_r9_d1$hour)

Post_Trips_r9_d1$mean_temp = (Post_Trips_r9_d1$mean_temp - min(Post_Trips_r9_d1$mean_temp))/(max(Post_Trips_r9_d1$mean_temp) - min(Post_Trips_r9_d1$mean_temp))
Post_Trips_r9_d1$mean_precip = (Post_Trips_r9_d1$mean_precip - min(Post_Trips_r9_d1$mean_precip))/(max(Post_Trips_r9_d1$mean_precip) - min(Post_Trips_r9_d1$mean_precip))

head(Post_Trips_r9_d1)

`summarise()` has grouped output by 'trip_date', 'trip_id', 'hour'. You can override using the `.groups` argument.



trip_date,trip_id,hour,max_occupancy,mean_temp,mean_precip
<date>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
2020-03-05,138383,5,4,0.2906203,0
2020-03-05,138383,6,4,0.2873011,0
2020-03-05,138385,6,5,0.275792,0
2020-03-05,138385,7,5,0.2711697,0
2020-03-05,138387,7,5,0.2782163,0
2020-03-05,138389,8,6,0.2989043,0


In [None]:
#write.csv(Post_Trips_r9_d1, 'Post_Trips_r9_d1.csv', row.names = F)