Skip to content

mahimavedi/Predict-fare

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

3 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

# Loading the tidyverse
library(tidyverse)

# Read the raw taxi journeys from disk.
taxi <- read_csv("datasets/taxi.csv")

# Peek at the first rows to sanity-check the import.
head(taxi)

# Clean the data: shorten the pickup coordinate column names and build the
# modelling target `total`, the log of fare plus tip.
taxi <- taxi %>%
  rename(lat = pickup_latitude, long = pickup_longitude) %>%
  # Keep only journeys whose combined fare and tip is strictly positive, so
  # log() below never receives zero or a negative sum (-Inf / NaN). Rows where
  # the sum is NA are dropped too, because filter() discards NA conditions.
  filter(fare_amount + tip_amount > 0) %>%
  mutate(total = log(fare_amount + tip_amount))

# Restrict the data to pickups inside a fixed latitude/longitude box
# (between() is inclusive at both ends).
taxi <- taxi %>%
  filter(
    between(lat, 40.70, 40.83),
    between(long, -74.025, -73.93)
  )
# Loading ggmap for map plotting and viridis for the colour scales
library(ggmap)
library(viridis)
# Load the saved base map object that the plots are drawn on.
manhattan <- readRDS("datasets/manhattan.rds")

# Density map of journey start locations: pickups are counted per cell of a
# 2-D grid (bins = 60) and drawn semi-transparently over the darkened map.
ggmap(manhattan, darken = 0.5) +
  geom_bin2d(
    mapping = aes(x = long, y = lat),
    data = taxi,
    bins = 60,
    alpha = 0.6
  ) +
  scale_fill_viridis(option = "plasma") +
  labs(x = "longitude", y = "latitude", fill = "journeys")
# Loading the tree package for regression trees
library(tree)
# Fit a regression tree that predicts `total` from the pickup location only.
fitted_tree <- tree(formula = total ~ lat + long, data = taxi)

# Draw the tree skeleton first, then annotate it with its split labels.
plot(fitted_tree)
text(fitted_tree)
library(lubridate)
# Derive three pickup-time features in a single mutate(): hour of day, plus
# weekday and month (label = TRUE returns names rather than numbers).
taxi <- mutate(
  taxi,
  hour = hour(pickup_datetime),
  wday = wday(pickup_datetime, label = TRUE),
  month = month(pickup_datetime, label = TRUE)
)

# Check that the new columns were added.
head(taxi)

# Refit the tree with the time features alongside the location, replacing the
# earlier location-only fit.
fitted_tree <- tree(total ~ lat + long + hour + wday + month, data = taxi)

# Draw and label the new tree, then print its fit summary.
plot(fitted_tree)
text(fitted_tree)
summary(fitted_tree)
library(randomForest)
# Fit a random forest on the same predictors as the tree: 80 trees, each
# grown on a sample of 10,000 rows.
fitted_forest <- randomForest(
  total ~ lat + long + hour + wday + month,
  data = taxi,
  ntree = 80,
  sampsize = 10000
)

# Store the forest's prediction for every row next to the observed value.
# NOTE(review): per the randomForest documentation `$predicted` holds
# out-of-bag predictions; confirm against the installed version.
taxi$pred_total <- fitted_forest$predicted

# Map the mean predicted value per grid cell. The fill shows predicted fares
# (on the log scale of `total`), not journey counts, so the legend is
# labelled accordingly.
ggmap(manhattan, darken = 0.5) +
  scale_fill_viridis(option = "plasma") +
  stat_summary_2d(
    data = taxi,
    aes(x = long, y = lat, z = pred_total),
    fun = mean,
    bins = 60,
    alpha = 0.6
  ) +
  labs(x = "longitude", y = "latitude", fill = "predicted fare")
   # Mean of `x`, reported only when enough observations are available.
   #
   # x:     vector of values (e.g. all values falling into one grid cell).
   # min_n: minimum number of values required; defaults to 15.
   #
   # Returns mean(x) when length(x) >= min_n, otherwise NA_real_ so the
   # result is always a double.
   mean_if_enough_data <- function(x, min_n = 15) {
     # length(x) is a single value, so a plain if/else is used here;
     # ifelse() is meant for element-wise conditions on vectors.
     if (length(x) >= min_n) {
       mean(x)
     } else {
       NA_real_
     }
   }
# Map the mean observed `total` per grid cell. The summary function is
# mean_if_enough_data(), so cells with fewer than 15 journeys get NA
# instead of a mean.
ggmap(manhattan, darken = 0.5) +
  stat_summary_2d(
    mapping = aes(x = long, y = lat, z = total),
    data = taxi,
    fun = mean_if_enough_data,
    bins = 60,
    alpha = 0.6
  ) +
  scale_fill_viridis(option = "plasma") +
  labs(x = "longitude", y = "latitude", fill = "fare")
   

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published