In [None]:
# This R environment comes with all of CRAN preinstalled, as well as many other helpful packages
# The environment is defined by the kaggle/rstats docker image: https://github.com/kaggle/docker-rstats
# For example, here are several helpful packages to load

library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function

# Input data files are available in the "../input/" directory.
# Running this cell (by clicking run or pressing Shift+Enter) lists the
# entries of that directory.
#
# list.files() is used instead of shelling out to `ls`: it needs no external
# shell, and print() sends the listing through R's own output so it shows up
# wherever the script is run.
print(list.files(path = "../input"))
library(tidyverse)
library(stringr)
library(lubridate)
library(leaflet)
library(DT)
library(forecast)


# The workspace is deliberately not wiped with rm(list = ls()): every object
# this script relies on is (re)assigned below, and clearing the global
# environment from inside a script silently destroys unrelated user objects.

# Colours for the charts. fillColor2 is the bar fill used by the plots in this
# file; fillColor is kept for compatibility but is not referenced in the code
# visible here.
fillColor <- "#FFA07A"
fillColor2 <- "#F1C40F"

# Load the cleaned 311 data set (the commented line is an alternative input).
# NYC311 <- read_csv("../input/cluster1/0.csv")
NYC311 <- read_csv("../input/cleaned2/cleaned_data.csv")
# Any results you write to the current directory are saved as output.

In [None]:
# Rename the two snake_case columns to the CamelCase names that the
# remaining cells refer to.
NYC311 <- rename(
  NYC311,
  ComplaintType = complaint_type,
  CreatedDate = created_date
)

In [None]:
# Show the column names of the data frame after the rename.
names(NYC311)

In [None]:
# Horizontal bar chart of row counts per borough, limited to the ten
# boroughs with the highest counts. Each bar is annotated with its count.
NYC311 %>%
  group_by(borough) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Turn borough into a factor ordered by Count so the bars are sorted.
  mutate(borough = reorder(borough, Count)) %>%
  head(10) %>%
  ggplot(aes(x = borough, y = Count)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor2) +
  # Labels are anchored at y = 1, i.e. at the base of each bar; x is
  # inherited from the plot-level aesthetics.
  geom_text(
    aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0,
    vjust = .5,
    size = 4,
    colour = "black",
    fontface = "bold"
  ) +
  labs(x = "Borough", y = "Count", title = "Borough and Count") +
  coord_flip() +
  theme_bw()

In [None]:
# Latest and earliest creation timestamps in the data.
#
# The other cells parse CreatedDate with mdy_hms(), which implies it is read
# as month/day/year text. max()/min() on such text compare the strings
# lexicographically, not chronologically, so parse first. A column that is
# already a date-time is used unchanged.
created_dates <- NYC311$CreatedDate
if (is.character(created_dates)) {
  created_dates <- mdy_hms(created_dates)
}
max(created_dates, na.rm = TRUE)
min(created_dates, na.rm = TRUE)

In [None]:
# Horizontal bar chart of row counts per calendar month (all years pooled).
# Rows whose CreatedDate cannot be parsed get an NA month and are dropped.
NYC311 %>%
  mutate(month = month.abb[month(mdy_hms(CreatedDate))]) %>%
  filter(!is.na(month)) %>%
  group_by(month) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Order the month factor by Count so the bars are sorted by size.
  mutate(month = reorder(month, Count)) %>%
  ggplot(aes(x = month, y = Count)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor2) +
  # Count labels sit at the base of each bar (y = 1); x is inherited.
  geom_text(
    aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0,
    vjust = .5,
    size = 4,
    colour = "black",
    fontface = "bold"
  ) +
  labs(x = "Month", y = "Count", title = "Month and Count") +
  coord_flip() +
  theme_bw()

In [None]:
# Add the hour of day of each row's creation timestamp.
#
# Every other cell parses CreatedDate with mdy_hms() before using lubridate
# accessors; do the same here when the column is still text so hour() is not
# handed an unparsed month/day/year string. A column that is already a
# date-time is used as-is.
NYC311$hour <- if (is.character(NYC311$CreatedDate)) {
  hour(mdy_hms(NYC311$CreatedDate))
} else {
  hour(NYC311$CreatedDate)
}

# Build a horizontal bar chart of the ten `hour` values with the most rows.
#
# Args:
#   NYC311: data frame with an `hour` column; rows with a missing `hour`
#           are ignored.
#
# Returns:
#   A ggplot object. The bar fill comes from the global `fillColor2`.
GetTop10BusyHours <- function(NYC311) {
  # Count rows per hour, keep the ten largest, and order the hour factor by
  # Count so the bars come out sorted.
  busiest_hours <- NYC311 %>%
    filter(!is.na(hour)) %>%
    group_by(hour) %>%
    summarise(Count = n()) %>%
    arrange(desc(Count)) %>%
    ungroup() %>%
    mutate(hour = reorder(hour, Count)) %>%
    head(10)

  ggplot(busiest_hours, aes(x = hour, y = Count)) +
    geom_bar(stat = "identity", colour = "white", fill = fillColor2) +
    # Count labels sit at the base of each bar (y = 1); x is inherited.
    geom_text(
      aes(y = 1, label = paste0("(", Count, ")")),
      hjust = 0,
      vjust = .5,
      size = 4,
      colour = "black",
      fontface = "bold"
    ) +
    labs(x = "hour", y = "Count", title = "hour and Count") +
    coord_flip() +
    theme_bw()
}

In [None]:
# Line chart of the number of rows per (year, month) of CreatedDate.
NYC311 %>%
  # Parse the timestamp once and derive both parts from it, instead of
  # running mdy_hms() over the whole column twice.
  mutate(
    created_at = mdy_hms(CreatedDate),
    year = year(created_at),
    month = month(created_at)
  ) %>%
  # Unparseable timestamps yield NA year/month; leave them out.
  filter(!is.na(year), !is.na(month)) %>%
  group_by(year, month) %>%
  summarise(Count = n()) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # following steps work on a plain, ungrouped table.
  ungroup() %>%
  arrange(year, month) %>%
  # First day of each month, used as the x position.
  mutate(YearMonth = make_date(year = year, month = month)) %>%
  ggplot(aes(x = YearMonth, y = Count, group = 1)) +
  geom_line(size = 1, color = "red") +
  geom_point(size = 3, color = "red") +
  labs(x = "Time", y = "Count", title = "Trend of 311 Calls") +
  theme_bw()

In [None]:
# Monthly counts: one row per (year, month), sorted chronologically.
NYC311TrendData <- NYC311 %>%
  # Parse the timestamp once and derive both parts from it, instead of
  # running mdy_hms() over the whole column twice.
  mutate(
    created_at = mdy_hms(CreatedDate),
    year = year(created_at),
    month = month(created_at)
  ) %>%
  # Unparseable timestamps yield NA year/month; leave them out.
  filter(!is.na(year), !is.na(month)) %>%
  group_by(year, month) %>%
  summarise(Count = n()) %>%
  # Drop the grouping that summarise() leaves behind.
  ungroup() %>%
  arrange(year, month)

# Convert to a multivariate ts; columns are year, month, Count in that order,
# so column 3 is the count series used for modelling.
tsNYC311TrendData <- ts(NYC311TrendData)

# Show the series as an interactive table.
datatable(
  tsNYC311TrendData,
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

In [None]:
# Fit an automatically selected ARIMA model to the first 40 values of the
# count column (column 3) of the current trend series.
fit <- auto.arima(tsNYC311TrendData[seq_len(40), 3])

# Forecast the next five periods.
preds <- forecast(fit, h = 5)

# Plot the forecast together with up to 188 trailing observations.
autoplot(preds, include = 188) + theme_bw()

In [None]:
# Weekly counts: one row per (year, week number), sorted chronologically.
# This overwrites the monthly NYC311TrendData / tsNYC311TrendData objects.
NYC311TrendData <- NYC311 %>%
  # Parse the timestamp once and derive both parts from it, instead of
  # running mdy_hms() over the whole column twice.
  mutate(
    created_at = mdy_hms(CreatedDate),
    year = year(created_at),
    WeekNo = week(created_at)
  ) %>%
  # Unparseable timestamps yield NA year/week; leave them out.
  filter(!is.na(year), !is.na(WeekNo)) %>%
  group_by(year, WeekNo) %>%
  summarise(Count = n()) %>%
  # Drop the grouping that summarise() leaves behind.
  ungroup() %>%
  arrange(year, WeekNo)

# Convert to a multivariate ts; columns are year, WeekNo, Count in that
# order, so column 3 is the count series used for modelling.
tsNYC311TrendData <- ts(NYC311TrendData)

# Show the series as an interactive table.
datatable(
  tsNYC311TrendData,
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

In [None]:
# Fit an automatically selected ARIMA model to the first 188 values of the
# count column (column 3) of the current trend series.
fit <- auto.arima(tsNYC311TrendData[seq_len(188), 3])

# Forecast the next five periods.
preds <- forecast(fit, h = 5)

# Plot the forecast together with the 188 training observations.
autoplot(preds, include = 188) + theme_bw()

In [None]:
# Pull the point forecasts out of the forecast object as a plain numeric
# vector and print them.
predictions <- as.numeric(preds$mean)

cat("\n", "The predictions are  ", predictions)

In [None]:
# Root-mean-square error of the forecasts in `predictions`.
#
# The forecasts must be scored against the observations that immediately
# follow the series the model was trained on. `preds$x` is that training
# series, so the hold-out rows are derived from its length rather than
# hard-coded. The previous fixed rows 41:45 only matched a 40-point training
# window, not the 188-point one used for the latest fit.
holdout_rows <- length(preds$x) + seq_along(predictions)

if (max(holdout_rows) > nrow(tsNYC311TrendData)) {
  stop(
    "Not enough observations after the training window to score ",
    length(predictions), " forecasts.",
    call. = FALSE
  )
}

# Column 3 of the trend series holds the counts.
actual <- as.numeric(tsNYC311TrendData[holdout_rows, 3])
error <- sqrt(mean((actual - predictions)^2))

cat("\n","The RMSE is ", error)

In [None]:
# NYC311TrendData = NYC311 %>%
#   mutate(year = year(mdy_hms(CreatedDate))) %>%
#   mutate(WeekNo = week(mdy_hms(CreatedDate))) %>%
  
#   filter(!is.na(year)) %>%
#   filter(!is.na(WeekNo)) %>%
#   group_by(year,WeekNo) %>%
#   summarise(Count = n()) %>%
#   arrange(year,WeekNo)
#   mutate(YearMonth = make_date(year=year,WeeK=) )

# #tsAustin311TrendData = ts(Austin311TrendData)

# # datatable((tsAustin311TrendData), style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))
#   ggplot(aes(x=YearMonth,y=Count,group = 1)) +
#   geom_line(size=1, color="red")+
#   geom_point(size=3, color="red") +
#   labs(x = 'Time', y = 'Count',title = 'Trend of 311 Calls') +
#   theme_bw() 