In [None]:
#Background
#Customer Service Requests Analysis

#NYC 311's mission is to provide the public with quick and easy access to all New York City government services and information while offering the best customer service.
#Each day, NYC311 receives thousands of requests related to several hundred types of non-emergency services, including noise complaints, plumbing issues, and illegally parked cars.
#These requests are received by NYC311 and forwarded to the relevant agencies such as the police, buildings, or transportation. 
#The agency responds to the request, addresses it, and then closes it.

#Problem
#Perform a service request data analysis of New York City 311 calls. 
#You will focus on the data wrangling techniques to understand the pattern in the data and also visualize the major complaint types.
#Domain: Customer Service

#Task

#     Import a 311 NYC service request.
#     Read or convert the columns ‘Created Date’ and ‘Closed Date’ to datetime datatype and create a new column ‘Request_Closing_Time’ as the time elapsed between request creation and request closing. (Hint: Explore the package/module datetime)
#     Provide major insights/patterns that you can offer in a visual format (graphs or tables); at least 4 major conclusions that you can come up with after generic data mining.
#     Order the complaint types based on the average ‘Request_Closing_Time’, grouping them for different locations.
#     Perform a statistical test for the following:

#     Whether the average response time across complaint types is similar or not (overall)
#     Is the type of complaint or service requested related to the location?


from pandas import read_csv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import datetime

%matplotlib inline

In [None]:
# Load the raw 311 service-request CSV export into a DataFrame.

dataset = pd.read_csv(
    filepath_or_buffer="../../../ai_algos/data/311_Service_Requests_from_2010_to_Present.csv"
)
# Preview the first five rows as the cell output.
dataset.head(5)

In [None]:
# Show the dtype of every column in the loaded frame (rendered as the cell output).
dataset.dtypes
# Disabled alternative that would also report per-column non-null counts.
# NOTE(review): recent pandas versions appear to use `show_counts` in place of
# `null_counts` -- confirm against the installed version before re-enabling.
#dataset.info(null_counts=True)

In [None]:
# Report the (rows, columns) shape of the frame before any columns are dropped.
print(f"{dataset.shape}")

# Count the missing values in each column; shown as the cell output.
dataset.isna().sum()

In [None]:
# Missing "Closed Date" values are deliberately left as missing here.
# The earlier `.replace(to_replace='NaN', value='12-31-99 0:00', inplace=True)` only
# matched the literal string 'NaN', which read_csv has already parsed into real
# missing values, so it never filled anything. Leaving them missing also avoids a
# fake sentinel date leaking into any closing-time computation.

# Columns that have too many missing values or are not needed for the analysis.
columns_to_drop = [
    'Incident Address', 'Street Name', 'Cross Street 1', 'Cross Street 2',
    'Intersection Street 1', 'Intersection Street 2', 'Resolution Description',
    'Resolution Action Updated Date', 'Community Board', 'X Coordinate (State Plane)',
    'School or Citywide Complaint', 'Vehicle Type', 'Taxi Company Borough',
    'Taxi Pick Up Location', 'Garage Lot Name', 'School Name', 'School Number',
    'School Region', 'School Code', 'School Phone Number', 'School Address',
    'School City', 'School State', 'School Zip', 'School Not Found',
    'Ferry Direction', 'Ferry Terminal Name', 'Unique Key', 'Bridge Highway Name',
    'Bridge Highway Direction', 'Road Ramp', 'Bridge Highway Segment',
]

# Reassign instead of mutating in place, and skip columns that are already gone,
# so this cell can be re-run on the same kernel without raising KeyError.
dataset = dataset.drop(columns=columns_to_drop, errors="ignore")

dataset.shape

In [None]:
# Convert both timestamp columns from object (string) dtype to datetime64.
dataset["Created Date"] = pd.to_datetime(dataset["Created Date"])
dataset["Closed Date"] = pd.to_datetime(dataset["Closed Date"])

# Time elapsed between request creation and request closing.
# Plain subtraction is used on purpose: a request with no "Closed Date" yields NaT
# (missing) instead of a duration computed against a substituted 0, which is what
# `.sub(..., fill_value=0)` attempted and which is meaningless for timestamps.
# NaT rows are skipped automatically by mean()/groupby aggregations.
dataset["Request_closing_time"] = dataset["Closed Date"] - dataset["Created Date"]

dataset.head(10)

In [None]:
# Horizontal bar chart of request counts per complaint type, to see which
# complaint types make up the bulk of the requests.

(
    dataset["Complaint Type"]
    .value_counts()
    .plot.barh(alpha=0.6, figsize=(15, 30))
)
plt.show()

In [None]:
# Horizontal bar chart of how many requests fall under each ticket status.

(
    dataset["Status"]
    .value_counts()
    .plot.barh(alpha=0.6, figsize=(7, 7))
)
plt.show()

In [None]:
# Group the requests by complaint type, then pull out the "Blocked Driveway"
# subset so it can be plotted against the city.

dataset_complaints = dataset.groupby(by="Complaint Type")
dataset_driveway = dataset_complaints.get_group(name="Blocked Driveway")

# Size of the Blocked Driveway subset as (rows, columns).
print(f"{dataset_driveway.shape}")