## Part 1: For this project, look at requests with a Request Type of "COVID-19" and a Subrequest Type of "COVID-19 Violations".

In [1]:
import io
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

%matplotlib inline

In [2]:
# Lift the column cap so wide frames are displayed without truncation.
pd.options.display.max_columns = None

### Using the CSV file:

In [None]:
#hubNash = pd.read_csv("../data/hubNashville__311__Service_Requests.csv")

In [None]:
#covid19 = hubNash[(hubNash["Request Type"] == "COVID-19") & hubNash["Subrequest Type"].str.contains("COVID-19 Violations")]

### Using the JSON API:

In [3]:
# Pull every case with case_request = COVID-19 from the hubNashville open-data
# API; $limit raises the row cap to 25k.

url = 'https://data.nashville.gov/resource/7qhx-rexh.json?case_request=COVID-19&$limit=25000'

# requests has no default timeout; bound the wait so the cell cannot hang forever.
response = requests.get(url, timeout=60)

# Fail loudly on a 4xx/5xx reply instead of handing an error payload to the parser.
response.raise_for_status()

In [4]:
# Parse the JSON payload into a DataFrame.
# The text is wrapped in StringIO because passing a literal JSON string to
# pd.read_json is deprecated (pandas >= 2.1); a file-like object works on
# every pandas version.
hubNashville = pd.read_json(io.StringIO(response.text))

In [6]:
# The request URL already restricts case_request to "COVID-19", so only the
# subrequest type needs filtering here.
# .copy() gives covid19 its own data: columns are added to it in later cells,
# and doing that on a filtered view triggers SettingWithCopyWarning.
covid19 = hubNashville[hubNashville["case_subrequest"] == "COVID-19 Violations"].copy()

In [7]:
# (rows, columns) of the frame after the subrequest filter.
covid19.shape

(11632, 19)

In [9]:
# Preview the first rows of the filtered frame.
covid19.head()

Unnamed: 0,additional_subrequest,case_number,case_origin,case_request,case_subrequest,closed_when_created,contact_type,date_time_closed,date_time_opened,incident_address,incident_city,incident_council_district,incident_zip_code,latitude,longitude,mapped_location,parent_case,state_issue,status
1,COVID-19 Violations,495545,hubNashville Community,COVID-19,COVID-19 Violations,False,,,2020-11-04T21:27:30.000,2612 Lebanon Pike,NASHVILLE,15.0,37214.0,36.170223,-86.673428,"{'latitude': '36.1702228', 'longitude': '-86.6...",,False,New
2,COVID-19 Violations,495548,hubNashville Community,COVID-19,COVID-19 Violations,False,,,2020-11-04T21:31:04.000,130 W Trinity Ln,NASHVILLE,5.0,37207.0,36.205655,-86.772439,"{'latitude': '36.2056553', 'longitude': '-86.7...",,False,New
3,COVID-19 Violations,494661,hubNashville Community,COVID-19,COVID-19 Violations,False,,2020-11-04T15:00:35.000,2020-11-03T20:19:45.000,3441 Lebanon Pike,HERMITAGE,14.0,37076.0,36.189819,-86.624601,"{'latitude': '36.18981867780618', 'longitude':...",,False,Closed
5,COVID-19 Violations,494225,hubNashville Community,COVID-19,COVID-19 Violations,False,,2020-11-04T14:51:07.000,2020-11-03T15:32:21.000,440 McMurray Dr,NASHVILLE,27.0,37211.0,36.0619,-86.718059,"{'latitude': '36.0618999', 'longitude': '-86.7...",,False,Closed
7,COVID-19 Violations,495269,hubNashville Community,COVID-19,COVID-19 Violations,False,,,2020-11-04T17:48:22.000,2381 Murfreesboro Pike,NASHVILLE,28.0,37217.0,36.082906,-86.648202,"{'latitude': '36.082905861572854', 'longitude'...",,False,New


In [11]:
# Sanity check: list the distinct case_request values present in the filtered frame.
covid19["case_request"].unique()

array(['COVID-19'], dtype=object)

In [None]:
# Distinct values of the closed_when_created flag.
# The API frame uses snake_case column names; "Closed When Created" is the
# CSV header and does not exist here (KeyError).
covid19["closed_when_created"].unique()

In [None]:
# Drop the request-type columns: case_request and case_subrequest each hold a
# single value after the filtering above, and additional_subrequest is removed
# along with them.
# The empty-string placeholders were removed (they are not column names and
# make drop() raise KeyError); axis is unnecessary when `columns=` is given.
covid19 = covid19.drop(columns=["additional_subrequest", "case_request", "case_subrequest"])

In [None]:
# Give the API columns the shorter names used for this analysis.
# The keys must be the API column names: the previous keys were the CSV
# headers, which do not exist in this frame, so rename() silently did nothing.
# contact_type and closed_when_created already carry their target names.
covid19 = covid19.rename(
    columns={
        "case_number": "request_number",
        "case_origin": "request_origin",
        "incident_council_district": "council_district",
    }
)

### Convert the time

In [None]:
# Convert the timestamp strings returned by the API into datetime columns.
# - The API columns are date_time_opened / date_time_closed (the
#   "date / time ..." names belong to the CSV export).
# - pd.to_datetime is used instead of astype("datetime64"): pandas 2.x rejects
#   the unit-less dtype, and to_datetime turns missing close timestamps into NaT.
covid19["date_opened"] = pd.to_datetime(covid19["date_time_opened"])
covid19["date_closed"] = pd.to_datetime(covid19["date_time_closed"])
covid19.info()

In [None]:
# Break the opening timestamp into month, day-of-month and time-of-day columns.
opened_dt = covid19['date_opened'].dt
for part in ('month', 'day', 'time'):
    covid19[f'{part}_opened'] = getattr(opened_dt, part)

covid19.head()

In [None]:
# Opening date formatted as a zero-padded "MM-DD" string (year dropped).
covid19["month_day_opened"] = covid19['date_opened'].dt.strftime('%m-%d') # approach suggested by Taylor

In [None]:
# Remove the raw timestamp strings now that date_opened / date_closed hold the
# parsed values. The API column names are used here; the "date / time ..."
# names exist only in the CSV export and would raise KeyError.
covid19 = covid19.drop(columns=["date_time_opened", "date_time_closed"])

In [None]:
# NB: there are missing values, which is why I can't convert the closed dates to integers (not possible with NaN values), but I shouldn't need that for the project
#covid19[["month_closed", "day_closed"]] = covid19[["month_closed", "day_closed"]].apply(pd.to_numeric)
#covid19.info()

In [None]:
# (rows, columns) after the column drops and the added date columns.
covid19.shape

In [None]:
# Histogram of the MM-DD opening dates, grouped into 25 bins, drawn on an explicit Axes.
fig, ax = plt.subplots()
covid19['month_day_opened'].hist(bins=25, ax=ax);

In [None]:
# Bar chart: number of COVID-19 violation requests opened in each month.
# The previous version plotted "state" against "per_capita" with a candy
# consumption title; covid19 has neither column, so the cell could not run.
fontsize = 12

# One row per month with the count of requests opened in it.
requests_per_month = (
    covid19.groupby("month_opened")
    .size()
    .reset_index(name="n_requests")
)

plt.figure(figsize=(25,5))
sns.barplot(data=requests_per_month, x="month_opened", y="n_requests")
plt.xticks(rotation=45, fontsize=fontsize)
plt.xlabel('Month opened', fontsize=fontsize)
plt.yticks(fontsize=fontsize)
plt.ylabel('Number of requests', fontsize=fontsize)
plt.title('COVID-19 violation requests opened per month', fontsize=14, fontweight='bold');

### Convert the location into a geodata point

In [None]:
#import geopandas as gpd
#covid19 = gpd.GeoDataFrame(
#            covid19, geometry=gpd.points_from_xy(covid19.longitude, covid19.latitude))