## Importing Libraries

In [1]:
import os

import numpy as np
import pandas as pd
import requests

### Exploring Dataset

https://data.cityofnewyork.us/Public-Safety/Incidents-Responded-to-by-Fire-Companies/tm6d-hbzd

In [2]:
# Read the incident export; the CSV is expected in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Incidents_Responded_to_by_Fire_Companies.csv")

FileNotFoundError: [Errno 2] File b'Incidents_Responded_to_by_Fire_Companies.csv' does not exist: b'Incidents_Responded_to_by_Fire_Companies.csv'

In [None]:
# Preview the first five rows of the raw incident table.
df.head(n=5)

### Extracting Relevant Data
For our analysis of places in New York with a high number of fire calls, we need to extract:
* **ZIP_CODE** *(Zip code of the place)*
* **TOTAL_INCIDENT_DURATION** *(Time period for which the fire was not extinguished)*
* **INCIDENT_TYPE_DESC** *(Type of incident)* 
* **HIGHEST_LEVEL_DESC**
* **ACTION_TAKEN1_DESC** *(Action taken by the authority)*

In [None]:
# Keep only the columns that the rest of the analysis reads.
relevant_columns = [
    "ZIP_CODE",
    "TOTAL_INCIDENT_DURATION",
    "INCIDENT_TYPE_DESC",
    "HIGHEST_LEVEL_DESC",
    "ACTION_TAKEN1_DESC",
]
data = df.loc[:, relevant_columns]
data.head()

## Feature Engineering Dataset
This includes
* Dropping  incomplete data
* Normalisation
* Grouping the data by ZIP CODE

In [None]:
# Drop every row that has a missing value in any of the selected columns, so
# the integer casts below never encounter NaN.
data = data.dropna(how='any', axis=0)
data["ZIP_CODE"] = data["ZIP_CODE"].astype(int)

# Normalise the duration by its largest value. The cast is done once and the
# vectorised Series.max() replaces the Python builtin max(), which walks the
# column element by element.
# NOTE: this cell is not safe to re-run on its own. A second pass would
# truncate the already-normalised fractions to integers before rescaling.
# Re-run from the extraction cell instead.
duration = data["TOTAL_INCIDENT_DURATION"].astype(int)
data["TOTAL_INCIDENT_DURATION"] = duration / duration.max()

In [None]:
# One-hot encode each categorical column. The empty prefix keeps the raw
# category text as the column name.
data11, data22, data33 = (
    pd.get_dummies(data[column], prefix="", prefix_sep="")
    for column in ("INCIDENT_TYPE_DESC", "HIGHEST_LEVEL_DESC", "ACTION_TAKEN1_DESC")
)

# Put the indicator columns side by side with the ZIP code and the duration.
dff = pd.concat(
    [data11, data22, data33, data[["ZIP_CODE", "TOTAL_INCIDENT_DURATION"]]],
    axis=1,
)
dff.head()

In [None]:
# Collapse to one row per ZIP code by summing every column within each group.
dff1 = dff.groupby(by="ZIP_CODE").sum()
dff1.head()

## Applying K Means for clustering the Data

In [None]:
from sklearn.cluster import KMeans
kclusters = 3

# Fit on the per-ZIP feature columns only. If the cell that inserts
# 'Cluster Labels' into dff1 has already run, that column is excluded here so
# that re-running this cell does not cluster on the previous labels.
cluster_features = dff1.drop(columns=["Cluster Labels"], errors="ignore")

# n_init is pinned because its default differs between scikit-learn releases;
# together with random_state this keeps the labels reproducible.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(cluster_features)
kmeans.labels_

**Each category indicates how prone an area is to fire incidents.**
* *Category-0 is most prone to fire cases*
* *Category-2 is least prone to fire cases*

In [None]:
# Attach the K-Means label of each ZIP code as the first column.
# DataFrame.insert raises ValueError when the column already exists, so on a
# re-run the existing column is overwritten in place instead.
if "Cluster Labels" in dff1.columns:
    dff1["Cluster Labels"] = kmeans.labels_
else:
    dff1.insert(0, 'Cluster Labels', kmeans.labels_)
dff1.head()

## Using dataset to extract coordinates of places

In [None]:
# Load the ZIP-code lookup table and rename its key column to match the
# incident data.
zipcode = pd.read_csv("Zip_Codes_Government_Data.csv").rename(columns={'ZIP': 'ZIP_CODE'})

# Join the index of dff1 against the lookup table's ZIP_CODE column.
data_df = dff1.merge(zipcode, left_index=True, right_on="ZIP_CODE")
data_df.head()

## Mapping Data using Folium

In [3]:
!conda install folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
map_clusters = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, cluster in zip(data_df['LAT'], data_df['LNG'], data_df['Cluster Labels']):
    label = folium.Popup( ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Collecting package metadata: ...working... done
Solving environment: ...working... failed



PackagesNotFoundError: The following packages are not available from current channels:

  - folium

Current channels:

  - https://repo.anaconda.com/pkgs/main/win-64
  - https://repo.anaconda.com/pkgs/main/noarch
  - https://repo.anaconda.com/pkgs/free/win-64
  - https://repo.anaconda.com/pkgs/free/noarch
  - https://repo.anaconda.com/pkgs/r/win-64
  - https://repo.anaconda.com/pkgs/r/noarch
  - https://repo.anaconda.com/pkgs/msys2/win-64
  - https://repo.anaconda.com/pkgs/msys2/noarch

To search for alternate channels that may provide the conda package you're
looking for, navigate to

    https://anaconda.org

and use the search bar at the top of the page.




ModuleNotFoundError: No module named 'folium'

## Client details for using FOURSQUARE API

In [None]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook source or in its saved outputs. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell should be
# treated as leaked and rotated.
def _require_env(name):
    """Return the non-empty value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or blank.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            "Environment variable {} is not set; it is required for the Foursquare API.".format(name)
        )
    return value


CLIENT_ID = _require_env("FOURSQUARE_CLIENT_ID")  # your Foursquare ID
CLIENT_SECRET = _require_env("FOURSQUARE_CLIENT_SECRET")  # your Foursquare Secret
VERSION = '20190703' # Foursquare API version

# Confirm that the credentials were found without echoing them into the output.
print('Foursquare credentials loaded from the environment.')

## Forming a Dataset of all the food joints in a given area

This is done to count the number of food joints around each location and to observe whether a higher number of fire cases is related to it.

In [None]:
def getNearbyVenues(cluster, zipcode, latitudes, longitudes, radius=500, limit=50, timeout=10):
    """Count the venues Foursquare returns around each coordinate pair.

    One request is sent to the v2 ``venues/search`` endpoint per
    (latitude, longitude) pair, filtered by the category id used in this
    notebook for food joints. Only the number of venues in each response is
    kept.

    Args:
        cluster: Cluster label per location; becomes the "Cluster Labels" column.
        zipcode: ZIP code per location; becomes the "ZIPCODE" column.
        latitudes: Iterable of latitudes, paired positionally with `longitudes`.
        longitudes: Iterable of longitudes.
        radius: Search radius passed to the API.
        limit: Maximum number of venues requested per call. Each count is
            therefore capped at this value.
            NOTE(review): the v2 search endpoint is documented with a maximum
            of 50 - confirm against the current API reference.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        pandas.DataFrame with the columns "ZIPCODE", "Number_of_Food_Joints"
        and "Cluster Labels".

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/search'
    venues_list = []
    for lat, lng in zip(latitudes, longitudes):
        # Let requests build and encode the query string. The parameter name is
        # the lowercase `limit`; the previous upper-case `LIMIT` is not the
        # documented name.
        params = {
            'categoryId': '4d4b7105d754a06374d81259',
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'limit': limit,
            'radius': radius,
        }
        # make the GET request; fail loudly on quota or authentication errors
        # instead of surfacing them later as an opaque KeyError
        response = requests.get(endpoint, params=params, timeout=timeout)
        response.raise_for_status()

        # keep only the number of venues found near this location
        venues_list.append(len(response.json()["response"]["venues"]))

    nearby_venues = pd.DataFrame({
        "ZIPCODE": zipcode,
        "Number_of_Food_Joints": venues_list,
        "Cluster Labels": cluster,
    })

    print("Finished!")

    return nearby_venues

In [None]:
# Count the food joints within 500 units of radius around every ZIP-code
# coordinate, keeping the cluster label and ZIP code alongside each count.
nyc_venues = getNearbyVenues(
    data_df['Cluster Labels'],
    data_df['ZIP_CODE'],
    data_df['LAT'],
    data_df['LNG'],
    radius=500,
)
nyc_venues.head()

## Final Dataset

In [None]:
# Drop the ZIP code so that only the venue counts are summed per cluster.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer
# accepts. `errors="ignore"` lets the cell be re-run after the column has
# already been removed.
nyc_venues = nyc_venues.drop(columns=["ZIPCODE"], errors="ignore")

# Total number of food joints per cluster label, with the label as a column.
new = nyc_venues.groupby(['Cluster Labels']).sum().reset_index()
new.head()

## Plotting the Data

In [None]:
import matplotlib.pyplot as plt

# One tick label per cluster, e.g. "Category 0".
p = ["Category " + str(label) for label in new["Cluster Labels"]]
print(p)

# Bar chart of the total number of food joints in each cluster.
cluster_ids = new["Cluster Labels"]
plt.bar(cluster_ids, new["Number_of_Food_Joints"])
plt.title('Fire Likelihood of NYC food joints')
plt.xlabel('Cluster Labels', fontsize=20)
plt.ylabel('Number of Food Joints', fontsize=20)
plt.xticks(cluster_ids, p, fontsize=10, rotation=45)
plt.show()