In [1]:
# Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from datetime import date
import calendar

In [2]:
# Import & Read CSV generated from DarkSky Weather API
csv_path1= "Resources/temp_weather.csv"
weather_df = pd.read_csv(csv_path1)
# Clean weather_df
del weather_df['Unnamed: 0']
del weather_df['index']

weather_df.head(1)

Unnamed: 0,Date,Temperature,Weather Condition
0,2016-02-12,20.53,Mostly Cloudy


In [3]:
# Import & Read CSV generated from Pittsburgh Crime Codes


In [4]:
#Import & Read Original Pittsburgh Crime Data CSV
csv_path3= "Resources/crime_data.csv"
master_df = pd.read_csv(csv_path3)
master_df.head(1)

Unnamed: 0,PK,CCR,AGE,GENDER,RACE,ARRESTTIME,ARRESTLOCATION,OFFENSES,INCIDENTLOCATION,INCIDENTNEIGHBORHOOD,INCIDENTZONE,INCIDENTTRACT,COUNCIL_DISTRICT,PUBLIC_WORKS_DIVISION,X,Y
0,1975272,16158872,42.0,F,B,2016-08-24T12:20:00,"4700 Block Centre AV Pittsburgh, PA 15213",3929 Retail Theft.,"4700 Block Centre AV Pittsburgh, PA 15213",Bloomfield,5,804.0,8.0,2.0,-79.949277,40.452551


In [5]:
# Clean master_df & Print Pittsburgh Crime DataFrame (organized_df)
# Split up ARRESTTIME column into Arrest Hour, Arrest Date, Arrest Month, Arrest Year, Arrest Unix Timestamp columns
master_df[['Date','Arrest_Time']]=master_df['ARRESTTIME'].str.split('T', expand=True,n=2)

# Drop unnecessary columns: PK, CCR, ARRESTLOCATION, OFFENSES, INCIDENTTRACT, COUNCIL_DISTRICT, PUBLIC_WORKS_DIVISION.
del master_df['PK']
del master_df['CCR']
del master_df['ARRESTTIME']
del master_df['ARRESTLOCATION']
del master_df['INCIDENTTRACT']
del master_df['COUNCIL_DISTRICT']
del master_df['PUBLIC_WORKS_DIVISION']
# Rename Columns
master_df.columns = ['Age', 'Gender', 'Race', 'Offenses Severity', 'Incident Location', 'Incident Neighborhood', 'Incident Zone', 'Incident Longitude', 'Incident Latitude', 'Date', 'Arrest_Time']
# Reorganize Columns
organized_df = master_df[['Age', 'Gender', 'Race', 'Offenses Severity', 'Date', 'Arrest_Time', 'Incident Location', 'Incident Latitude', 'Incident Longitude', 'Incident Neighborhood', 'Incident Zone']]
# View DataFrame
organized_df.head(1)

Unnamed: 0,Age,Gender,Race,Offenses Severity,Date,Arrest_Time,Incident Location,Incident Latitude,Incident Longitude,Incident Neighborhood,Incident Zone
0,42.0,F,B,3929 Retail Theft.,2016-08-24,12:20:00,"4700 Block Centre AV Pittsburgh, PA 15213",40.452551,-79.949277,Bloomfield,5


In [6]:
# Merge organized_df with weather_df
merged_df = weather_df.merge(organized_df, how="inner", on=["Date", "Date"])
merged_df.head(1)

Unnamed: 0,Date,Temperature,Weather Condition,Age,Gender,Race,Offenses Severity,Arrest_Time,Incident Location,Incident Latitude,Incident Longitude,Incident Neighborhood,Incident Zone
0,2016-02-12,20.53,Mostly Cloudy,24.0,F,B,"2701(a)(1) Simple Assault - Intent., Know., Re...",21:00:00,"1100 Block McKinney LANE Pittsburgh, PA 15205",40.423529,-80.035034,Ridgemont,6


In [7]:
# Clean & Organize merged_df
merged_df[['Arrest Year','Arrest Month','Arrest Date']]=merged_df['Date'].str.split('-', expand=True,n=2)
merged_df[['Arrest_Hour','Arrest_LongTime']]=merged_df['Arrest_Time'].str.split(':', expand=True,n=1)

# Convert "Arrest Month" column from Month Numbers to Month Names
look_up = {'01': 'January', '02': 'February', '03': 'March', '04': 'April', '05': 'May',
            '06': 'June', '07': 'July', '08': 'August', '09': 'September', '10': 'October', '11': 'November', '12': 'December'}
merged_df['Arrest Month'] = merged_df['Arrest Month'].apply(lambda x: look_up[x])

# Convert "Arrest Timestamp" column into "Arrest Day" column
merged_df['Date'] = pd.to_datetime(merged_df.Date) 
merged_df['Arrest Day of Week'] = merged_df.Date.dt.weekday_name

# Create 'Arrest Time of Day' column from 'Arrest Hour' column
merged_df["Arrest_Hour"] = pd.to_numeric(merged_df["Arrest_Hour"])
# Create bins in which to place values based upon Time of Day
day_bins = [0, 4, 8, 12, 16, 20, 24]
# Create labels for day_bins
day_labels = ["Early Morning", "Morning", "Late Morning", "Early Afternoon", "Early Evening", "Evening"]
# Create 'Arrest Time of Day' column for storing bins
merged_df['Arrest Time of Day'] = pd.cut(merged_df['Arrest_Hour'], day_bins, labels=day_labels)
# Deleter unwanted columns
del merged_df['Date']
del merged_df['Arrest_Time']
del merged_df['Arrest_LongTime']
del merged_df['Arrest_Hour']
# Reorganize columns
full_crime_df = merged_df[['Age', 'Gender', 'Race', 'Offenses Severity', 'Arrest Day of Week', 'Arrest Date', 'Arrest Month', 'Arrest Year', 'Temperature', 'Weather Condition', 'Incident Location', 'Incident Latitude', 'Incident Longitude', 'Incident Neighborhood', 'Incident Zone']]
# Show DataFrame
full_crime_df.head()


Unnamed: 0,Age,Gender,Race,Offenses Severity,Arrest Day of Week,Arrest Date,Arrest Month,Arrest Year,Temperature,Weather Condition,Incident Location,Incident Latitude,Incident Longitude,Incident Neighborhood,Incident Zone
0,24.0,F,B,"2701(a)(1) Simple Assault - Intent., Know., Re...",Friday,12,February,2016,20.53,Mostly Cloudy,"1100 Block McKinney LANE Pittsburgh, PA 15205",40.423529,-80.035034,Ridgemont,6
1,31.0,M,W,13(a)(16) Possession of Controlled Substance /...,Wednesday,17,February,2016,32.76,Overcast,"800 Block Concord ST Pittsburgh, PA 15212",40.457564,-79.997924,East Allegheny,1
2,31.0,F,O,9015 Failure To Appear/Arrest on Attachment Order,Thursday,3,March,2016,34.11,Partly Cloudy,"600 Block Liberty AV Pittsburgh, PA 15222",40.441773,-80.00162,Central Business District,2
3,22.0,M,W,2702 Aggravated Assault. / 2706 Terroristic Th...,Monday,4,April,2016,50.96,Light Rain,"S 15th ST & E Carson ST Pittsburgh, PA 15203",40.428815,-79.983425,South Side Flats,3
4,36.0,M,W,13(a)(32) Paraphernalia - Use or Possession,Friday,8,April,2016,39.39,Overcast,"Boyd ST & Diamond ST Pittsburgh, PA 15219",40.438656,-79.993293,Bluff,2


In [8]:
# Weather vs Crime Frequency / Crime Type
    # Temperature (Hot vs Cold)
        # X-Axis: Temperature (Ex. Very Cold, Cold, Moderate, Hot, Very Hot, etc.)
        # Y-Axis: Amount of Crime (measured by incident number)
    # Weather Condition: 
        # X-Axis: Type of weather (ie rain, sun, snow, etc.)
        # Y-Axis: Amount of Crime (measured by incident number)


In [9]:
# Crime Frequency vs Incident Location (Heatmap) (Liam)
    # Crime Frequency vs Police Zone (Heatmap) (Liam)

    #Drop irrelevant columns

heatmap_DF = full_crime_df[['Offenses Severity', 'Arrest Day of Week', 'Arrest Date', 'Arrest Month', 'Arrest Year', 'Temperature', 'Weather Condition', 'Incident Location', 'Incident Latitude', 'Incident Longitude', 'Incident Neighborhood', 'Incident Zone']]
heatmap_DF.head()

Unnamed: 0,Offenses Severity,Arrest Day of Week,Arrest Date,Arrest Month,Arrest Year,Temperature,Weather Condition,Incident Location,Incident Latitude,Incident Longitude,Incident Neighborhood,Incident Zone
0,"2701(a)(1) Simple Assault - Intent., Know., Re...",Friday,12,February,2016,20.53,Mostly Cloudy,"1100 Block McKinney LANE Pittsburgh, PA 15205",40.423529,-80.035034,Ridgemont,6
1,13(a)(16) Possession of Controlled Substance /...,Wednesday,17,February,2016,32.76,Overcast,"800 Block Concord ST Pittsburgh, PA 15212",40.457564,-79.997924,East Allegheny,1
2,9015 Failure To Appear/Arrest on Attachment Order,Thursday,3,March,2016,34.11,Partly Cloudy,"600 Block Liberty AV Pittsburgh, PA 15222",40.441773,-80.00162,Central Business District,2
3,2702 Aggravated Assault. / 2706 Terroristic Th...,Monday,4,April,2016,50.96,Light Rain,"S 15th ST & E Carson ST Pittsburgh, PA 15203",40.428815,-79.983425,South Side Flats,3
4,13(a)(32) Paraphernalia - Use or Possession,Friday,8,April,2016,39.39,Overcast,"Boyd ST & Diamond ST Pittsburgh, PA 15219",40.438656,-79.993293,Bluff,2


In [10]:
import geopandas as gpd
import folium

#Pull Pittsburgh City Borders
congr_districts = gpd.read_file("Resources/cb_2015_us_cd114_20m")
congr_districts.crs = {'datum': 'NAD83', 'ellps': 'GRS80', 'proj':'longlat', 'no_defs':True}

district14 = congr_districts[ congr_districts.GEOID == '4214' ]  # 42 = PA, 14 = District

In [11]:
#Create folium map config
distric_map = folium.Map(location=[40, -80], zoom_start=9, tiles='cartodbpositron' )
print('default map crs: ',distric_map.crs)

default map crs:  EPSG3857


In [12]:
district14 = district14.to_crs({'init':'epsg:3857'})

In [13]:
#Create Folium Map
folium.GeoJson(district14).add_to(distric_map)
distric_map

In [14]:
#Round and group coordinates
heatmap_DF["Rounded Lat"] = round(heatmap_DF["Incident Latitude"], 2)
heatmap_DF["Rounded Long"] = round(heatmap_DF["Incident Longitude"], 2)
heatmap_DF2 = heatmap_DF.groupby(["Rounded Lat", "Rounded Long"]).count()
heatmap_DF2 = heatmap_DF2.reset_index()
heatmap_DF2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Rounded Lat,Rounded Long,Offenses Severity,Arrest Day of Week,Arrest Date,Arrest Month,Arrest Year,Temperature,Weather Condition,Incident Location,Incident Latitude,Incident Longitude,Incident Neighborhood,Incident Zone
0,0.0,0.0,1553,1553,1553,1553,1553,1538,1538,1553,1553,1553,1514,1514
1,39.75,-78.68,1,1,1,1,1,1,1,1,1,1,1,1
2,39.99,-75.78,1,1,1,1,1,1,1,1,1,1,1,1
3,40.18,-80.24,1,1,1,1,1,1,1,1,1,1,1,1
4,40.31,-80.08,2,2,2,2,2,2,2,2,2,2,2,2


In [15]:
import folium
from folium.plugins import HeatMap

#Create heatmap, base column doesnt matter
max_amount = float(heatmap_DF2["Temperature"].max())

hmap = folium.Map(location=[40.0, -80], zoom_start=7, )

hm_wide = HeatMap( list(zip(heatmap_DF2["Rounded Lat"].values, heatmap_DF2["Rounded Long"].values, heatmap_DF2.Temperature.values)), 
                   min_opacity=0.2,
                   max_val=max_amount,
                   radius=17, blur=15, 
                   max_zoom=1, 
                 )

folium.GeoJson(district14).add_to(hmap)
hmap.add_child(hm_wide)

In [16]:
# Crime Severity vs Incident Location (Heatmap) (Liam)

#TODO code that seperates felonies from misdemeanors
#heatmap_DF3 = heatmap_DF2[]

In [None]:
#For Misdemeanors
import folium
from folium.plugins import HeatMap

max_amount = float(heatmap_DF3["Misdemeanors"].max())

hmap = folium.Map(location=[40.0, -80], zoom_start=7, )

hm_wide = HeatMap( list(zip(heatmap_DF3["Rounded Lat"].values, heatmap_DF2["Rounded Long"].values, heatmap_DF3.Misdemeanors.values)), 
                   min_opacity=0.2,
                   max_val=max_amount,
                   radius=17, blur=15, 
                   max_zoom=1, 
                 )

folium.GeoJson(district14).add_to(hmap)
hmap.add_child(hm_wide)

In [None]:
#For Felonies
import folium
from folium.plugins import HeatMap

max_amount = float(heatmap_DF3["Felonies"].max())

hmap = folium.Map(location=[40.0, -80], zoom_start=7, )

hm_wide = HeatMap( list(zip(heatmap_DF3["Rounded Lat"].values, heatmap_DF3["Rounded Long"].values, heatmap_DF3.Felonies.values)), 
                   min_opacity=0.2,
                   max_val=max_amount,
                   radius=17, blur=15, 
                   max_zoom=1, 
                 )

folium.GeoJson(district14).add_to(hmap)
hmap.add_child(hm_wide)

In [17]:
# Age, Gender and Race vs Crime (Pie Charts?)
    # Crime Frequency
        # Grouped by incident location & date
        
    # Crime Type
    
    # Crime Severity (Felonies, Misdemeanors or Violations/Infractions)


In [18]:
#full_crime_df.groupby(['Gender','Race']).count().unstack().plot.bar()

In [19]:
#PH# Crime Frequency vs Time of Year (seasonality)
#crime_plot = full_crime_df.groupby(["Arrest Year","Arrest Month"]).count().stack().plot.bar()
#crime_plot.set_xlabel('Arrest Year')
#crime_plot.set_ylabel('OFFENSES')
#crime_plot.set_title('crime vs time of year')

In [20]:
#PH# Crime Frequency vs Time of Year (seasonality)

# Split data into groups based upon day of week
#month_groups = organized_df.groupby('Arrest Month').sum()

#sorter2 = ['January', 'Febraury', 'March', 'April', 'May', 'June', 
#          'July', 'August', 'September', 'October', 'November', 'December']
#sorterIndex = dict(zip(sorter2,range(len(sorter2))))

#month_groups['Month_id'] = month_groups.index
#month_groups['Month_id'] = month_groups['Month_id'].map(sorterIndex)
#month_groups.sort_values('Month_id', inplace=True)

#fig, ax = plt.subplots(facecolor='white')
 
#ax.bar(month_groups.Month_id, month_groups.Criminal)
#ax.set_xticks(month_groups.Month_id)
#ax.set_xticklabels(month_groups.index)

# Edit
    # Change Colors
    # Make Labels Diagonal
    # Change Y-Axis meaurements

In [21]:
#Time_df = full_crime_df.loc[:,["Arrest Year","Arrest Month"]]
#Arrest_Year_group = Time_df.groupby(["Arrest Year","Arrest Month"]).count()


In [22]:
# Crime Severity vs Time of Year (seasonality)
#Arrest month vs len(grouped(Incident Severity))
# X-Axis: Months
# Y-Axis: Severity of Crime (measured by incident number)


In [23]:
#PH# Crime Frequency vs Day of Week

# Split data into groups based upon day of week
#day_groups = organized_df.groupby('Arrest Day of Week').sum()

#sorter = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
#sorterIndex = dict(zip(sorter,range(len(sorter))))

#day_groups['Day_id'] = day_groups.index
#day_groups['Day_id'] = day_groups['Day_id'].map(sorterIndex)
#day_groups.sort_values('Day_id', inplace=True)

#fig, ax = plt.subplots(facecolor='white')
 
#ax.bar(day_groups.Day_id, day_groups.Criminal)
#ax.set_xticks(day_groups.Day_id)
#ax.set_xticklabels(day_groups.index)


In [24]:
# Crime Severity vs Day of Week
#Arrest Day of Week vs len(grouped(Incident Severity))
# X-Axis: Day (Monday, Tuesday, Wednesday, etc.)
# Y-Axis: Severity of Crime (measured by incident type)


In [25]:
#PH# Crime Frequency vs Time of Day 

# Split data into groups based upon day of week
#time_of_day_groups = organized_df.groupby('Arrest Time of Day')
# Find out how many arrests occured during each month group
#incidents_time_of_day = time_of_day_groups['Criminal'].count()
# Chart our data, give it a title, and label the axes
#incidents_time_of_day_chart = incidents_time_of_day.plot(kind="bar", title="Crime Frequency vs. Time of Day")
#incidents_time_of_day_chart.set_xlabel("Time of Day")
#incidents_time_of_day_chart.set_ylabel("Amount of Crime (Per Incident)")
#plt.xticks(rotation = 45)


#plt.tight_layout()

# Edit
# Change Colors
# Make Labels Diagonal
# Change Y-Axis meaurements

In [26]:
# Crime Severity vs Time of Day 
# X-Axis: Time of Day (early morning, morning, early afternoon, noon, etc.)
# Y-Axis: Severity of Crime (measured by incident type)
