<h1>Crime and Weather<h1>
<h2>Graphs and Exploratory Analysis<h2>
<h3>Lauren Paredes<h3>



In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

Read in both crime and weather into data frames using pandas.

In [2]:
crimeDF= pd.read_csv("crime.csv", encoding='windows-1254')
weatherDF = pd.read_csv("weather_data_long.csv")


Clean up and trim down the dataframes

In [3]:
del weatherDF['Time']
del weatherDF["WinSpeed"]
del weatherDF["Pressure"]
del weatherDF["Humidity"]
del weatherDF["Wind"]
del weatherDF["DewPoint"]
del weatherDF["WindGust"]
del weatherDF["Precip."]
del weatherDF["Condition"]

In [4]:
del crimeDF["incident_id"]
del crimeDF["offense_id"]
del crimeDF["OFFENSE_CODE"]
del crimeDF["OFFENSE_CODE_EXTENSION"]
del crimeDF["LAST_OCCURRENCE_DATE"]
del crimeDF["REPORTED_DATE"]
del crimeDF["VICTIM_COUNT"]
del crimeDF["GEO_X"]
del crimeDF["GEO_Y"]
del crimeDF["GEO_LON"]
del crimeDF["GEO_LAT"]
del crimeDF["DISTRICT_ID"]
del crimeDF["PRECINCT_ID"]
del crimeDF["NEIGHBORHOOD_ID"]
del crimeDF["IS_CRIME"]
del crimeDF["IS_TRAFFIC"]
del crimeDF["INCIDENT_ADDRESS"]

In [5]:
# need the occurance date to be in the format of datetime
crimeDF['Date'] = pd.to_datetime(crimeDF["FIRST_OCCURRENCE_DATE"]).dt.date
del crimeDF["FIRST_OCCURRENCE_DATE"]
# crimeDF.rename(columns={'OCCURRENCE_DATE':'Date'},inplace =True)

In [6]:
# Making sure both dataframes have a common col to merge on with the correct datatype
crimeDF['Date']= pd.to_datetime(crimeDF['Date'])
weatherDF['Date']= pd.to_datetime(weatherDF['Date'])

In [7]:
# Set the temperature for the average divided by group
weatherDF= weatherDF.groupby(['Date']).max()

<h3> Merging crime and weather on a common date<h3>

In [8]:
mergeOnDate = pd.merge(crimeDF, weatherDF, how='outer',on='Date')
mergeOnDate=mergeOnDate.dropna(subset=['OFFENSE_TYPE_ID'])
mergeOnDate=mergeOnDate.dropna(subset=['Temperature'])
mergeOnDate['Temperature'] =mergeOnDate['Temperature'].astype('int')
display(mergeOnDate)

Unnamed: 0,OFFENSE_TYPE_ID,OFFENSE_CATEGORY_ID,Date,Temperature
0,criminal-mischief-other,public-disorder,2017-06-25,73
1,criminal-mischief-other,public-disorder,2017-06-25,73
2,criminal-mischief-other,public-disorder,2017-06-25,73
3,criminal-mischief-other,public-disorder,2017-06-25,73
4,criminal-mischief-other,public-disorder,2017-06-25,73
...,...,...,...,...
379047,fraud-by-use-of-computer,white-collar-crime,2019-01-14,43
379048,fraud-by-use-of-computer,white-collar-crime,2019-01-14,43
379049,pawn-broker-viol,all-other-crimes,2019-01-14,43
379050,outside-steal-recovered-veh,all-other-crimes,2019-01-14,43


<h2>Graphs and Exploratory Analysis<h2>

In [None]:
graphJoin =mergeOnDate
graphJoin['Year'] = pd.DatetimeIndex(graphJoin['Date']).year
graphJoin['Month'] = pd.DatetimeIndex(graphJoin['Date']).month

In [None]:
plt.scatter(graphJoin['Year'],graphJoin['Temperature'])
plt.title("Temperature vs year Denver")

plt.xticks(np.arange( min(graphJoin["Year"]) , max(graphJoin["Year"]+1 ),1.0))
plt.xlabel("Date")
plt.ylabel("Temp(F)")
plt.show()

In [None]:
plt.scatter(graphJoin['Month'],graphJoin['Temperature'])
plt.title("Temperature vs month Denver")

plt.xticks(np.arange( min(graphJoin["Month"]) , max(graphJoin["Month"]+1 ),1.0))
plt.xlabel("Date")
plt.ylabel("Temp(F)")
plt.show()

In [None]:
ax = graphJoin.groupby("Month").size().plot(kind = "bar", title= "Total Crime Events by Month Denver")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=0)
plt.show()

In [None]:
ax = graphJoin.groupby("Year").size().plot(kind = "bar", title= "Total Crime Events by Year Denver")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=0)
plt.show()

Assigning crime types in terms of society, person, property. This categorization logic is explained in report.

In [9]:
# all-other-crimes = 37218/total size = 12.92
offenseType =(mergeOnDate["OFFENSE_TYPE_ID"].value_counts())
print(mergeOnDate.shape)
mergeOnDate= mergeOnDate[~mergeOnDate['OFFENSE_TYPE_ID'].isin(offenseType[offenseType < 5000].index)]

t=mergeOnDate.groupby('OFFENSE_CATEGORY_ID').size()
display(mergeOnDate)
display(t)
print(mergeOnDate.shape)

(228089, 4)


Unnamed: 0,OFFENSE_TYPE_ID,OFFENSE_CATEGORY_ID,Date,Temperature
0,criminal-mischief-other,public-disorder,2017-06-25,73
1,criminal-mischief-other,public-disorder,2017-06-25,73
2,criminal-mischief-other,public-disorder,2017-06-25,73
3,criminal-mischief-other,public-disorder,2017-06-25,73
4,criminal-mischief-other,public-disorder,2017-06-25,73
...,...,...,...,...
379037,theft-of-motor-vehicle,auto-theft,2019-01-14,43
379038,theft-of-motor-vehicle,auto-theft,2019-01-14,43
379039,theft-of-motor-vehicle,auto-theft,2019-01-14,43
379040,theft-of-motor-vehicle,auto-theft,2019-01-14,43


OFFENSE_CATEGORY_ID
all-other-crimes                11557
auto-theft                      20412
burglary                         5803
larceny                         28935
other-crimes-against-persons    15521
public-disorder                 19475
theft-from-motor-vehicle        29673
dtype: int64

(131376, 4)


In [None]:
h= mergeOnDate.groupby(['OFFENSE_TYPE_ID']).size()

h.shape
h.head(30)

In [10]:
types ={'auto-theft': 'property',
        'robbery': 'property',
        'arson': 'property',
        'theft-from-motor-vehicle': 'property',
        'burglary': 'property',
        'larceny': 'property',
        'sexual-assault': 'person',
        'drug-alcohol':'society',
        'other-crimes-against-persons': 'person',
        'aggravated-assault': 'person',
        'murder': 'person',
        'white-collar-crime': 'society',
        'public-disorder': 'society',
        'all-other-crimes': 'society'}

In [11]:
mergeOnDate["OFFENSE_CATEGORY_ID"] = mergeOnDate["OFFENSE_CATEGORY_ID"].map(types)
display(mergeOnDate)

Unnamed: 0,OFFENSE_TYPE_ID,OFFENSE_CATEGORY_ID,Date,Temperature
0,criminal-mischief-other,society,2017-06-25,73
1,criminal-mischief-other,society,2017-06-25,73
2,criminal-mischief-other,society,2017-06-25,73
3,criminal-mischief-other,society,2017-06-25,73
4,criminal-mischief-other,society,2017-06-25,73
...,...,...,...,...
379037,theft-of-motor-vehicle,property,2019-01-14,43
379038,theft-of-motor-vehicle,property,2019-01-14,43
379039,theft-of-motor-vehicle,property,2019-01-14,43
379040,theft-of-motor-vehicle,property,2019-01-14,43


Separate into hot, mild, cold dataframes

In [13]:
hotDays = mergeOnDate[mergeOnDate['Temperature'] > 80]
mildDays = mergeOnDate[(mergeOnDate['Temperature'] >= 50) & (mergeOnDate['Temperature'] <= 80)]
coldDays = mergeOnDate[mergeOnDate['Temperature'] < 50]

numofweathercrimes = len(mergeOnDate.index)
print("Total Entry Count: ",numofweathercrimes)
print("Hot days overview")
display(hotDays)
print("Mild days overview")
display(mildDays)
print("cold days overview")
display(coldDays)

TypeError: '>' not supported between instances of 'str' and 'int'

<h4>General distributions of crime types on different temperature categories<h4>

In [None]:
# overall merged data counts
print("All Data grouped by categoryID counts")
crimetypes = mergeOnDate.groupby(["OFFENSE_CATEGORY_ID"]).size()
print(crimetypes.head())
print(mergeOnDate.shape)

In [None]:
# category types on hot days from the hotDays dataframe
print("Hot Data grouped by categoryID counts")
hotTypes=hotDays.groupby(["OFFENSE_CATEGORY_ID"]).size()
print(hotTypes.head())
print(hotDays.shape)


In [None]:
# category types on mild days from the mildDays dataframe
print("Mild Data grouped by categoryID counts")
mildTypes =mildDays.groupby(["OFFENSE_CATEGORY_ID"]).size()
print(mildTypes.head())
print(mildDays.shape)

In [None]:
# category types on cold days from the coldDays dataframe
print("Cold Data grouped by categoryID counts")
coldTypes = coldDays.groupby(["OFFENSE_CATEGORY_ID"]).size()
print(coldTypes.head())
print(coldDays.shape)

In [None]:
pdisHot=(hotTypes/numofweathercrimes)*100
pdisCold=(coldTypes/numofweathercrimes)*100
pdisMild = (mildTypes/numofweathercrimes)*100
print("General Disributions for type and temperature")
print("Hot general Distribution:")
display(pdisHot)
print("Mild general Distribution:")
display(pdisMild)
print("Cold general Distribution:")
display(pdisCold)

<h3>Bayesian Classifications<h3>

P(Category|Temp) = /frac{(P(C and T))}{P(T)}`

P(Category) is represented by a series with Person, Property, and Society crimes
P(Temperature) is represented by a series with Hot, Mild, and Cold days

In [None]:
# def calculateClassPropbs(dataset):
#     numDataPoint = dataset.size
#     classProbs= {}
#     for dataPoint in 

In [None]:
priorCategories = (crimetypes / (numofweathercrimes))
print("Prior Probabilities of Categories")
display(priorCategories)
print("Sum of percentages of Categories: ",priorCategories.sum())



In [None]:
pHot = ((hotDays.shape[0])/numofweathercrimes)
pMild= ((mildDays.shape[0])/numofweathercrimes)
pCold=((coldDays.shape[0])/numofweathercrimes)
d = {'Hot':pHot, 'Mild':pMild,'Cold':pCold}
priorTemperatures= pd.Series(data=d, index=['Hot','Mild','Cold'])
print("Probabilities of Temperatures")
display(priorTemperatures)
print("Sum of percentages of Temperatures",priorTemperatures.sum())

P(T|C)= P(T and C)* P(C)

In [None]:

pHotTypes=(hotTypes/hotDays.shape[0])*100
pColdTypes=(coldTypes/coldDays.shape[0])*100
pMildTypes = (mildTypes/mildDays.shape[0])*100
print("Given a hot day probability of a type of crime: ")
print("Total sum: ", pHotTypes.sum())
display(pHotTypes)
print("Given a mild day probability of a type of crime: ")
print("Total sum: ", pColdTypes.sum())
display(pColdTypes)
print("Given a cold day probability of a type of crime: ")
print("Total sum: ", pMildTypes.sum())
display(pMildTypes)
# display(hotTypes.sum())


