Crime and Weather: Boston

In [14]:
import math
import csv
import statistics 
import argparse
import pandas as pd
import numpy as np
import pickle
from pathlib import Path
import matplotlib.pyplot as plt

Create Dataframes from Data Files

In [15]:
crimeDF = pd.read_csv("bcrime.csv", encoding='windows-1254')
weatherDF = pd.read_csv("bweather.csv")

numofcrimes = len(crimeDF.index)
print("Total Crimes:", numofcrimes)

numofweather = len(weatherDF.index)
print("Weather Entries:", numofweather)

Total Crimes: 327820
Weather Entries: 3749


  crimeDF = pd.read_csv("bcrime.csv", encoding='windows-1254')


Remove Unwanted Data and Display Working Dataframe

In [16]:
del crimeDF["INCIDENT_NUMBER"]
del crimeDF["UCR_PART"]
del crimeDF["OFFENSE_CODE"]
del crimeDF["DISTRICT"]
del crimeDF["REPORTING_AREA"]
del crimeDF["SHOOTING"]
del crimeDF["DAY_OF_WEEK"]
del crimeDF["HOUR"]
del crimeDF["Lat"]
del crimeDF["Long"]
del crimeDF["STREET"]

In [17]:
del weatherDF["High Dew Point (F)"]
del weatherDF["Low Dew Point (F)"]
del weatherDF["Avg Dew Point (F)"]
del weatherDF["High Humidity (%)"]
del weatherDF["Low Sea Level Press (in)"]
del weatherDF["High Visibility (mi)"]
del weatherDF["Avg Visibility (mi)"]
del weatherDF["Low Visibility (mi)"]
del weatherDF["Snowfall (in)"]
del weatherDF["Precip (in)"]
del weatherDF["Low Humidity (%)"]
del weatherDF["Avg Humidity (%)"]
del weatherDF["Avg Sea Level Press (in)"]
del weatherDF["High Sea Level Press (in)"]
del weatherDF["High Temp (F)"]
del weatherDF["Low Temp (F)"]
del weatherDF["High Wind (mph)"]
del weatherDF["High Wind Gust (mph)"]

Print Heads of Weather and Crime Data Frames 

In [18]:
crimeDF['OCCURRED_ON_DATE'] = pd.to_datetime(crimeDF.OCCURRED_ON_DATE)
crimeDF['Date'] = pd.to_datetime(crimeDF['OCCURRED_ON_DATE'].dt.date)
crimeDF.head()

Unnamed: 0,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,OCCURRED_ON_DATE,YEAR,MONTH,Location,Date
0,Disorderly Conduct,DISTURBING THE PEACE,2018-10-03 20:13:00,2018,10,"(42.26260773, -71.12118637)",2018-10-03
1,Property Lost,PROPERTY - LOST,2018-08-30 20:00:00,2018,8,"(42.35211146, -71.13531147)",2018-08-30
2,Other,THREATS TO DO BODILY HARM,2018-10-03 19:20:00,2018,10,"(42.30812619, -71.07692974)",2018-10-03
3,Aggravated Assault,ASSAULT - AGGRAVATED - BATTERY,2018-10-03 20:00:00,2018,10,"(42.35945371, -71.05964817)",2018-10-03
4,Aircraft,AIRCRAFT INCIDENTS,2018-10-03 20:49:00,2018,10,"(42.37525782, -71.02466343)",2018-10-03


In [19]:
weatherDF['Date'] = pd.to_datetime(dict(year=weatherDF.Year, month=weatherDF.Month, day=weatherDF.Day))
weatherDF.head()

Unnamed: 0,Year,Month,Day,Avg Temp (F),Avg Wind (mph),Events,Date
0,2008,1,1,34,10,Both,2008-01-01
1,2008,1,2,22,15,Snow,2008-01-02
2,2008,1,3,11,17,,2008-01-03
3,2008,1,4,20,12,,2008-01-04
4,2008,1,5,35,9,,2008-01-05


--------------

Merge Data Frames Based on Date and Complete Entries

In [20]:
mergedDF = pd.merge(weatherDF, crimeDF, how='outer', on='Date')
mergedDF = mergedDF.dropna(subset=['OFFENSE_DESCRIPTION'])
mergedDF = mergedDF.dropna(subset=['Avg Temp (F)'])
mergedDF['Year'] = mergedDF['Year'].astype(int)
mergedDF['Month'] = mergedDF['Month'].astype(int)
mergedDF['Day'] = mergedDF['Day'].astype(int)


del mergedDF["YEAR"]
del mergedDF["MONTH"]
#del mergedDF["Day"]

display(mergedDF)

Unnamed: 0,Year,Month,Day,Avg Temp (F),Avg Wind (mph),Events,Date,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,OCCURRED_ON_DATE,Location
2722,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 00:00:00,"(-1.00000000, -1.00000000)"
2723,2015,6,15,59.0,9.0,Rain,2015-06-15,Harassment,HARASSMENT,2015-06-15 00:00:00,"(42.29109287, -71.06594539)"
2724,2015,6,15,59.0,9.0,Rain,2015-06-15,Fraud,FRAUD - FALSE PRETENSE / SCHEME,2015-06-15 09:00:00,"(42.34283015, -71.07374670)"
2725,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 00:00:00,"(42.28363434, -71.08281320)"
2726,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 16:00:00,"(42.29037227, -71.06845477)"
...,...,...,...,...,...,...,...,...,...,...,...
280262,2018,4,8,37.0,13.0,,2018-04-08,Investigate Property,INVESTIGATE PROPERTY,2018-04-08 01:02:00,"(42.34391716, -71.08967541)"
280263,2018,4,8,37.0,13.0,,2018-04-08,Fire Related Reports,"FIRE REPORT - HOUSE, BUILDING, ETC.",2018-04-08 00:55:00,"(42.35095909, -71.07412780)"
280264,2018,4,8,37.0,13.0,,2018-04-08,Missing Person Located,MISSING PERSON - LOCATED,2018-04-08 00:23:00,"(42.26839985, -71.11127345)"
280265,2018,4,8,37.0,13.0,,2018-04-08,Missing Person Reported,MISSING PERSON,2018-04-08 00:23:00,"(42.26839985, -71.11127345)"


-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Create Data Frames Based on Average Temperatue

In [21]:
hotDays = mergedDF[mergedDF['Avg Temp (F)'] > 75]
mildDays = mergedDF[(mergedDF['Avg Temp (F)'] >= 45) & (mergedDF['Avg Temp (F)'] <= 75)]
coldDays = mergedDF[mergedDF['Avg Temp (F)'] < 45]

numofweathercrimes = len(mergedDF.index)

In [22]:
display(hotDays)

Unnamed: 0,Year,Month,Day,Avg Temp (F),Avg Wind (mph),Events,Date,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,OCCURRED_ON_DATE,Location
5012,2015,6,24,76.0,11.0,,2015-06-24,Drug Violation,"DRUGS - POSS CLASS A - HEROIN, ETC.",2015-06-24 19:47:00,"(42.33428841, -71.07239518)"
5013,2015,6,24,76.0,11.0,,2015-06-24,Other,TRESPASSING,2015-06-24 19:47:00,"(42.33428841, -71.07239518)"
5014,2015,6,24,76.0,11.0,,2015-06-24,Motor Vehicle Accident Response,M/V - LEAVING SCENE - PROPERTY DAMAGE,2015-06-24 10:00:00,"(42.35595391, -71.13852468)"
5015,2015,6,24,76.0,11.0,,2015-06-24,Property Lost,PROPERTY - LOST,2015-06-24 13:00:00,"(42.32194840, -71.08138427)"
5016,2015,6,24,76.0,11.0,,2015-06-24,Confidence Games,FRAUD - CREDIT CARD / ATM FRAUD,2015-06-24 00:00:00,"(42.35840146, -71.05041556)"
...,...,...,...,...,...,...,...,...,...,...,...
231003,2017,9,27,76.0,9.0,,2017-09-27,Larceny,LARCENY THEFT OF BICYCLE,2017-09-27 00:31:00,"(42.34396866, -71.10001300)"
231004,2017,9,27,76.0,9.0,,2017-09-27,Verbal Disputes,VERBAL DISPUTE,2017-09-27 00:19:32,"(42.26983458, -71.10003437)"
231005,2017,9,27,76.0,9.0,,2017-09-27,Motor Vehicle Accident Response,M/V ACCIDENT - PERSONAL INJURY,2017-09-27 00:13:00,"(42.32974270, -71.05826343)"
231006,2017,9,27,76.0,9.0,,2017-09-27,Motor Vehicle Accident Response,M/V ACCIDENT - PERSONAL INJURY,2017-09-27 00:01:00,"(42.38291031, -71.01860585)"


In [23]:
ldf = hotDays

theft = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("theft", case=False)]
numoftheft = len(theft.index)

vandalism = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("vandalism", case=False)]
numofvandalism = len(vandalism.index)

larceny = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("larceny", case=False)]
numoflarceny = len(larceny.index)

burglary = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("burglary", case=False)]
numofburglary = len(burglary.index)

property = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("property", case=False)]
numofproperty = len(property.index)

fire = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("fire", case=False)]
numoffire = len(fire.index)

numcrimeproph = numofburglary + numoflarceny + numoftheft + numofproperty + numofvandalism + numoffire

proppercenth = str(round(((numcrimeproph/numofweathercrimes)*100), 2))

print("Crimes Against Property on Hot Days:", numcrimeproph)
print("Percent of Crimes:", proppercenth)
print("Theft Crimes:", numoftheft)
print("Vadalism Crimes:", numofvandalism)
print("Larceny Crimes:", numoflarceny)
print("Burglary Crimes:", numofburglary)
print("Property Crimes:", numofproperty)
print("Fire Related Crimes:", numoffire)
print()

assault = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("assault", case=False)]
numofassault = len(assault.index)

manslaught = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("manslaughter", case=False)]
numofmanslaught = len(manslaught.index)

warrant = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("warrant", case=False)]
numofwarrant = len(warrant.index)

medical = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("medical", case=False)]
numofmedical = len(medical.index)

person = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("person", case=False)]
numofperson = len(person.index)

harassment = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("harassment", case=False)]
numofharass = len(harassment.index)

numcrimepersonh = numofassault + numofmanslaught + numofwarrant + numofmedical + numofperson + numofharass

peoplepercenth = str(round(((numcrimepersonh/numofweathercrimes)*100), 2))

print("Crimes Against People on Hot Days:", numcrimepersonh)
print("Percent of Crimes:", peoplepercenth)
print("Assault Crimes:", numofassault)
print("Manslaughter Crimes:", numofmanslaught)
print("Warrants:", numofwarrant)
print("Medical:", numofmedical)
print("Person Crimes:", numofperson)
print("Harassment Crimes:", numofharass)
print()

Violations = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("violation", case=False)]
numofvio = len(Violations.index)

threat = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("threat", case=False)]
numofthreat = len(threat.index)

other = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("other", case=False)]
numofother = len(other.index)

numcrimesocietyh = numofvio + numofthreat + numofother

societypercenth = str(round(((numcrimesocietyh/numofweathercrimes)*100), 2))

print("Crimes Against Society/Other on Hot Days:", numcrimesocietyh)
print("Percent of Crimes:", societypercenth)
print("Violations:", numofvio)
print("Threats:", numofthreat)
print("Other:", numofother)
print()

hotdaycrimetot = numcrimeproph + numcrimepersonh + numcrimesocietyh
print("Total Hot Day Crimes:", hotdaycrimetot)

print("Percent of Weather Crimes:", (str(round((hotdaycrimetot/numofweathercrimes)*100,2))))

Crimes Against Property on Hot Days: 14220
Percent of Crimes: 5.12
Theft Crimes: 2937
Vadalism Crimes: 1533
Larceny Crimes: 3785
Burglary Crimes: 766
Property Crimes: 4789
Fire Related Crimes: 410

Crimes Against People on Hot Days: 10473
Percent of Crimes: 3.77
Assault Crimes: 2269
Manslaughter Crimes: 20
Warrants: 1112
Medical: 1726
Person Crimes: 4922
Harassment Crimes: 424

Crimes Against Society/Other on Hot Days: 3407
Percent of Crimes: 1.23
Violations: 570
Threats: 874
Other: 1963

Total Hot Day Crimes: 28100
Percent of Weather Crimes: 10.12


In [24]:
display(mildDays)

Unnamed: 0,Year,Month,Day,Avg Temp (F),Avg Wind (mph),Events,Date,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,OCCURRED_ON_DATE,Location
2722,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 00:00:00,"(-1.00000000, -1.00000000)"
2723,2015,6,15,59.0,9.0,Rain,2015-06-15,Harassment,HARASSMENT,2015-06-15 00:00:00,"(42.29109287, -71.06594539)"
2724,2015,6,15,59.0,9.0,Rain,2015-06-15,Fraud,FRAUD - FALSE PRETENSE / SCHEME,2015-06-15 09:00:00,"(42.34283015, -71.07374670)"
2725,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 00:00:00,"(42.28363434, -71.08281320)"
2726,2015,6,15,59.0,9.0,Rain,2015-06-15,Property Lost,PROPERTY - LOST,2015-06-15 16:00:00,"(42.29037227, -71.06845477)"
...,...,...,...,...,...,...,...,...,...,...,...
279247,2018,4,4,49.0,13.0,Rain,2018-04-04,Medical Assistance,SICK/INJURED/MEDICAL - PERSON,2018-04-04 01:12:00,"(42.30569111, -71.06164783)"
279248,2018,4,4,49.0,13.0,Rain,2018-04-04,Motor Vehicle Accident Response,M/V ACCIDENT - PROPERTY DAMAGE,2018-04-04 00:26:00,"(42.27796370, -71.09246318)"
279249,2018,4,4,49.0,13.0,Rain,2018-04-04,Violations,VAL - OPERATING AFTER REV/SUSP.,2018-04-04 00:27:24,"(42.34950783, -71.07949866)"
279250,2018,4,4,49.0,13.0,Rain,2018-04-04,Medical Assistance,SICK/INJURED/MEDICAL - PERSON,2018-04-04 00:10:00,"(42.25992567, -71.13095630)"


In [25]:
ldf = mildDays

theft = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("theft", case=False)]
numoftheft = len(theft.index)

vandalism = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("vandalism", case=False)]
numofvandalism = len(vandalism.index)

larceny = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("larceny", case=False)]
numoflarceny = len(larceny.index)

burglary = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("burglary", case=False)]
numofburglary = len(burglary.index)

property = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("property", case=False)]
numofproperty = len(property.index)

fire = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("fire", case=False)]
numoffire = len(fire.index)

numcrimepropm = numofburglary + numoflarceny + numoftheft + numofproperty + numofvandalism + numoffire

proppercentm = str(round(((numcrimepropm/numofweathercrimes)*100), 2))

print("Crimes Against Property on Mild Days:", numcrimepropm)
print("Percent of Crimes:", proppercentm)
print("Theft Crimes:", numoftheft)
print("Vadalism Crimes:", numofvandalism)
print("Larceny Crimes:", numoflarceny)
print("Burglary Crimes:", numofburglary)
print("Property Crimes:", numofproperty)
print("Fire Related Crimes:", numoffire)
print()

assault = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("assault", case=False)]
numofassault = len(assault.index)

manslaught = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("manslaughter", case=False)]
numofmanslaught = len(manslaught.index)

warrant = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("warrant", case=False)]
numofwarrant = len(warrant.index)

medical = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("medical", case=False)]
numofmedical = len(medical.index)

person = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("person", case=False)]
numofperson = len(person.index)

harassment = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("harassment", case=False)]
numofharass = len(harassment.index)

numcrimepersonm = numofassault + numofmanslaught + numofwarrant + numofmedical + numofperson + numofharass

peoplepercentm = str(round(((numcrimepersonm/numofweathercrimes)*100), 2))

print("Crimes Against People on Mild Days:", numcrimepersonm)
print("Percent of Crimes:", peoplepercentm)
print("Assault Crimes:", numofassault)
print("Manslaughter Crimes:", numofmanslaught)
print("Warrants:", numofwarrant)
print("Medical:", numofmedical)
print("Person Crimes:", numofperson)
print("Harassment Crimes:", numofharass)
print()

Violations = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("violation", case=False)]
numofvio = len(Violations.index)

threat = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("threat", case=False)]
numofthreat = len(threat.index)

other = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("other", case=False)]
numofother = len(other.index)

numcrimesocietym = numofvio + numofthreat + numofother

societypercentm = str(round(((numcrimesocietym/numofweathercrimes)*100), 2))

print("Crimes Against Society/Other on Mild Days:", numcrimesocietym)
print("Percent of Crimes:", societypercentm)
print("Violations:", numofvio)
print("Threats:", numofthreat)
print("Other:", numofother)
print()

milddaycrimetot = numcrimepropm + numcrimepersonm + numcrimesocietym

print("Total Mild Day Crimes:", milddaycrimetot)
print("Percent of Weather Crimes:", (str(round((milddaycrimetot/numofweathercrimes)*100,2))))


Crimes Against Property on Mild Days: 69769
Percent of Crimes: 25.14
Theft Crimes: 13890
Vadalism Crimes: 7756
Larceny Crimes: 18400
Burglary Crimes: 3922
Property Crimes: 23960
Fire Related Crimes: 1841

Crimes Against People on Mild Days: 53922
Percent of Crimes: 19.43
Assault Crimes: 11708
Manslaughter Crimes: 89
Warrants: 4879
Medical: 9264
Person Crimes: 25921
Harassment Crimes: 2061

Crimes Against Society/Other on Mild Days: 17251
Percent of Crimes: 6.22
Violations: 2890
Threats: 4478
Other: 9883

Total Mild Day Crimes: 140942
Percent of Weather Crimes: 50.78


In [26]:
display(coldDays)

Unnamed: 0,Year,Month,Day,Avg Temp (F),Avg Wind (mph),Events,Date,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,OCCURRED_ON_DATE,Location
36649,2015,10,18,40.0,11.0,,2015-10-18,Vandalism,VANDALISM,2015-10-18 00:00:00,"(42.34128751, -71.05467933)"
36650,2015,10,18,40.0,11.0,,2015-10-18,Larceny,LARCENY ALL OTHERS,2015-10-18 00:00:00,"(42.35125902, -71.06009371)"
36651,2015,10,18,40.0,11.0,,2015-10-18,Other,THREATS TO DO BODILY HARM,2015-10-18 00:01:00,"(42.30956305, -71.08990197)"
36652,2015,10,18,40.0,11.0,,2015-10-18,Larceny From Motor Vehicle,LARCENY THEFT FROM MV - NON-ACCESSORY,2015-10-18 15:30:00,"(42.34539406, -71.07380247)"
36653,2015,10,18,40.0,11.0,,2015-10-18,Fraud,FRAUD - FALSE PRETENSE / SCHEME,2015-10-18 07:51:00,"(42.28020820, -71.17087959)"
...,...,...,...,...,...,...,...,...,...,...,...
280262,2018,4,8,37.0,13.0,,2018-04-08,Investigate Property,INVESTIGATE PROPERTY,2018-04-08 01:02:00,"(42.34391716, -71.08967541)"
280263,2018,4,8,37.0,13.0,,2018-04-08,Fire Related Reports,"FIRE REPORT - HOUSE, BUILDING, ETC.",2018-04-08 00:55:00,"(42.35095909, -71.07412780)"
280264,2018,4,8,37.0,13.0,,2018-04-08,Missing Person Located,MISSING PERSON - LOCATED,2018-04-08 00:23:00,"(42.26839985, -71.11127345)"
280265,2018,4,8,37.0,13.0,,2018-04-08,Missing Person Reported,MISSING PERSON,2018-04-08 00:23:00,"(42.26839985, -71.11127345)"


In [28]:
ldf = coldDays

theft = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("theft", case=False)]
numoftheft = len(theft.index)

vandalism = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("vandalism", case=False)]
numofvandalism = len(vandalism.index)

larceny = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("larceny", case=False)]
numoflarceny = len(larceny.index)

burglary = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("burglary", case=False)]
numofburglary = len(burglary.index)

property = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("property", case=False)]
numofproperty = len(property.index)

fire = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("fire", case=False)]
numoffire = len(fire.index)

numcrimepropc = numofburglary + numoflarceny + numoftheft + numofproperty + numofvandalism + numoffire

proppercentc = str(round(((numcrimepropc/numofweathercrimes)*100), 2))

print("Crimes Against Property on Cold Days:", numcrimepropc)
print("Percent of Crimes:", proppercentc)
print("Theft Crimes:", numoftheft)
print("Vadalism Crimes:", numofvandalism)
print("Larceny Crimes:", numoflarceny)
print("Burglary Crimes:", numofburglary)
print("Property Crimes:", numofproperty)
print("Fire Related Crimes:", numoffire)
print()


assault = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("assault", case=False)]
numofassault = len(assault.index)

manslaught = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("manslaughter", case=False)]
numofmanslaught = len(manslaught.index)

warrant = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("warrant", case=False)]
numofwarrant = len(warrant.index)

medical = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("medical", case=False)]
numofmedical = len(medical.index)

person = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("person", case=False)]
numofperson = len(person.index)

harassment = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("harassment", case=False)]
numofharass = len(harassment.index)

numcrimepersonc = numofassault + numofmanslaught + numofwarrant + numofmedical + numofperson + numofharass

peoplepercentc = str(round(((numcrimepersonc/numofweathercrimes)*100), 2))

print("Crimes Against People on Cold Days:", numcrimepersonc)
print("Percent of Crimes:", peoplepercentc)
print("Assault Crimes:", numofassault)
print("Manslaughter Crimes:", numofmanslaught)
print("Warrants:", numofwarrant)
print("Medical:", numofmedical)
print("Person Crimes:", numofperson)
print("Harassment Crimes:", numofharass)
print()


Violations = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("violation", case=False)]
numofvio = len(Violations.index)

threat = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("threat", case=False)]
numofthreat = len(threat.index)

other = ldf.loc[ldf['OFFENSE_DESCRIPTION'].str.contains("other", case=False)]
numofother = len(other.index)

numcrimesocietyc = numofvio + numofthreat + numofother

societypercentc = str(round(((numcrimesocietyc/numofweathercrimes)*100), 2))

print("Crimes Against Society/Other on Cold Days:", numcrimesocietyc)
print("Percent of Crimes:", societypercentc)
print("Violations:", numofvio)
print("Threats:", numofthreat)
print("Other:", numofother)
print()


colddaycrimetot = numcrimepropc + numcrimepersonc + numcrimesocietyc
print("Total Cold Day Crimes:", colddaycrimetot)

print("Percent of Weather Crimes:", (str(round((colddaycrimetot/numofweathercrimes)*100,2))))


Crimes Against Property on Cold Days: 38306
Percent of Crimes: 13.8
Theft Crimes: 6861
Vadalism Crimes: 4095
Larceny Crimes: 9867
Burglary Crimes: 1964
Property Crimes: 14469
Fire Related Crimes: 1050

Crimes Against People on Cold Days: 31714
Percent of Crimes: 11.43
Assault Crimes: 6421
Manslaughter Crimes: 41
Warrants: 2635
Medical: 6029
Person Crimes: 15439
Harassment Crimes: 1149

Crimes Against Society/Other on Cold Days: 10174
Percent of Crimes: 3.67
Violations: 1833
Threats: 2503
Other: 5838

Total Cold Day Crimes: 80194
Percent of Weather Crimes: 28.89


In [34]:
totweatherprop = numcrimepropm + numcrimeproph + numcrimepropc
totweatherperson = numcrimepersonh + numcrimepersonm + numcrimepersonc
totweathersociety = numcrimesocietyh + numcrimesocietym + numcrimesocietyc

hotProb = hotdaycrimetot/numofweathercrimes
mildProb = milddaycrimetot/numofweathercrimes
coldProb = colddaycrimetot/numofweathercrimes

propProb = totweatherprop/numofweathercrimes
peopleProb = totweatherperson/numofweathercrimes
societyProb = totweathersociety/numofweathercrimes

bayhprop = ((numcrimeproph/hotdaycrimetot)*(hotProb))/(propProb)
bayhpeople = ((numcrimepersonh/hotdaycrimetot)*(hotProb))/(peopleProb)
bayhsociety = ((numcrimesocietyh/hotdaycrimetot)*(hotProb))/(societyProb)

baymprop = (())

#probhotprop = (()/())

print("Property Crimes: ", (str(round((totweatherprop/numofweathercrimes)*100,2))))
print("People Crimes: ", (str(round((totweatherperson/numofweathercrimes)*100,2))))
print("Society Crimes: ", (str(round((totweathersociety/numofweathercrimes)*100,2))))
print("Bayes' Theroem:")
print("Given a Hot Day, Probability of Property Crime Occuring: ", (str(round(bayhprop*100,2))))
print("Given a Hot Day, Probability of People Crime Occuring: ", (str(round(bayhpeople*100,2))))
print("Given a Hot Day, Probability of Society Crime Occuring: ", (str(round(bayhsociety*100,2))))

Property Crimes:  44.06
People Crimes:  34.63
Society Crimes:  11.11
Bayes' Theroem:
Given a Hot Day, Probability of Property Crime Occuring:  11.63
Given a Hot Day, Probability of People Crime Occuring:  10.9
Given a Hot Day, Probability of Society Crime Occuring:  11.05


-----------------------

Crime Occurrences per Month

In [None]:
ax = crimeDF.groupby("MONTH").size().plot(kind = "bar", title= "Total crime events by month")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=0)
plt.show()

In [None]:
plt.scatter(weatherDF["Month"], weatherDF["Avg Temp (F)"])
plt.show()

Crime Occurences per Year

In [None]:
ax = crimeDF.groupby("YEAR").size().plot(kind = "bar", title= "Total crime events by Year")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=0)
plt.show()

Yearly Temperature 

In [None]:
plt.scatter(mergedDF["Year"], mergedDF["Avg Temp (F)"])
plt.show()

Temperature of Weather Events 

In [None]:
plt.scatter(weatherDF["Events"], weatherDF["Avg Temp (F)"])
plt.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Crime Types and Classification (work in progress)

In [None]:
offenseType =(crimeDF["OFFENSE_CODE_GROUP"].value_counts())
print(offenseType)
crimeDF.shape
ax = offenseType.plot(title="Count distribution of offense type")
ax.set_xlabel("Crime type")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=90)
plt.show()

Crime Occurances by Type (working on classification of "Type")

In [None]:
res = crimeDF[~crimeDF['OFFENSE_DESCRIPTION'].isin(offenseType[offenseType < 1000].index)]

# print(res)
ax = (res["OFFENSE_DESCRIPTION"].value_counts()).plot(title="Count distribution of offense description")
ax.set_xlabel("Crime type")
ax.set_ylabel("Number of Occurrences")
plt.xticks(rotation=90)
plt.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

This section looks to find the best representing column for crime types 

------

Crimes Against Property (OFFENSE_DESCRIPTION COLUMN)

In [None]:
theft = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("theft", case=False)]
numoftheft = len(theft.index)
print("Theft Crimes:", numoftheft)

vandalism = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("vandalism", case=False)]
numofvandalism = len(vandalism.index)
print("Vadalism Crimes:", numofvandalism)

larceny = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("larceny", case=False)]
numoflarceny = len(larceny.index)
print("Larceny Crimes:", numoflarceny)

burglary = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("burglary", case=False)]
numofburglary = len(burglary.index)
print("Burglary Crimes:", numofburglary)

property = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("property", case=False)]
numofproperty = len(property.index)
print("Property Crimes:", numofproperty)

fire = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("fire", case=False)]
numoffire = len(fire.index)
print("Fire Related Crimes:", numoffire)

numcrimeprop = numofburglary + numoflarceny + numoftheft + numofproperty + numofvandalism + numoffire
print("Crimes Against Property:", numcrimeprop)

proppercent = (numcrimeprop/numofcrimes)
print("Percent of Crimes:", proppercent*100)

Crimes Against People (OFFENSE_DESCRIPTION COLUMN)

In [None]:
assault = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("assault", case=False)]
numofassault = len(assault.index)
print("Assault Crimes:", numofassault)

manslaught = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("manslaughter", case=False)]
numofmanslaught = len(manslaught.index)
print("Manslaughter Crimes:", numofmanslaught)

warrant = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("warrant", case=False)]
numofwarrant = len(warrant.index)
print("Warrants:", numofwarrant)

medical = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("medical", case=False)]
numofmedical = len(medical.index)
print("Medical:", numofmedical)

person = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("person", case=False)]
numofperson = len(person.index)
print("Person Crimes:", numofperson)

harassment = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("harassment", case=False)]
numofharass = len(harassment.index)
print("Harassment Crimes:", numofharass)

numcrimeperson = numofassault + numofmanslaught + numofwarrant + numofmedical + numofperson + numofharass
print("Crimes Against People:", numcrimeperson)

peoplepercent = (numcrimeperson/numofcrimes)
print("Percent of Crimes:", peoplepercent*100)

Crimes Against Society (OFFENSE_DESCRIPTION COLUMN)

In [None]:
Violations = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("violation", case=False)]
numofvio = len(Violations.index)
print("Violations:", numofvio)

threat = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("threat", case=False)]
numofthreat = len(threat.index)
print("Threats:", numofthreat)

other = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("other", case=False)]
numofother = len(other.index)
print("Other:", numofother)

numcrimesociety = numofvio + numofthreat + numofother
print("Crimes Against Society/Other:", numcrimesociety)

societypercent = (numcrimesociety/numofcrimes)
print("Percent of Crimes:", societypercent*100)

Missing Crimes (OFFENSE_DESCRIPTION COLUMN)

In [None]:
numofmissing = numofcrimes - (numcrimeprop + numcrimeperson + numcrimesociety)
print("Number Missing:", numofmissing)

missingpercent = (numofmissing/numofcrimes)
print("Percent of Crimes:", missingpercent*100)

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Crimes Against Property (OFFENSE_CODE_GROUP COLUMN)

In [None]:
theft1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("theft", case=False)]
numoftheft1 = len(theft1.index)
print("Theft Crimes:", numoftheft1)

vandalism1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("vandalism", case=False)]
numofvandalism1 = len(vandalism1.index)
print("Vadalism Crimes:", numofvandalism1)

larceny1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("larceny", case=False)]
numoflarceny1 = len(larceny1.index)
print("Larceny Crimes:", numoflarceny1)

burglary1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("burglary", case=False)]
numofburglary1 = len(burglary1.index)
print("Burglary Crimes:", numofburglary1)

fire1 = crimeDF.loc[crimeDF['OFFENSE_DESCRIPTION'].str.contains("fire", case=False)]
numoffire1 = len(fire1.index)
print("Fire Related Crimes:", numoffire1)

property1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("property", case=False)]
numofproperty1 = len(property1.index)
print("Property Crimes:", numofproperty1)

numcrimeprop1 = numofburglary1 + numoflarceny1 + numoftheft1 + numofproperty1 + numofvandalism1 + numoffire1
print("Crimes Against Property:", numcrimeprop1)

proppercent1 = (numcrimeprop1/numofcrimes)
print("Percent of Crimes:", proppercent1*100)

Crimes Against People (OFFENSE_CODE_GROUP COLUMN)

In [None]:
assault1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("assault", case=False)]
numofassault1 = len(assault1.index)
print("Assault Crimes:", numofassault1)

manslaught1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("manslaughter", case=False)]
numofmanslaught1 = len(manslaught1.index)
print("Manslaughter Crimes:", numofmanslaught1)

warrant1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("warrant", case=False)]
numofwarrant1 = len(warrant1.index)
print("Warrants:", numofwarrant1)

medical1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("medical", case=False)]
numofmedical1 = len(medical1.index)
print("Medical:", numofmedical1)

person1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("person", case=False)]
numofperson1 = len(person1.index)
print("Person Crimes:", numofperson1)

harassment1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("harassment", case=False)]
numofharass1 = len(harassment1.index)
print("Harassment Crimes:", numofharass1)

numcrimeperson1 = numofassault1 + numofmanslaught1 + numofwarrant1 + numofmedical1 + numofperson1 + numofharass1
print("Crimes Against People:", numcrimeperson1)

peoplepercent1 = (numcrimeperson1/numofcrimes)
print("Percent of Crimes:", peoplepercent1*100)

Crimes Against Society (OFFENSE_CODE_GROUP COLUMN)

In [None]:
Violations1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("violation", case=False)]
numofvio1 = len(Violations1.index)
print("Violations:", numofvio1)

threat1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("threat", case=False)]
numofthreat1 = len(threat1.index)
print("Threats:", numofthreat1)

other1 = crimeDF.loc[crimeDF['OFFENSE_CODE_GROUP'].str.contains("other", case=False)]
numofother1 = len(other1.index)
print("Other:", numofother1)

numcrimesociety1 = numofvio1 + numofthreat1 + numofother1
print("Crimes Against Society/Other:", numcrimesociety1)

societypercent1 = (numcrimesociety1/numofcrimes)
print("Percent of Crimes:", societypercent1*100)

In [None]:
numofmissing1 = numofcrimes - (numcrimeprop1 + numcrimeperson1 + numcrimesociety1)
print("Number Missing:", numofmissing1)

missingpercent1 = (numofmissing1/numofcrimes)
print("Percent of Crimes:", missingpercent1*100)

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------