Hypothesis: We believe there's a correlation between temperature and the incidence of crime in Silver Spring, Maryland


In [None]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import datetime

In [None]:
# Path to the crime CSV, relative to the notebook's working directory.
# (The climate CSV is loaded in its own cell below.)
crimeMD_path = "Crime_MD.csv"

# Load the crime table. low_memory=False has pandas parse the file in one
# pass rather than in chunks, which avoids mixed-dtype inference per column.
crime_df = pd.read_csv(filepath_or_buffer=crimeMD_path, low_memory=False)


In [None]:
# Path to the climate CSV, relative to the notebook's working directory
climateMD_path = "silver_spring_climate.csv"

# Load the climate table in a single parsing pass (low_memory=False)
climate_df = pd.read_csv(filepath_or_buffer=climateMD_path, low_memory=False)

In [None]:
# Normalise the climate table and add a numeric month key (1-12) for the
# merge with the crime data later on.
#
# The frame is rebuilt from its first two columns instead of being relabelled
# in place, so this cell can be re-run safely: assigning a two-name list to
# `climate_df.columns` fails with a length mismatch once the 'Month' column
# has already been appended by a previous run.
# NOTE(review): assumes the CSV's first two columns are month name and
# temperature, in that order - confirm against the file.
climate_df = (
    climate_df.iloc[:, :2]
    .set_axis(['MonthName', 'Temp'], axis=1)
    # '%B' parses full month names such as "January"; .dt.month yields 1-12
    .assign(Month=lambda frame: pd.to_datetime(frame['MonthName'], format='%B').dt.month)
)
climate_df

In [None]:
# Keep only the fields this analysis uses: the three crime-name levels,
# the city, and the start timestamp of the incident.
columns_of_interest = [
    'Crime Name1',
    'Crime Name2',
    'Crime Name3',
    'City',
    'Start_Date_Time',
]
main_crime_df = crime_df.loc[:, columns_of_interest]

In [None]:
# Drop the rows recorded as 'Not a Crime', then give the columns readable names
is_actual_crime = main_crime_df['Crime Name1'].ne('Not a Crime')
main_crime_df = main_crime_df.loc[is_actual_crime]

readable_names = {
    'Crime Name1': 'Crime Main Category',
    'Crime Name2': 'Crime Sub Category',
    'Start_Date_Time': 'Date & Time of Crime',
}
clean_crime_df = main_crime_df.rename(columns=readable_names)

# Parse the timestamp column so it can be compared and split into year/month
clean_crime_df['Date & Time of Crime'] = pd.to_datetime(clean_crime_df['Date & Time of Crime'])

# Per-city row counts. Not the last expression in the cell, so the notebook
# does not display this result.
clean_crime_df['City'].value_counts()

# Restrict to Silver Spring (exact, upper-case match on the City column)
in_silver_spring = clean_crime_df['City'].eq('SILVER SPRING')
silverspring_crime_df = clean_crime_df.loc[in_silver_spring]
silverspring_crime_df

In [None]:
# Inspect the date range covered by the Silver Spring records
crime_time = silverspring_crime_df['Date & Time of Crime']
data_first_date = crime_time.min()
data_last_date = crime_time.max()

print(data_first_date)
print(data_last_date)

# Keep only the calendar years 2017-2019: everything before 2017-01-01 and
# everything from 2020-01-01 onward is discarded.
# A half-open interval [2017-01-01, 2020-01-01) is used so that timestamps in
# the final second of 2016 (e.g. 23:59:59.5) cannot slip through, which the
# earlier "> '2016-12-31 23:59:59'" comparison allowed.
study_start = pd.Timestamp('2017-01-01')
study_end = pd.Timestamp('2020-01-01')
in_study_window = (crime_time >= study_start) & (crime_time < study_end)

# .copy() gives an independent frame, so later cells can add columns to it
# without pandas raising SettingWithCopyWarning.
silverspring_clean_df = silverspring_crime_df.loc[in_study_window].copy()

In [None]:
# Display the date-filtered Silver Spring crime records built in the previous cell
silverspring_clean_df

In [None]:
# Derive the calendar year and month number (1-12) from the crime timestamp.
# The column is already datetime-typed (converted with pd.to_datetime in an
# earlier cell), so the .dt accessor can be used directly.
#
# .assign returns a new frame instead of writing columns into what may be a
# slice of another DataFrame; this avoids SettingWithCopyWarning and keeps
# the cell safe to re-run.
crime_time = silverspring_clean_df['Date & Time of Crime']
silverspring_clean_df = silverspring_clean_df.assign(
    Year=crime_time.dt.year,
    Month=crime_time.dt.month,
)

silverspring_clean_df.head(3)

In [None]:
# Attach the climate columns to every crime row via the shared numeric
# 'Month' key (default inner join: crime rows whose month has no match in
# climate_df are dropped).
# NOTE(review): presumably climate_df holds one row per month - confirm,
# otherwise crime rows would be duplicated by the join.
combined_df = silverspring_clean_df.merge(climate_df, on='Month')
combined_df


In [None]:
# Dan's code starts here - Property/Other

In [None]:
# Property crimes only: rows whose main category is 'Crime Against Property'
is_property_crime = silverspring_clean_df['Crime Main Category'].eq('Crime Against Property')
sspring_property = silverspring_clean_df.loc[is_property_crime]
sspring_property

In [None]:
# One property-crime frame per study year (2017, 2018, 2019)
propcrime_2017, propcrime_2018, propcrime_2019 = (
    sspring_property.loc[sspring_property['Year'] == study_year]
    for study_year in (2017, 2018, 2019)
)

In [None]:
# Monthly property-crime counts for 2017, ordered by month number.
# value_counts() alone orders by frequency, and the previous x-axis was a
# sorted list of every row's month (one entry per crime), so x and y did not
# line up. Both are now derived from the same month-sorted counts.
pc2017_y_axis = propcrime_2017['Month'].value_counts().sort_index()
pc2017_x_axis = pc2017_y_axis.index.tolist()
pd.DataFrame(pc2017_y_axis)

In [None]:
# Dan's code ends here

In [None]:
# Cynthia's code starts here - Person

Sub-Hypothesis: We believe that as the temperature drops to a certain point during the winter, the number of crimes against persons will also decrease. This is because people are less likely to go out when it gets too cold, so fewer crimes occur.


In [None]:
# Crimes against persons only, keeping every column
is_person_crime = silverspring_clean_df['Crime Main Category'].eq('Crime Against Person')
person_crime_df = silverspring_clean_df[is_person_crime]
person_crime_df

In [None]:
# Count crimes against persons per calendar month, pooling all study years.
personmonthgroup = person_crime_df.groupby('Month')

# Reindex onto months 1-12 so a month with no incidents appears as 0.
# Without this, a missing month would shift (or break) the positional
# month-name labelling below.
personcrime_count_month = personmonthgroup['Month'].count().reindex(range(1, 13), fill_value=0)

# Counts indexed by month number
personcrime_df = pd.DataFrame({'Number of Crime Incidence': personcrime_count_month})

months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Same counts labelled with month names, on a plain 0..11 index
personcrime_index_df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': personcrime_count_month.to_numpy(),
})
personcrime_index_df

In [None]:
# Visualize the incidence of Crime Against Person for each month, 2017-2019.
# The title previously said 2017-2020, but the data was filtered to end
# before 2020-01-01, so 2019 is the last year included.
x_all = personcrime_index_df['Month']
y_all = personcrime_index_df['Number of Crime Incidence']

plt.scatter(x_all, y_all)
plt.xticks(rotation=45)  # month names overlap unless rotated

plt.xlabel('Month')
plt.ylabel('Number of Crime Incidence')
plt.title('Numbers of Crimes Against Person for each Month from 2017-2019')

plt.show()

In [None]:
# Split the crimes-against-persons frame into one frame per year (2017-2019)
person_crime_year = person_crime_df['Year']
person_crime_2017df = person_crime_df[person_crime_year.eq(2017)]
person_crime_2018df = person_crime_df[person_crime_year.eq(2018)]
person_crime_2019df = person_crime_df[person_crime_year.eq(2019)]

In [None]:
# Number of crimes against persons in each month of 2017.
#
# Fix: the counts are now taken from the 2017-only groupby. Previously the
# cell built `monthgroup` from the 2017 frame but then counted from
# `personmonthgroup` (all years pooled), so this "2017" table silently
# repeated the pooled totals.
#
# NOTE: this cell reuses the names personcrime_count_month, personcrime_df
# and personcrime_index_df from the pooled cell above and overwrites them.
monthgroup = person_crime_2017df.groupby('Month')

# Reindex onto months 1-12 so a month with no incidents shows as 0 and the
# month-name labels below stay aligned.
personcrime_count_month = monthgroup['Month'].count().reindex(range(1, 13), fill_value=0)

personcrime_df = pd.DataFrame({'Number of Crime Incidence': personcrime_count_month})

# `months` is the January..December list defined in an earlier cell
personcrime_index_df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': personcrime_count_month.to_numpy(),
})
personcrime_index_df

In [None]:
# Bar chart of crimes against persons per month in 2017.
# value_counts() orders by frequency; sort_index() puts the bars back in
# calendar order (month 1..12) so the seasonal pattern is readable.
crimect_2017 = person_crime_2017df['Month'].value_counts().sort_index()
crimect2017plot = crimect_2017.plot(kind='bar', title='Numbers of Crimes Against Person for each Month in 2017')
plt.xlabel('Month')
plt.ylabel('Number of Crimes')

In [None]:
# Cynthia's code ends here

In [None]:
# Rose's code starts here - Society 

In [None]:
# Crimes against society only, keeping every column
is_society_crime = silverspring_clean_df['Crime Main Category'].eq('Crime Against Society')
society_crime_df = silverspring_clean_df[is_society_crime]
society_crime_df

In [None]:
# Count crimes against society per calendar month, pooling all study years.
societymonthgroup = society_crime_df.groupby('Month')

# Reindex onto months 1-12 so a month with no incidents appears as 0 rather
# than shifting (or breaking) the positional month-name labels below.
societycrime_count_month = societymonthgroup['Month'].count().reindex(range(1, 13), fill_value=0)

# Counts indexed by month number
societycrime_df = pd.DataFrame({'Number of Crime Incidence': societycrime_count_month})

# Same counts labelled with month names, on a plain 0..11 index
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
societycrime_index_df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': societycrime_count_month.to_numpy(),
})
societycrime_index_df

In [None]:
# Line chart of crimes against society per month, 2017-2019 pooled.
# The title previously said 2017-2020, but the data was filtered to end
# before 2020-01-01, so 2019 is the last year included.
x_all = societycrime_index_df['Month']
y_all = societycrime_index_df['Number of Crime Incidence']

plt.plot(x_all, y_all, marker='o')
plt.xticks(rotation=45)  # month names overlap unless rotated

plt.xlabel('Month')
plt.ylabel('Number of Crime Incidence')
plt.title('Numbers of Crimes Against Society for each Month from 2017-2019')

plt.show()

In [None]:
# Re-display the crimes-against-society records before they are split by year
society_crime_df

In [None]:
# Split the crimes-against-society frame into one frame per year (2017-2019)
society_by_year = {
    study_year: society_crime_df.loc[society_crime_df['Year'] == study_year]
    for study_year in (2017, 2018, 2019)
}
society_crime_2017df = society_by_year[2017]
society_crime_2018df = society_by_year[2018]
society_crime_2019df = society_by_year[2019]

In [None]:
# Number of crimes against society in each month of 2017
society_month2017 = society_crime_2017df.groupby('Month')

# Reindex onto months 1-12: within a single year a month may have no
# incidents, which would otherwise break the month-name labelling below.
society_crime_ct_2017 = society_month2017['Month'].count().reindex(range(1, 13), fill_value=0)

# Counts labelled with month names on a plain 0..11 index.
# `months` is the January..December list defined in an earlier cell.
society_crime_ct_2017df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': society_crime_ct_2017.to_numpy(),
})
society_crime_ct_2017df

In [None]:
# Line chart: crimes against society per month, 2017 only
x_2017, y_2017 = (
    society_crime_ct_2017df[column]
    for column in ('Month', 'Number of Crime Incidence')
)

# plt.plot returns a list of lines; keep the single line as the handle
handle17 = plt.plot(x_2017, y_2017, marker='x')[0]

plt.title('Numbers of Crimes Against Society for each Month in 2017')
plt.xlabel('Month')
plt.ylabel('Number of Crime Incidence')
plt.xticks(rotation=45)

plt.show()

In [None]:
# Number of crimes against society in each month of 2018
society_month2018 = society_crime_2018df.groupby('Month')

# Reindex onto months 1-12: within a single year a month may have no
# incidents, which would otherwise break the month-name labelling below.
society_crime_ct_2018 = society_month2018['Month'].count().reindex(range(1, 13), fill_value=0)

# Counts labelled with month names on a plain 0..11 index.
# `months` is the January..December list defined in an earlier cell.
society_crime_ct_2018df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': society_crime_ct_2018.to_numpy(),
})
society_crime_ct_2018df

In [None]:
# Line chart: crimes against society per month, 2018 only
x_2018 = society_crime_ct_2018df.loc[:, 'Month']
y_2018 = society_crime_ct_2018df.loc[:, 'Number of Crime Incidence']

# plt.plot returns a list of lines; keep the single line as the handle
lines_2018 = plt.plot(x_2018, y_2018, marker='x')
handle18 = lines_2018[0]

plt.xlabel('Month')
plt.ylabel('Number of Crime Incidence')
plt.title('Numbers of Crimes Against Society for each Month in 2018')
plt.xticks(rotation=45)

plt.show()


In [None]:
# Number of crimes against society in each month of 2019
society_month2019 = society_crime_2019df.groupby('Month')

# Reindex onto months 1-12: within a single year a month may have no
# incidents, which would otherwise break the month-name labelling below.
society_crime_ct_2019 = society_month2019['Month'].count().reindex(range(1, 13), fill_value=0)

# Counts labelled with month names on a plain 0..11 index.
# `months` is the January..December list defined in an earlier cell.
society_crime_ct_2019df = pd.DataFrame({
    'Month': months,
    'Number of Crime Incidence': society_crime_ct_2019.to_numpy(),
})
society_crime_ct_2019df

In [None]:
# Line chart: crimes against society per month, 2019 only
month_column, count_column = 'Month', 'Number of Crime Incidence'
x_2019 = society_crime_ct_2019df[month_column]
y_2019 = society_crime_ct_2019df[count_column]

# plt.plot returns a list of lines; unpack the single line as the handle
[handle19] = plt.plot(x_2019, y_2019, marker='x')

plt.xticks(rotation=45)
plt.title('Numbers of Crimes Against Society for each Month in 2019')
plt.ylabel('Number of Crime Incidence')
plt.xlabel('Month')

plt.show()

In [None]:
# Overlay the 2017, 2018 and 2019 monthly series on a single set of axes so
# the years can be compared directly (one combined chart, not separate facets).
yearly_series = {
    '2017': (x_2017, y_2017),
    '2018': (x_2018, y_2018),
    '2019': (x_2019, y_2019),
}

# One line per year; the label feeds the legend below
handle17, handle18, handle19 = (
    plt.plot(x_values, y_values, marker='x', label=year_label)[0]
    for year_label, (x_values, y_values) in yearly_series.items()
)

plt.xticks(rotation=45)

plt.xlabel('Month')
plt.ylabel('Number of Crime Incidence')
# Title fixed: previously read "in during 2017-2019"
plt.title('Numbers of Crimes Against Society for each Month during 2017-2019', fontsize=12, fontweight='bold')

plt.legend(loc="best")
plt.show()

In [None]:
# Number of crimes against society per (Year, Month); the result keeps a
# two-level (Year, Month) index.
# TODO: no plot is built from this table yet.
societymonthyeargroup = society_crime_df.groupby(['Year','Month'])
societycrime_count_mmyy = societymonthyeargroup['Month'].count()

societycrime_count_mmyy_df = pd.DataFrame({'Number of Crime Incidence':societycrime_count_mmyy})

# Derive each row's month name from the month number in its index instead of
# pasting a fixed 36-item list: the fixed list raised a length mismatch (or
# mislabelled rows) whenever a year/month combination had no incidents.
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
month_numbers = societycrime_count_mmyy.index.get_level_values('Month')
monthsforeachyear = [month_names[month_number - 1] for month_number in month_numbers]

societycrime_count_mmyy_df['Month'] = monthsforeachyear
societycrime_count_mmyy_df = societycrime_count_mmyy_df[['Month','Number of Crime Incidence']]
societycrime_count_mmyy_df

In [None]:
# Rose's code ends here

In [None]:
# Paul's code starts here - API!!!

In [None]:
# Paul's code ends here