In [1]:
# does everybody really die in the summer?

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# read in the data

shots = pd.read_csv('data/boston_shots_fired.csv')
shots.head()

Unnamed: 0,incident_num,incident_date,district,ballistics_evidence
0,I152000093-00,2015-01-01 04:17:00+00,C11,f
1,I152000042-00,2015-01-01 00:05:00+00,A15,f
2,I152000514-00,2015-01-02 15:51:00+00,B3,t
3,I152000965-00,2015-01-04 15:30:00+00,B2,t
4,I152000872-00,2015-01-04 00:13:00+00,B2,f


In [3]:
# cleaning up shots data

# we don't particuarly need the incident number as it stands
shots = shots.drop(columns=['incident_num'])

# let's convert the date column to a datetime object
shots['incident_date'] = pd.to_datetime(shots['incident_date'])

# let's simplify the incident_date column to show just year, month, and day
shots['incident_date'] = shots['incident_date'].dt.date
shots['incident_date'] = pd.to_datetime(shots['incident_date'])

# let's convert the ballistic entry to a boolean
shots['ballistics_evidence'] = shots['ballistics_evidence'] == 't'

In [4]:
# read in weather data

weather1 = pd.read_csv('data/bos_weather_2015_2017.csv')
weather2 = pd.read_csv('data/bos_weather_2018_2020.csv')
weather3 = pd.read_csv('data/bos_weather_2021_2023.csv')

# combine weather data

weather = pd.concat([weather1, weather2, weather3])

# isolate the columns we want
weather = weather[['DATE', 'AWND', 'PRCP', 'SNOW', 'TAVG']]
weather['DATE'] = pd.to_datetime(weather['DATE'])
weather.head()

Unnamed: 0,DATE,AWND,PRCP,SNOW,TAVG
0,2015-01-01,14.32,0.0,0.0,26
1,2015-01-02,12.75,0.0,0.0,34
2,2015-01-03,10.29,0.62,1.4,28
3,2015-01-04,8.28,0.57,0.0,38
4,2015-01-05,22.15,0.0,0.0,40


In [5]:
shots['ballistics_evidence'].value_counts()

ballistics_evidence
False    5317
True     2751
Name: count, dtype: int64

In [6]:
# let's create a column that shows the number of shots fired per day

shots['incident_count'] = 1
shots = shots.groupby('incident_date').sum().reset_index()
shots.head()

Unnamed: 0,incident_date,district,ballistics_evidence,incident_count
0,2015-01-01,C11A15,0,2
1,2015-01-02,B3E13,1,2
2,2015-01-03,B2,0,1
3,2015-01-04,B2B2,1,2
4,2015-01-05,C6,0,1


In [7]:
weather.head()

Unnamed: 0,DATE,AWND,PRCP,SNOW,TAVG
0,2015-01-01,14.32,0.0,0.0,26
1,2015-01-02,12.75,0.0,0.0,34
2,2015-01-03,10.29,0.62,1.4,28
3,2015-01-04,8.28,0.57,0.0,38
4,2015-01-05,22.15,0.0,0.0,40


In [10]:
# let's add the weather data to the shots data
shots_daily = pd.merge(shots, weather, left_on='incident_date', right_on='DATE')
shots_daily = shots_daily.drop(columns=['DATE'])
shots_daily['ballistics_evidence'] = shots_daily['ballistics_evidence'] == 1
shots_daily.head()

Unnamed: 0,incident_date,district,ballistics_evidence,incident_count,AWND,PRCP,SNOW,TAVG
0,2015-01-01,C11A15,False,2,14.32,0.0,0.0,26
1,2015-01-02,B3E13,True,2,12.75,0.0,0.0,34
2,2015-01-03,B2,False,1,10.29,0.62,1.4,28
3,2015-01-04,B2B2,True,2,8.28,0.57,0.0,38
4,2015-01-05,C6,False,1,22.15,0.0,0.0,40


In [9]:
weather.dtypes

DATE    datetime64[ns]
AWND           float64
PRCP           float64
SNOW           float64
TAVG             int64
dtype: object