In [None]:
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

In [None]:
# Read the raw crime export; the file is opened with the latin-1 codec.
df = pd.read_csv(
    '../input/crimes-in-boston/crime.csv',
    encoding='latin-1',
)

# Preview the first rows of the raw frame.
df.head()

I am going to focus on the year with complete data (2018)

In [None]:
# Normalise column names to lower case so later cells can use snake-style names.
df.columns = df.columns.str.lower()

# Keep only the incidents recorded in 2018.
df = df[df['year'] == 2018]

# Drop the columns that are not used in the analysis below.
data = df.drop(columns=['incident_number', 'offense_code', 'ucr_part', 'location'])

# Replace missing values with 'N', but only in non-numeric columns.
# Filling numeric columns with a string would turn them into object dtype and
# hide the NaN markers that the map cells rely on to skip missing coordinates.
non_numeric_cols = data.select_dtypes(exclude='number').columns
data = data.fillna(dict.fromkeys(non_numeric_cols, 'N'))

data.head(2)

**Top 10 most common crimes**

In [None]:
# The ten most frequent offense groups, ordered from most to least common.
top_offense_groups = data['offense_code_group'].value_counts().nlargest(10).index

# Horizontal count plot restricted to those ten groups.
# Note: catplot returns a seaborn FacetGrid, even though it is stored as `ax`.
ax = sns.catplot(
    data=data,
    y='offense_code_group',
    order=top_offense_groups,
    kind='count',
    height=8,
    aspect=1.5,
)

# **When do crimes occur?**

In [None]:
# Number of incidents for each value of the `hour` column, drawn in red.
sns.catplot(
    data=data,
    x='hour',
    kind='count',
    color='red',
    height=8,
    aspect=1.5,
)

# Enlarge the x-axis label of the current axes.
plt.xlabel('Hour', fontsize=25)

In [None]:
# Number of incidents for each value of the `day_of_week` column.
day_plot_options = dict(
    x='day_of_week',
    kind='count',
    height=8,
    aspect=1.5,
    data=data,
)
sns.catplot(**day_plot_options)

# Enlarge the x-axis label of the current axes.
plt.xlabel('Day', fontsize=25)

# Geospatial Analysis

In [None]:
# Make sure both coordinate columns are numeric.
# errors='coerce' turns any value that cannot be parsed as a number
# (for example a string placeholder for a missing value) into NaN,
# so the map cells below can detect and skip it.
# Bracket assignment is used because attribute assignment (data.lat = ...)
# only works for columns that already exist and is easy to misread.
data['lat'] = pd.to_numeric(data['lat'], errors='coerce')
data['long'] = pd.to_numeric(data['long'], errors='coerce')

In [None]:
# Base map centred on the fixed coordinates used throughout this notebook.
m = folium.Map(location=[42.361145, -71.057083], zoom_start=13)

# Add one clustered marker per incident that has both coordinates.
# Rows missing either coordinate are filtered out once up front, and the two
# columns are iterated directly instead of building a Series per row.
cluster = MarkerCluster()
located = data[['lat', 'long']].dropna()
for lat, lon in zip(located['lat'], located['long']):
    cluster.add_child(Marker([lat, lon]))

# Last expression of the cell: attaches the cluster and displays the map.
m.add_child(cluster)

# **HeatMap**

In [None]:
# The heat map cannot be plotted with NaN coordinates, so discard every row
# that is missing either one.
# NOTE: rows are removed from `data` in place, so later cells see the reduced frame.
data.dropna(subset=['lat', 'long'], inplace=True)

# Coordinate pairs that feed the heat layer.
heat_points = data[['lat', 'long']]

m_1 = folium.Map(location=[42.361145, -71.057083], zoom_start=13)
HeatMap(data=heat_points, radius=10).add_to(m_1)

# Display the finished map.
m_1

# WordCloud

In [None]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Concatenate every offense description into one space-separated string.
summary = data.offense_description
all_summary = " ".join(summary)

# Library stop words plus a few extra tokens to exclude from the cloud.
stopwords = set(STOPWORDS) | {'M', 'V', 'MA'}

# Configure the cloud first, then generate it from the combined text.
cloud_builder = WordCloud(
    stopwords=stopwords,
    background_color='black',
    width=1600,
    height=800,
)
wordcloud = cloud_builder.generate(all_summary)

In [None]:
# Render the word cloud on a wide figure with the axes hidden.
fig, ax = plt.subplots(figsize=(16, 8))

# Draw the image once, with bilinear interpolation for smoother edges.
# (A second plt.imshow call would paint over this render with the default
# interpolation, so the image is drawn only here.)
ax.imshow(wordcloud, interpolation='bilinear')
ax.set_axis_off()

plt.show()