In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root_dir, name) for name in file_names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation/FutureWarning messages from pandas and plotting libraries.
warnings.filterwarnings("ignore")

In [None]:
# Load the homicide reports CSV from the Kaggle input directory into `df`
# and preview the first rows.
df = pd.read_csv('/kaggle/input/homicide-reports/database.csv')
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# Normalise the column names: collapse every run of whitespace into a single
# underscore so that names such as `Crime_Solved` used by the cells below resolve.
# `regex=True` is required: since pandas 2.0 `str.replace` treats the pattern as
# a literal string by default, which would leave the names unchanged. The raw
# string avoids the invalid "\s" escape sequence.
df.columns = df.columns.str.replace(r'\s+', '_', regex=True)

In [None]:
# For each headline column, report its most frequent value.
headline_columns = (
    ('Year', 'Year with most cases of homicide is: '),
    ('Month', 'Month with most cases of homicide is: '),
    ('City', 'City with most cases of homicide is: '),
    ('State', 'State with most cases of homicide is: '),
    ('Weapon', 'Weapon used the most in homicide cases is: '),
    ('Crime_Solved', 'Most homicide cases are : '),
)
for col_name, message in headline_columns:
    print(message, df[col_name].value_counts().idxmax())

In [None]:
# Solved vs. unsolved cases as a horizontal bar chart; each bar is annotated
# with its share of all rows in percent.
solved = df['Crime_Solved'].value_counts()
solved_pct = solved.div(len(df['Crime_Solved'])).mul(100)

fig = px.bar(
    x=solved,
    y=solved.index,
    text=solved_pct,
    orientation='h',
    title='Solved VS UnSolved Homicide Cases Count',
    width=800,
    height=400,
)
fig.update_traces(
    texttemplate='%{text:.3s}%',
    textposition='outside',
    marker={'color': solved, 'colorscale': 'twilight'},
)
fig.show()

In [None]:
# Pie chart of how the records are distributed across agency types.
agency_type = df['Agency_Type'].value_counts()

agency_trace_style = {
    'textinfo': 'percent',
    'insidetextorientation': 'radial',
    'legendgroup': 'show',
    'marker': {'colors': px.colors.qualitative.Pastel2},
}
fig = px.pie(values=agency_type,
             names=agency_type.index,
             title='Agency Type',
             height=500)
fig.update_traces(**agency_trace_style)
fig.show()

In [None]:
# Number of cases recorded per year, with the count written above each bar.
year = df['Year'].value_counts()

fig = px.bar(
    x=year.index,
    y=year,
    text=year,
    title='Count Of Homicide Cases Over The Years',
)
fig.update_traces(
    marker={'color': year, 'colorscale': 'earth'},
    textposition='outside',
)
fig.show()

In [None]:
# Number of cases recorded per month, with the count written above each bar.
# Bars follow the value_counts() order (most frequent month first).
month = df['Month'].value_counts()

month_bar_args = {
    'x': month.index,
    'y': month,
    'text': month,
    'title': 'Count Of Homicide Cases Over The Months',
}
fig = px.bar(**month_bar_args)
fig.update_traces(textposition='outside',
                  marker={'color': month, 'colorscale': 'oranges'})
fig.show()

In [None]:
# Number of distinct values in the City column.
df['City'].nunique()

In [None]:
from wordcloud import WordCloud
from scipy import signal

# Word cloud of city names, each name sized by the number of records it appears in.
#
# The cloud is built from per-city counts instead of one space-joined string:
# WordCloud.generate() re-tokenises its input into individual words (and applies
# stop-word filtering and collocation detection), so a multi-word city name would
# be broken apart and counted word by word. generate_from_frequencies() keeps
# every name intact and uses the supplied counts directly.
cities = df['City']
city_counts = cities.value_counts().to_dict()  # value_counts() skips missing values

plt.subplots(figsize=(10,10))
wordcloud = WordCloud(background_color = 'black',
                     width = 512,
                     height = 384).generate_from_frequencies(city_counts)
plt.axis('off')
plt.imshow(wordcloud)
plt.show()


In [None]:
# Ten most frequent cities; each bar is labelled with its share of all rows (%).
cities = df['City'].value_counts().head(10)
cities_pct = cities.div(len(df['City'])).mul(100)

fig = px.bar(
    x=cities.index,
    y=cities,
    text=cities_pct,
    title='Top 10 Cities With Most Homicide Cases',
    width=900,
)
fig.update_traces(
    texttemplate='%{text:.2s}%',
    textposition='outside',
    marker={'color': cities, 'colorscale': 'tealrose'},
)
fig.show()

In [None]:
# Number of distinct values in the State column.
df['State'].nunique()

In [None]:
# Word cloud of state names, each name sized by the number of records it appears in.
#
# Built from per-state counts rather than a space-joined string: WordCloud.generate()
# splits its input into individual words, so two-word state names would be torn
# apart and their words counted separately. generate_from_frequencies() keeps
# each name whole and uses the counts as given.
states = df['State']
state_counts = states.value_counts().to_dict()  # value_counts() skips missing values

plt.subplots(figsize=(10,10))
wordcloud = WordCloud(background_color = 'white',
                     width = 512,
                     height = 384).generate_from_frequencies(state_counts)
plt.axis('off')
plt.imshow(wordcloud)
plt.show()

In [None]:
# Ten most frequent states; each bar is labelled with its share of all rows (%).
states = df['State'].value_counts().head(10)
states_pct = states.div(len(df['State'])).mul(100)

state_bar_args = {
    'x': states.index,
    'y': states,
    'text': states_pct,
    'title': 'Top 10 States With Most Homicide Cases',
    'width': 900,
}
fig = px.bar(**state_bar_args)
fig.update_traces(textposition='outside',
                  texttemplate='%{text:.2s}%',
                  marker={'color': states, 'colorscale': 'tealrose'})
fig.show()

In [None]:
# Frequency of each weapon type; every bar is labelled with its share of all rows (%).
weapon = df['Weapon'].value_counts()
weapon_pct = weapon / len(df['Weapon']) * 100

fig = px.bar(x=weapon.index,
            y=weapon,
            text=weapon_pct,
            title='Weapon Used In Homicides')

# Use fixed-point formatting for the label. The d3 "s" format type adds an SI
# prefix, so any share below 1% (e.g. 0.19) would be rendered as "190m%"
# instead of "0.19%".
fig.update_traces(textposition='outside',
                  texttemplate='%{text:.2f}%',
                  marker=dict(color=weapon, colorscale='tealrose'))
fig.show()

In [None]:
# Pie chart of the distribution of the Victim_Count column.
victim_count = df['Victim_Count'].value_counts()

fig = px.pie(
    values=victim_count,
    names=victim_count.index,
    title='Homicide Victim Count',
)
fig.update_traces(
    marker={'colors': px.colors.qualitative.Pastel},
    legendgroup='show',
    insidetextorientation='radial',
    textinfo='percent',
)
fig.show()

In [None]:
# Sum of Victim_Count per weapon, ordered from largest to smallest total.
weapon_count = (
    df.groupby('Weapon')['Victim_Count']
    .sum()
    .reset_index()
    .sort_values(by='Victim_Count', ascending=False)
)
victims_per_weapon = weapon_count['Victim_Count']

fig = px.bar(
    x=weapon_count['Weapon'],
    y=victims_per_weapon,
    text=victims_per_weapon,
    title='Victim Count Of Weapons Used In Homicide',
)
fig.update_traces(
    textposition='outside',
    marker={'color': victims_per_weapon, 'colorscale': 'rdylbu'},
)
fig.show()

In [None]:
# Pie chart of the distribution of the Crime_Type column.
crime_type = df['Crime_Type'].value_counts()

crime_type_style = dict(
    textinfo='percent',
    insidetextorientation='radial',
    legendgroup='show',
    marker={'colors': px.colors.qualitative.T10},
)
fig = px.pie(values=crime_type, names=crime_type.index, title='Crime Type')
fig.update_traces(**crime_type_style)
fig.show()

In [None]:
# Count of records for each value of the Relationship column.
plt.figure(figsize=(18, 5))
relationship_col = df['Relationship']
sns.countplot(x=relationship_col)
plt.title('Relationship Of Perpetrator With Victims', fontsize=15)
plt.xlabel('Relationship', fontsize=12)
plt.xticks(rotation=60)  # category labels are rotated so they do not overlap
plt.show()

In [None]:
# Pie chart of the distribution of the Perpetrator_Ethnicity column.
perp_eth = df['Perpetrator_Ethnicity'].value_counts()

fig = px.pie(
    values=perp_eth,
    names=perp_eth.index,
    title='Perpetrator Ethnicity',
)
fig.update_traces(
    marker={'colors': px.colors.qualitative.Set1},
    legendgroup='show',
    insidetextorientation='radial',
    textinfo='percent',
)
fig.show()

In [None]:
# Pie chart of the distribution of the Victim_Ethnicity column.
vic_eth = df['Victim_Ethnicity'].value_counts()

vic_eth_style = {
    'textinfo': 'percent',
    'insidetextorientation': 'radial',
    'legendgroup': 'show',
    'marker': dict(colors=px.colors.qualitative.Set2),
}
fig = px.pie(values=vic_eth, names=vic_eth.index, title='Victim Ethnicity')
fig.update_traces(**vic_eth_style)
fig.show()

In [None]:
# Two pie charts with identical styling: perpetrator sex, then victim sex.
prep_sex = df['Perpetrator_Sex'].value_counts()
vic_sex = df['Victim_Sex'].value_counts()

sex_charts = (
    (prep_sex, "Perpetrator Sex"),
    (vic_sex, "Victim's Sex"),
)
for sex_counts, chart_title in sex_charts:
    fig = px.pie(names=sex_counts.index,
                 values=sex_counts,
                 title=chart_title)
    fig.update_traces(textinfo='percent',
                      insidetextorientation='radial',
                      legendgroup='show',
                      marker={'colors': px.colors.qualitative.Set2})
    fig.show()

In [None]:
# Stacked bars of weapon counts, one stack per value of Perpetrator_Sex.
weapons = (
    df.groupby('Perpetrator_Sex')['Weapon']
    .value_counts()
    .reset_index(name='count')
)

fig = px.bar(
    x=weapons['Perpetrator_Sex'],
    y=weapons['count'],
    color=weapons['Weapon'],
    text=weapons['count'],
    barmode='stack',
    width=900,
    height=600,
)
fig.update_traces(textposition='outside')
fig.show()

In [None]:
# Horizontal bars of record sources; each bar is labelled with its share of all rows (%).
record = df['Record_Source'].value_counts()
record_pct = record.div(len(df['Record_Source'])).mul(100)

fig = px.bar(
    x=record,
    y=record.index,
    text=record_pct,
    orientation='h',
    title='Crime Record Sources',
    width=700,
    height=400,
)
fig.update_traces(
    texttemplate='%{text:.3s}%',
    textposition='outside',
    marker={'color': record, 'colorscale': 'fall'},
)
fig.show()

**Unsolved Homicide Cases**

In [None]:
# Keep only the rows whose Crime_Solved value is 'No', then chart them per year.
# Each bar is labelled with that year's share of the unsolved rows (%).
is_unsolved = df['Crime_Solved'] == 'No'
unsolved_crime = df[is_unsolved]

unsolved_year = unsolved_crime['Year'].value_counts()
unsolved_year_pct = unsolved_year.div(len(unsolved_crime['Year'])).mul(100)

fig = px.bar(
    x=unsolved_year.index,
    y=unsolved_year,
    text=unsolved_year_pct,
    title='Unsolved Crimes Over The Years',
)
fig.update_traces(
    texttemplate='%{text:.3s}%',
    textposition='outside',
    marker={'color': unsolved_year, 'colorscale': 'viridis'},
)
fig.show()

In [None]:
# Bar plot of record-source counts among the unsolved cases.
unsolved_crime['Record_Source'].value_counts().plot(kind='bar')