# Pakistan Crime Data

    1. How many murders happened in each province
    2. Which province had the most crimes
    3. Which province had the fewest crimes
    4. Subplots of crimes by province


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    full_paths = [os.path.join(root, name) for name in files]
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Plotting Libraries

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Location of the crimes-by-type CSV inside the read-only ../input directory.
CRIME_CSV = '../input/pakistan-crimes-by-type/248a92b9-5817-43f2-b0e7-6d45a45a8ee9.csv'
# Raw table exactly as read from disk; later cells derive new frames from it.
df = pd.read_csv(CRIME_CSV)

## Initial Data View

In [None]:
# Print the (rows, columns) tuple, then render the first five rows as the cell output.
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head(n=5)

## Clean-up and Data Massage

In [None]:

# Work on a copy without the '_id' column; the untouched table stays available as `df`.
df2 = df.drop(columns=['_id'])
df2

In [None]:
# Reshape wide -> long: every column other than Year/Offence becomes a row whose
# column name goes into 'Province' and whose cell goes into 'value'.
df_long = df2.melt(id_vars=['Year', 'Offence'], var_name='Province')
# Keep only the per-province rows; the 'Pakistan' column is the country as a whole.
df_long = df_long[df_long['Province'] != 'Pakistan']

In [None]:
# Rows carrying the 'TOTAL RECORDED CRIME' offence are set aside in `totals`
# (kept just in case) and removed from the working frame.
is_total_row = df_long['Offence'] == 'TOTAL RECORDED CRIME'
totals = df_long[is_total_row]
df_long = df_long[~is_total_row]


# Crime Analysis

In [None]:
# Sum of 'value' per province across all rows still present in df_long.
crime_by_prov=df_long.groupby('Province')['value'].sum()
# Largest single 'value' per province; used only to colour the bars.
# Deliberately NOT named `max`: rebinding the builtin would make any later
# max(...) call in the notebook fail with "'Series' object is not callable".
peak_by_prov=df_long.groupby('Province')['value'].max()
# NOTE(review): the figures quoted in the title are hard-coded text, not derived
# from crime_by_prov -- re-check them whenever the underlying data changes.
# NOTE(review): the '_value' label key is kept as written; presumably it is the
# name plotly gives the bar-height column for this input -- TODO confirm.
px.bar( crime_by_prov, title='<b>Total Crimes by Province 2012-2017</b><br>Most Crimes:Punjab, 2.37M Crimes<br>Least Crimes:G.B, 9.3k Crimes ', labels={'_value':'Crime Total'}, 
            color= peak_by_prov, color_continuous_scale='Thermal')

## Subplots of Crimes by Province

In [None]:
# 'Others' only tells us that some crime was committed, so it is excluded to keep
# the per-offence charts readable.
# Note: df_long is rebound here, so every cell run after this one sees the data without 'Others'.
df_long = df_long[df_long['Offence'] != 'Others']

In [None]:
# One Series per province: 'value' summed per offence, indexed by offence name.
def _offence_totals(province):
    """Return the per-offence sum of 'value' for the rows of df_long matching `province`."""
    province_rows = df_long[df_long['Province'] == province]
    return province_rows.groupby('Offence')['value'].sum()


punjab_crime = _offence_totals('Punjab')
sindh_crime = _offence_totals('Sindh')
kp_crime = _offence_totals('KP')
balochistan_crime = _offence_totals('Balochistan')
islamabad_crime = _offence_totals('Islamabad')
railways_crime = _offence_totals('Railways')
gb_crime = _offence_totals('G.B')
ajk_crime = _offence_totals('AJK')

In [None]:
# One bar chart of per-offence totals for each province, all on a single figure.
# The grid is 8x2: charts sit on rows 1, 3, 5 and 7, while the even rows are left
# empty (their subplot titles are blank strings).
fig = make_subplots(
        rows=8,
        cols=2,
        subplot_titles=("Punjab", 'Sindh','','', 'KP', 'Balochistan','','', 'Islamabad', 'Railways','','', 'GB', 'AJK')

)

# (series, row, col) for each panel. Every panel must plot the series of the
# province named in its subplot title; plotting punjab_crime in all of them
# would make the eight charts identical.
province_panels = [
    (punjab_crime, 1, 1),       # Punjab
    (sindh_crime, 1, 2),        # Sindh
    (kp_crime, 3, 1),           # KP
    (balochistan_crime, 3, 2),  # Balochistan
    (islamabad_crime, 5, 1),    # Islamabad
    (railways_crime, 5, 2),     # Railways
    (gb_crime, 7, 1),           # GB
    (ajk_crime, 7, 2),          # AJK
]
for crime_totals, panel_row, panel_col in province_panels:
    fig.add_trace(
        go.Bar(x=crime_totals.index, y=crime_totals.values),
        row=panel_row,
        col=panel_col,
    )

# Kept as the cell's final expression so its return value is what the notebook displays.
fig.update_layout(
    title_text='Crimes by Province',
    autosize=True,
    width=1000,
    height=1000,
    paper_bgcolor='#006600',
    font_color= 'white',
    showlegend=False
    )
