In [None]:
import warnings
warnings.filterwarnings('ignore')

import requests
import pandas as pd
import numpy as np
import plotly.graph_objects as go

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

### Data Preparation

In [None]:
# Load the raw CSV; its first column is used as the (soon discarded) index.
path = '/kaggle/input/crime-against-women-20012014-india/crimes_against_women_2001-2014.csv'
df = pd.read_csv(path, index_col=0)
df.columns = ['state_unit', 'district', 'year', 'rape', 'kidnap_abduction', 'dowry_deaths', 
              'women_assault', 'women_insult', 'husband_relative_cruelty', 'girl_importation']
# Renumber the rows 0..n-1 so every row has a unique positional label.
df = df.reset_index(drop=True)

# Title-case every string cell so the same name written in different cases
# compares equal; non-string cells are left untouched.
for col in df.columns:
    df[col] = df[col].apply(lambda x : x.title() if isinstance(x, str) else x)

# Spelling variants of state / union-territory names (as they look after
# title-casing) mapped to the single spelling used in the rest of the notebook.
replacements = {
    'A & N Islands' : 'Andaman and Nicobar',
    'A&N Islands' : 'Andaman and Nicobar',
    'Daman & Diu' : 'Daman and Diu',
    'Delhi Ut' : 'Delhi',
    'D & N Haveli' : 'Dadra and Nagar Haveli',
    'D&N Haveli' : 'Dadra and Nagar Haveli',
    'Odisha' : 'Orissa',
    'Jammu & Kashmir' : 'Jammu and Kashmir'
}

# Assign the result back instead of calling replace(inplace=True) on the
# column selection: the in-place form acts on a temporary object and does not
# update `df` when pandas Copy-on-Write is active.
df['state_unit'] = df['state_unit'].replace(replacements)

In [None]:
df.shape

In [None]:
df.columns

### Split the data year-wise

In [None]:
def split_data(dframe):
    """Split a frame into one sub-frame per calendar year.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame with an integer ``year`` column.

    Returns
    -------
    dict
        Maps every year from the smallest to the largest value of ``year``
        (inclusive) to the rows of ``dframe`` for that year. A year inside
        that range with no rows maps to an empty frame.
    """
    years = dframe['year']
    first_year, last_year = years.min(), years.max()

    by_year = {}
    for current in range(first_year, last_year + 1):
        by_year[current] = dframe[years == current]

    return by_year

In [None]:
data_splits = split_data(dframe=df)

### `print()` all shapes

In [None]:
# One line per year: the year followed by the (rows, columns) shape of its frame.
for year, year_frame in data_splits.items():
    print(year, '\t→', year_frame.shape)

### Total crimes across all states of India - year wise

In [None]:
def categorize_crimes(data_source, state_unit=None):
    """Sum every crime column per year, using only the 'Total' summary rows.

    Parameters
    ----------
    data_source : dict
        Maps a year to a DataFrame whose first three columns are identifiers
        (the code relies on ``state_unit`` and ``district`` being among them)
        and whose remaining columns are crime counts.
    state_unit : str, optional
        When given, only rows of that state / union territory are summed.
        The name is matched case-insensitively.

    Returns
    -------
    dict
        ``{year: {crime_column: summed_count}}``; empty when ``data_source``
        is empty.
    """
    if not data_source:
        return {}

    # Read the crime columns from the first available frame instead of a
    # hard-coded year, so a data source that lacks 2001 still works.
    first_frame = next(iter(data_source.values()))
    crime_list = list(first_frame.columns[3:])

    all_crimes_year_wise = {}
    for year, frame in data_source.items():
        # na=False: a missing district name is simply "not a Total row"
        # rather than an invalid NaN entry in the boolean mask.
        totals = frame[frame['district'].str.contains('Total', na=False)]
        if state_unit:
            # Compare case-insensitively: str.title() would turn
            # 'Jammu and Kashmir' into 'Jammu And Kashmir', which never
            # matches names stored with a lowercase 'and'.
            same_state = totals['state_unit'].str.lower() == state_unit.lower()
            totals = totals[same_state]
        all_crimes_year_wise[year] = {col: totals[col].sum() for col in crime_list}

    return all_crimes_year_wise

### Year-wise plot showing the increase in crimes

In [None]:
def plot_overall_crimes_by_year(data_source, state_unit=None, kind='bar'):
    """Show the total number of crimes (all categories added up) per year.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``categorize_crimes``.
    state_unit : str, optional
        Restrict the totals to this state; the title says 'India' otherwise.
    kind : str
        ``'bar'`` draws a bar chart; any other value draws a pie chart.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    per_year = categorize_crimes(data_source=data_source, state_unit=state_unit)

    # Collapse each year's per-crime counts into a single number.
    years = []
    totals = []
    for year, counts in per_year.items():
        years.append(year)
        totals.append(sum(counts.values()))

    region = state_unit.title() if state_unit else 'India'
    title = 'Total Crimes - {}'.format(region)

    if kind == 'bar':
        trace = go.Bar(x=years, y=totals)
    else:
        trace = go.Pie(labels=years, values=totals)

    fig = go.Figure(
        data=[trace],
        layout=go.Layout(
            height=400,
            width=600,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
plot_overall_crimes_by_year(data_source=data_splits)

In [None]:
plot_overall_crimes_by_year(data_source=data_splits, kind='pie')

#### State wise

In [None]:
plot_overall_crimes_by_year(data_source=data_splits, state_unit='Andhra Pradesh')

In [None]:
plot_overall_crimes_by_year(data_source=data_splits, state_unit='Andhra Pradesh', kind='pie')

### Plotting the crimes that happened in a specific year

In [None]:
def plot_crimes_by_year(data_source, year, state_unit=None, kind='bar'):
    """Show the count of every crime category for a single year.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``categorize_crimes``.
    year : int
        Year to plot; must be a key of ``data_source`` (KeyError otherwise).
    state_unit : str, optional
        Restrict the counts to this state; the title says 'India' otherwise.
    kind : str
        ``'bar'`` draws a bar chart; any other value draws a pie chart.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    per_year = categorize_crimes(data_source=data_source, state_unit=state_unit)
    counts = per_year[year]

    crime_names = list(counts)
    crime_counts = [counts[name] for name in crime_names]

    region = state_unit.title() if state_unit else 'India'
    title = '{} - Total Crimes - {}'.format(year, region)

    if kind == 'bar':
        trace = go.Bar(x=crime_names, y=crime_counts)
    else:
        trace = go.Pie(labels=crime_names, values=crime_counts)

    fig = go.Figure(
        data=[trace],
        layout=go.Layout(
            height=400,
            width=600,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
plot_crimes_by_year(data_source=data_splits, year=2001)

In [None]:
plot_crimes_by_year(data_source=data_splits, year=2001, kind='pie')

#### State wise

In [None]:
plot_crimes_by_year(data_source=data_splits, year=2001, state_unit='Andhra Pradesh')

In [None]:
plot_crimes_by_year(data_source=data_splits, year=2001, state_unit='Andhra Pradesh', kind='pie')

### Overall increase in crime activity from lowest year to highest year

In [None]:
def plot_overall_difference(data_source, ideal_year, cwith_year, state_unit=None):
    """Draw two bar series comparing every crime category between two years.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``categorize_crimes``.
    ideal_year : int
        Year used for the first bar series.
    cwith_year : int
        Year used for the second bar series.
    state_unit : str, optional
        Restrict the counts to this state; the title says 'India' otherwise.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    per_year = categorize_crimes(data_source=data_source, state_unit=state_unit)

    # Look both years up first so a missing year fails before anything is drawn.
    base_counts = per_year[ideal_year]
    other_counts = per_year[cwith_year]

    region = state_unit.title() if state_unit else 'India'
    title = '{} vs {} - diff - {}'.format(ideal_year, cwith_year, region)

    traces = [
        go.Bar(x=list(counts.keys()), y=list(counts.values()), name=label)
        for label, counts in ((ideal_year, base_counts), (cwith_year, other_counts))
    ]

    fig = go.Figure(
        data=traces,
        layout=go.Layout(
            height=400,
            width=600,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
plot_overall_difference(data_source=data_splits, ideal_year=2001, cwith_year=2014)

#### State wise

In [None]:
plot_overall_difference(data_source=data_splits, ideal_year=2001, cwith_year=2012, state_unit='Andhra Pradesh')

### Plot a single crime's overall increase or decrease across all years

In [None]:
def plot_crime_overall_diff(data_source, crime, state_unit=None, kind='bar'):
    """Show how the count of one crime category changes across all years.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``categorize_crimes``.
    crime : str
        Name of the crime column to plot.
    state_unit : str, optional
        Restrict the counts to this state; the title says 'India' otherwise.
    kind : str
        ``'bar'`` draws a bar chart; any other value draws a pie chart.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    per_year = categorize_crimes(data_source=data_source, state_unit=state_unit)

    years = list(per_year)
    counts = [per_year[year][crime] for year in years]

    # The title shows the first and last year in the order they come back
    # from categorize_crimes.
    first_year = years[0]
    last_year = years[-1]
    region = state_unit.title() if state_unit else 'India'
    title = '{} - {} | {} → {}'.format(first_year, last_year, region, crime)

    if kind == 'bar':
        trace = go.Bar(x=years, y=counts)
    else:
        trace = go.Pie(labels=years, values=counts)

    fig = go.Figure(
        data=[trace],
        layout=go.Layout(
            height=400,
            width=600,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
plot_crime_overall_diff(data_source=data_splits, crime='rape')

In [None]:
plot_crime_overall_diff(data_source=data_splits, crime='rape', kind='pie')

#### State wise

In [None]:
plot_crime_overall_diff(data_source=data_splits, crime='rape', state_unit='Andhra Pradesh')

In [None]:
plot_crime_overall_diff(data_source=data_splits, crime='rape', state_unit='Andhra Pradesh', kind='pie')

### Get plotting features

In [None]:
def obtain_features(data_source, year, crime, state_unit=None):
    """Return the labels and counts needed to plot one crime for one year.

    Country view (no ``state_unit``): one entry per 'Total' summary row,
    labelled with the row's state name, followed by a 'Ladakh' entry with a
    count of 0.
    State view: one entry per district row of the given state, with the
    state's 'Total' summary row(s) left out.

    Parameters
    ----------
    data_source : dict
        Maps a year to a DataFrame with ``state_unit``, ``district`` and
        crime-count columns.
    year : int
        Key of ``data_source`` to read (KeyError if absent).
    crime : str
        Name of the crime column to extract.
    state_unit : str, optional
        State / union territory to break down by district; matched
        case-insensitively.

    Returns
    -------
    tuple of (list, list)
        Labels (state or district names) and the matching crime counts.
    """
    sub_df = data_source[year]
    # na=False: a missing district name is simply "not a Total row".
    is_total = sub_df['district'].str.contains('Total', na=False)

    if not state_unit:
        totals = sub_df[is_total]
        # Append to plain lists. Writing through `.loc[len(series)]` on the
        # filtered Series overwrites an existing row whenever that integer is
        # already one of the surviving index labels, instead of appending.
        # NOTE(review): the zero-count 'Ladakh' entry is presumably there so
        # the map has a value for that region - confirm.
        states_x = totals['state_unit'].to_list() + ['Ladakh']
        crime_y = totals[crime].to_list() + [0]
        return states_x, crime_y

    # Case-insensitive match: str.title() would turn 'Jammu and Kashmir' into
    # 'Jammu And Kashmir', which never matches names stored with a lowercase
    # 'and'.
    in_state = sub_df['state_unit'].str.lower() == state_unit.lower()
    # Drop the summary row by name rather than assuming it is the last row.
    districts = sub_df[in_state & ~is_total]

    return districts['district'].to_list(), districts[crime].to_list()

### Plot by state and year with a specific crime as target

In [None]:
def plot_column(data_source, year, crime, state_unit=None, kind='bar'):
    """Plot one crime for one year, broken down by state or by district.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``obtain_features``.
    year : int
        Year to plot.
    crime : str
        Name of the crime column to plot.
    state_unit : str, optional
        When given, ``obtain_features`` is asked for that state's breakdown;
        the title says 'India' otherwise.
    kind : str
        ``'bar'`` draws a bar chart; any other value draws a pie chart.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    labels, counts = obtain_features(
        data_source=data_source,
        year=year,
        crime=crime,
        state_unit=state_unit,
    )

    region = state_unit.title() if state_unit else 'India'
    title = '{} | {} → {}'.format(year, crime, region)

    if kind == 'bar':
        trace = go.Bar(x=labels, y=counts)
    else:
        trace = go.Pie(labels=labels, values=counts)

    fig = go.Figure(
        data=[trace],
        layout=go.Layout(
            height=400,
            width=600,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
plot_column(data_source=data_splits, year=2001, crime='kidnap_abduction')

In [None]:
plot_column(data_source=data_splits, year=2001, crime='kidnap_abduction', kind='pie')

#### State wise

In [None]:
plot_column(data_source=data_splits, year=2001, crime='kidnap_abduction', state_unit='andhra pradesh')

In [None]:
plot_column(data_source=data_splits, year=2001, crime='kidnap_abduction', state_unit='andhra pradesh', kind='pie')

### Plot the comparison by year and state with a specific crime as target

In [None]:
def compare_crime_by_years(data_source, ideal_year, cwith_year, crime, state_unit=None):
    """Draw two bar series comparing one crime between two years.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``obtain_features``.
    ideal_year : int
        Year used for the first bar series.
    cwith_year : int
        Year used for the second bar series.
    crime : str
        Name of the crime column to compare.
    state_unit : str, optional
        When given, ``obtain_features`` is asked for that state's breakdown;
        the title says 'India' otherwise.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Fetch both years' features before any trace is built.
    features = [
        (
            selected_year,
            obtain_features(
                data_source=data_source,
                year=selected_year,
                crime=crime,
                state_unit=state_unit,
            ),
        )
        for selected_year in (ideal_year, cwith_year)
    ]

    traces = [
        go.Bar(x=labels, y=counts, name=selected_year)
        for selected_year, (labels, counts) in features
    ]

    region = state_unit.title() if state_unit else 'India'
    title = '{} vs {} | {} → {}'.format(ideal_year, cwith_year, crime, region)

    fig = go.Figure(
        data=traces,
        layout=go.Layout(
            height=400,
            width=800,
            title=title,
            margin=dict(l=0, r=0, b=0, t=40),
        ),
    )
    fig.show()

    return None

#### Country wise

In [None]:
compare_crime_by_years(
    data_source=data_splits, 
    ideal_year=2001, 
    cwith_year=2014, 
    crime='husband_relative_cruelty'
)

#### State wise

In [None]:
compare_crime_by_years(
    data_source=data_splits, 
    ideal_year=2001, 
    cwith_year=2014, 
    crime='husband_relative_cruelty', 
    state_unit='Andhra Pradesh'
)

### Geographical plot - crime activity state wise

In [None]:
def get_india_map(year, state_unit=None):
    """Return the URL of the GeoJSON file to use for the given year.

    Years from 2014 onwards always get the 'india_telengana' state file.
    Earlier years get the district file when ``state_unit`` is truthy and
    the state file otherwise.
    """
    if not year < 2014:
        return 'https://raw.githubusercontent.com/geohacker/india/master/state/india_telengana.geojson'

    if state_unit:
        return 'https://raw.githubusercontent.com/geohacker/india/master/district/india_district.geojson'

    return 'https://raw.githubusercontent.com/geohacker/india/master/state/india_state.geojson'

In [None]:
def get_districts_json(year, state_unit=None):
    """Return the GeoJSON to plot for a year, optionally limited to one state.

    Parameters
    ----------
    year : int
        Passed to ``get_india_map`` to pick the GeoJSON file.
    state_unit : str, optional
        When falsy, the GeoJSON URL itself is returned and nothing is
        downloaded. Otherwise the file is downloaded and only the features
        whose ``properties.NAME_1`` equals ``state_unit`` exactly are kept.

    Returns
    -------
    str or dict
        The URL (country view) or a GeoJSON ``FeatureCollection`` dict
        holding the matching features (state view).

    Raises
    ------
    requests.exceptions.RequestException
        If the download times out, fails, or returns an HTTP error status.
    """
    geo_link = get_india_map(year=year, state_unit=state_unit)

    if not state_unit:
        return geo_link

    # Without an explicit timeout, requests waits indefinitely on a host that
    # never responds.
    response = requests.get(url=geo_link, timeout=30)
    # Surface HTTP errors directly instead of failing later while trying to
    # parse an error page as JSON.
    response.raise_for_status()

    state_districts = [
        feature
        for feature in response.json()['features']
        if feature['properties']['NAME_1'] == state_unit
    ]

    return {
        "type": "FeatureCollection",
        "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
        "features" : state_districts
    }

In [None]:
def plot_state_wise(data_source, year, crime, state_unit=None):
    """Draw a choropleth map of one crime's state-level counts for a year.

    Parameters
    ----------
    data_source : dict
        Year -> DataFrame mapping, passed on to ``obtain_features``.
    year : int
        Year to plot; also selects the GeoJSON file.
    crime : str
        Name of the crime column used to colour the map.
    state_unit : str, optional
        Accepted but ignored: it is reset to ``None`` on the first line, so
        the map is always drawn for the whole country.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # NOTE(review): the caller's value is discarded here; this looks like a
    # deliberate way of disabling the per-state map - confirm.
    state_unit = None

    names, counts = obtain_features(
        data_source=data_source,
        year=year,
        crime=crime,
        state_unit=state_unit,
    )
    state_crime_df = pd.DataFrame(
        data=list(zip(names, counts)),
        columns=['name', 'crime_count'],
    )

    geojson_source = get_districts_json(year=year, state_unit=state_unit)
    trace = go.Choropleth(
        geojson=geojson_source,
        # Rows are matched to map features through the feature's NAME_1 property.
        featureidkey='properties.NAME_1',
        locations=state_crime_df['name'],
        z=state_crime_df['crime_count'],
        colorscale='Reds',
        marker_line_color='black',
        colorbar=dict(title={'text': "Crime Range"}),
    )

    geo_settings = dict(
        visible=False,
        lonaxis={'range': [65, 100]},
        lataxis={'range': [5, 40]},
    )
    layout = go.Layout(
        title="{} → Crime Activity - {}".format(year, crime),
        geo=geo_settings,
        margin=dict(l=0, b=0, t=30, r=0),
        height=600,
        width=600,
    )

    fig = go.Figure(data=[trace], layout=layout)
    fig.show()

    return None

#### Country wise

In [None]:
plot_state_wise(data_source=data_splits, year=2001, crime='dowry_deaths')

In [None]:
plot_state_wise(data_source=data_splits, year=2014, crime='dowry_deaths')

### Conclusion

* Crimes against women keep increasing, regardless of the measures that are taken.
* One would expect crime activity to decrease as the years go by, but the data shows quite the opposite.
* Moreover, this dataset only covers activity up to 2014; we do not know how many more crimes have happened since then.
* The same trend can be seen when we look at a particular state of the country.