In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
# Load the crimes-against-women dataset and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='CrimesOnWomenData.csv')

df.head(n=5)

Unnamed: 0.1,Unnamed: 0,State,Year,Rape,K&A,DD,AoW,AoM,DV,WT
0,0,ANDHRA PRADESH,2001,871,765,420,3544,2271,5791,7
1,1,ARUNACHAL PRADESH,2001,33,55,0,78,3,11,0
2,2,ASSAM,2001,817,1070,59,850,4,1248,0
3,3,BIHAR,2001,888,518,859,562,21,1558,83
4,4,CHHATTISGARH,2001,959,171,70,1763,161,840,0


In [None]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  736 non-null    int64 
 1   State       736 non-null    object
 2   Year        736 non-null    int64 
 3   Rape        736 non-null    int64 
 4   K&A         736 non-null    int64 
 5   DD          736 non-null    int64 
 6   AoW         736 non-null    int64 
 7   AoM         736 non-null    int64 
 8   DV          736 non-null    int64 
 9   WT          736 non-null    int64 
dtypes: int64(9), object(1)
memory usage: 57.6+ KB


In [None]:
# Lower-case every state name so that differently-cased spellings of the same
# state (e.g. "UTTAR PRADESH" and "uttar pradesh") collapse into a single value.
df = df.assign(State=df['State'].str.lower())


In [None]:
# Map the variant spelling of this state onto the canonical one so both
# spellings are counted as the same state.
state_name_fixes = {'d & n haveli': 'd&n haveli'}
df['State'] = df['State'].replace(state_name_fixes)


In [None]:
# Column-description lookup table, loaded for reference.
desc = pd.read_csv(filepath_or_buffer='description.csv')

# Show only the columns at positions 1 and 2 (column name and its explanation).
desc.iloc[:, 1:3]

Unnamed: 0,Column Names,Explanation
0,State,State
1,Year,Year
2,Rape,No. of Rape cases
3,K&A,Kidnap And Assault
4,DD,Dowry Deaths
5,AoW,Assault against women
6,AoM,Assault against modesty of women
7,DV,Domestic violence
8,WT,Women Trafficking


# LINE CHART FOR ALL STATES & ALL CRIMES

In [None]:
# Interactive line chart: yearly values of every crime column, one state at a time.
# A dropdown switches the chart between states.

# Crime columns to plot (kept as a DataFrame; only its column labels are used below)
col = df[['Rape','K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']]

# Unique states, in order of first appearance
states = df['State'].unique()

# Create a figure using go.Figure()
fig = go.Figure()

# Initial visibility: show every crime of the first state, so the chart matches
# the first dropdown entry (which plotly marks as active by default).
visibility = {state: {crime: state == states[0] for crime in col.columns} for state in states}

# Owning state of each trace, in the order the traces are added. The dropdown
# uses this for an exact match instead of testing the trace name's suffix,
# which would also match any state whose name is a suffix of another.
trace_states = []

# Add one trace per (state, crime) pair
for state in states:
    # Filter once per state (not once per axis per trace) and order by year so
    # the line is drawn left to right even if the rows are not stored in order.
    state_rows = df[df['State'] == state].sort_values('Year', kind='stable')
    for crime in col.columns:
        fig.add_trace(
            go.Scatter(x=state_rows['Year'],
                       y=state_rows[crime],
                       mode='lines',
                       name=f"{crime} - {state}",
                       visible=visibility[state][crime])
        )
        trace_states.append(state)


# Create a dropdown menu for state selection: each button shows exactly the
# traces that belong to its state and updates the title accordingly.
state_dropdown_buttons = [
    {'label': state,
     'method': 'update',
     'args': [
         {'visible': [owner == state for owner in trace_states]},
         # 'title.text' addresses the title string explicitly rather than
         # relying on the string shorthand for the whole `title` attribute.
         {'title.text': f'State: {state}'}
     ]}
    for state in states
]

# Initial title agrees with the traces shown by default
initial_title = f'State: {states[0]}' if len(states) else "Select State"

# Update the layout to include the state dropdown
fig.update_layout(
    updatemenus=[
        {
            'buttons': state_dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.05,  # Adjust position of the dropdown
            'xanchor': 'left',
            'y': 1.15,
            'yanchor': 'top',
            'pad': {'r': 10, 't': 10}
        }
    ],
    title=initial_title,
    xaxis_title="Year",
    yaxis_title="Crime Rate",
)

# Show the figure
fig.show()

# WHICH CRIME IS AT THE TOP IN INDIA

In [None]:
# Define the columns for different crimes
crime_columns = ['Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']

# Sum the crime columns across all states to get total crime counts for India
crime_totals = df[crime_columns].sum()

# Create a DataFrame from the totals for better visualization
crime_totals_df = pd.DataFrame({'Crime': crime_totals.index, 'Total': crime_totals.values})

# Create a bar chart using Plotly Express to visualize which crime is at the top
fig = px.bar(crime_totals_df.sort_values(by='Total', ascending=False), x='Crime', y='Total', title='Total Crimes in India by Type',
             labels={'Total': 'Total Count', 'Crime': 'Crime Type'},
             color='Crime')

# Show the bar chart
fig.show()

# WHICH STATES HAVE THE HIGHEST COUNT OF EACH CRIME

In [None]:
# Define the columns for different crimes
crime_columns = ['Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']

# Create a figure object to hold all crime plots
figures = []

# Loop through each crime and find the top 10 states for that crime
for crime in crime_columns:
    # Group by state and sum the crime data, then sort to get the top 10 states
    top_10_states = df.groupby('State')[crime].sum().nlargest(37).reset_index()

    # Create a bar chart for the current crime
    fig = px.bar(top_10_states, x='State', y=crime, title=f'All States for {crime} in India',
                 labels={crime: 'Total Count', 'State': 'State'}, color='State',
                 text=crime, height=800)

    # Append the figure to the list of figures
    figures.append(fig)

# Show the charts (one for each crime)
for fig in figures:
    fig.show()

# OVERALL RANKING OF EACH STATE BY TOTAL CRIME COUNT

In [None]:
# Define the columns for different crimes
crime_columns = ['Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']

# Sum all crime columns for each state to get the total crime count per state
df['Total_Crime'] = df[crime_columns].sum(axis=1)

# Group by state and sum the total crime count
state_crime_totals = df.groupby('State')['Total_Crime'].sum().reset_index()

# Rank the states by total crime count (descending order)
state_crime_totals['Rank'] = state_crime_totals['Total_Crime'].rank(ascending=False, method='min')

# Sort the DataFrame by total crime count to show the highest at the top
state_crime_totals = state_crime_totals.sort_values(by='Total_Crime', ascending=False).reset_index(drop=True)

# Display the ranked list of states by total crime
print(state_crime_totals)

# Create a bar chart to visualize the total crimes per state
fig = px.bar(state_crime_totals, x='State', y='Total_Crime', title='Total Crimes by State in India (Overall Ranking)',
             labels={'Total_Crime': 'Total Crime Count', 'State': 'State'},
             text='Rank', color='State' , height = 800)

# Show the bar chart
fig.show()

                State  Total_Crime  Rank
0       uttar pradesh       529734   1.0
1      madhya pradesh       413157   2.0
2         west bengal       409242   3.0
3      andhra pradesh       391123   4.0
4           rajasthan       379264   5.0
5         maharashtra       365632   6.0
6               assam       291980   7.0
7              kerala       202986   8.0
8              odisha       194774   9.0
9               bihar       189886  10.0
10          karnataka       161364  11.0
11            gujarat       154339  12.0
12            haryana       152479  13.0
13             punjab       135575  14.0
14         tamil nadu       133884  15.0
15           delhi ut       104729  16.0
16            tripura       102185  17.0
17       chhattisgarh        97687  18.0
18        uttarakhand        88036  19.0
19          telangana        87121  20.0
20          jharkhand        82827  21.0
21    jammu & kashmir        60171  22.0
22           nagaland        50593  23.0
23   himachal pr

# SHARE OF EACH CRIME TYPE IN EACH STATE

In [None]:
# Define the columns for different crimes
crime_columns = ['Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']

# Aggregate the data by state
state_crime_totals = df.groupby('State')[crime_columns].sum().reset_index()

# Number of states
num_states = len(state_crime_totals)

# Define the number of columns and rows for the subplots
num_cols = 4  # You can adjust this based on your preference
num_rows = (num_states + num_cols - 1) // num_cols  # Compute rows needed

# Create subplots
fig = make_subplots(rows=num_rows, cols=num_cols,
                    subplot_titles=state_crime_totals['State'],
                    specs=[[{'type': 'pie'}] * num_cols] * num_rows)

# Add pie charts for each state to the subplots
for idx, state in enumerate(state_crime_totals['State']):
    state_data = state_crime_totals[state_crime_totals['State'] == state].iloc[0]

    row = idx // num_cols + 1
    col = idx % num_cols + 1

    pie_chart = go.Pie(
        labels=crime_columns,
        values=state_data[crime_columns],
        name=state
    )

    fig.add_trace(pie_chart, row=row, col=col)

# Update layout for better spacing and titles
fig.update_layout(
    title_text='Crime Distribution by State',
    showlegend=False,  # Hide legends to avoid cluttering
    height=600 + (num_rows - 1) * 300 # Adjust height based on number of rows  # Adjust width as needed
)

# Show the figure
fig.show()