**Data Collection and Data Preprocessing**

In [None]:
import pandas as pd

# NOTE: despite its name, `df` holds the CSV *path*, not a DataFrame.
df = "/content/32_Murder_victim_age_sex.csv"

# Read the CSV and replace every missing cell with 0 in a single chained step.
crime_data = pd.read_csv(df).fillna(0)


**Feature Selection and Data Splitting**

In [None]:
from sklearn.model_selection import train_test_split

# Predictors: the area name and the calendar year.
X = crime_data.loc[:, ['Area_Name', 'Year']]

# Targets: the total victim count followed by one column per age bracket.
# The order matters: downstream code indexes predictions by position.
y = crime_data.loc[:, [
    'Victims_Total',
    'Victims_Above_50_Yrs',
    'Victims_Upto_10_15_Yrs',
    'Victims_Upto_10_Yrs',
    'Victims_Upto_15_18_Yrs',
    'Victims_Upto_18_30_Yrs',
    'Victims_Upto_30_50_Yrs',
]]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Model Building (Ridge Regression)**

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge
# r2_score is used at the bottom of this cell, so it must be imported here;
# otherwise a fresh-kernel "Run All" fails with NameError.
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Define preprocessing steps.
# Column 0 is selected by position (it is 'Area_Name' given how X is built)
# and one-hot encoded; every other column is passed through untouched.
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(), [0])],
    remainder='passthrough'
)

# Define the Ridge regression pipeline.
# with_mean=False scales without centering -- presumably chosen because the
# one-hot encoded output may be sparse, which cannot be centered.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler(with_mean=False)),
    ('ridge', Ridge())
])

# Fit the pipeline to the training data
pipeline.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = pipeline.predict(X_test)

# Calculate the R-squared score of the untuned baseline.
# NOTE: the variable is called `accuracy` but holds R^2, not a classification accuracy.
accuracy = r2_score(y_test, y_pred)
print("R-squared score:", accuracy)

R-squared score: 0.5692321976670851


**Hyperparameter Tuning**


In [None]:
from sklearn.model_selection import GridSearchCV

# Ridge penalty strengths to try, one per decade from 1e-3 to 1e2. The
# 'ridge__' prefix routes the value to the pipeline step named 'ridge'.
param_grid = dict(ridge__alpha=[0.001, 0.01, 0.1, 1, 10, 100])

# 5-fold cross-validated grid search, ranking candidates by negated MSE
# (scikit-learn maximises scores, hence the negation).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Penalty strength of the best-scoring candidate.
best_alpha = grid_search.best_params_['ridge__alpha']


**Model Evaluation**

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# grid_search was built with the default refit=True, so best_estimator_ has
# already been refitted on the full training set with the best alpha; fitting
# it again here would only repeat that work.
best_model = grid_search.best_estimator_

# Make predictions
y_pred = best_model.predict(X_test)

# Evaluating the model.
# RMSE is computed as the mean of the per-target RMSEs, which is what
# mean_squared_error(..., squared=False) returned for multi-output targets.
# The `squared` argument was deprecated in scikit-learn 1.4 and later removed,
# so this form is used instead because it works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred, multioutput='raw_values')).mean()
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Best Alpha:", best_alpha)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R2) Score:", r2)
print("Mean Absolute Error (MAE):", mae)


Best Alpha: 10
Root Mean Squared Error (RMSE): 183.24128971518257
R-squared (R2) Score: 0.5732860665325085
Mean Absolute Error (MAE): 90.24632315895558


**Input Handling and Output Prediction**




In [None]:
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, widgets

def predict_victims(state, year):
    """Print and chart the predicted victim counts for one state and year.

    The prediction is obtained from ``predict_victims_function`` and is read
    positionally: index 0 is the total, indices 1-6 are the age brackets
    (above 50, 10-15, up to 10, 15-18, 18-30, 30-50). Negative predictions are
    clamped to zero, and the reported total is raised to the sum of the
    clamped age brackets whenever the predicted total is smaller.

    Args:
        state: Forwarded to ``predict_victims_function``; shown in the heading
            and chart title.
        year: Forwarded to ``predict_victims_function``; shown in the heading
            and chart title.
    """
    raw_prediction = predict_victims_function(state, year)

    # Clamp every age-bracket value once; the same numbers feed the total,
    # the printed report and the chart.
    age_group_counts = [max(0, val) for val in raw_prediction[1:]]

    # The total is never below zero and never below the sum of its parts.
    total_victims = max(max(0, raw_prediction[0]), sum(age_group_counts))

    # Text report
    print(f"\nPredictions for {state} in {year}:")
    print("Total Victims:", total_victims)
    captions = (
        "Victims Above 50 Years:",
        "Victims Upto 10-15 Years:",
        "Victims Upto 10 Years:",
        "Victims Upto 15-18 Years:",
        "Victims Upto 18-30 Years:",
        "Victims Upto 30-50 Years:",
    )
    for position, caption in enumerate(captions):
        print(caption, age_group_counts[position])

    # Bar chart of the same numbers
    labels = ['Total Victims', 'Above 50', '10-15', 'Upto 10', '15-18', '18-30', '30-50']
    values = [total_victims] + age_group_counts

    plt.figure(figsize=(10, 6))
    plt.bar(labels, values, color='skyblue')
    plt.title(f'Predicted Victims for {state} in {year}')
    plt.xlabel('Age Groups')
    plt.ylabel('Number of Victims')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

# Define a function to make predictions based on the selected state and year
def predict_victims_function(state, year):
    """Return the model's prediction for a single (state, year) pair.

    The input is wrapped in a one-row DataFrame that uses the same column
    names and order as the training features ('Area_Name', 'Year'), so the
    pipeline receives named columns rather than a bare nested list. The year
    is converted to a number explicitly because the year widget may deliver
    it as text.

    Args:
        state: Area name to predict for.
        year: Year as a number or a numeric string (e.g. ``'2013'``).

    Returns:
        The first (and only) row of ``best_model.predict``: one value per
        target column.

    Raises:
        ValueError: If ``year`` cannot be converted to a number.
    """
    features = pd.DataFrame({'Area_Name': [state], 'Year': [float(year)]})
    prediction = best_model.predict(features)
    return prediction[0]

# Dropdown menu for selecting the state.
# NOTE(review): the options are hard-coded; confirm each spelling matches an
# 'Area_Name' value in the training data, since the encoder is built with its
# default settings and will reject names it has not seen.
state_dropdown = widgets.Dropdown(
    options=[
         'Andhra Pradesh', 'Andaman & Nicobar Islands','Arunachal Pradesh', 'Assam', 'Bihar',
        'Chandigarh', 'Chhattisgarh', 'Delhi', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh',
        'Jammu & Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Lakshadweep', 'Madhya Pradesh',
        'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Puducherry',
        'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Tripura', 'Uttar Pradesh', 'Uttarakhand',
        'West Bengal', 'Dadra & Nagar Haveli', 'Daman & Diu'
    ],
    description='State:',
    disabled=False,
)

# Integer input for the year. A numeric widget is used instead of free text so
# that only whole numbers can ever be handed to the prediction function.
year_input = widgets.IntText(
    value=2013,
    description='Year:',
    disabled=False
)

# Use the interact function to create the form and link it to the prediction function.
# The trailing semicolon suppresses the repr of the returned object.
interact(predict_victims, state=state_dropdown, year=year_input);


interactive(children=(Dropdown(description='State:', options=('Andhra Pradesh', 'Andaman & Nicobar Islands', '…

In [None]:
import joblib

In [None]:
# Serialise the tuned pipeline to 'model.pkl' in the working directory; the
# call's return value (the list of written files) is the cell output.
joblib.dump(best_model, filename='model.pkl')

['model.pkl']