In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset (the CSV is expected in the current working directory)
data = pd.read_csv('01_District_wise_crimes_committed_IPC_2001_2012.csv')

# Display basic information about the dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
data.info()
print(data.head())
print(data.columns)

# Check for missing values (per-column count of null cells)
print(data.isnull().sum())

# Encode the 'STATE/UT' column to numeric values.
# The column is overwritten in place with integer codes; the fitted encoder is
# kept so state names can be translated to those codes later.
label_encoder = LabelEncoder()
data['STATE/UT'] = label_encoder.fit_transform(data['STATE/UT'])

# Create a target column based on total crimes.
# Only the crime-count columns are summed: the encoded state id and YEAR are
# identifiers, not counts, and adding them would distort every row's total.
# NOTE(review): if the CSV also carries a pre-aggregated total column, it is
# still part of this sum -- confirm against the full column list.
data['Total_Crimes'] = data.drop(columns=['STATE/UT', 'YEAR']).sum(axis=1)

# Define crime rate categories from the 33rd and 66th percentiles of the totals
bins = [0, data['Total_Crimes'].quantile(0.33), data['Total_Crimes'].quantile(0.66), data['Total_Crimes'].max()]
labels = ['Low', 'Medium', 'High']
# include_lowest=True closes the first interval on the left, so a row whose
# total is exactly 0 falls into 'Low' instead of becoming NaN (which the
# astype(str) below would otherwise turn into a spurious 'nan' class).
data['Crime_Rate_Category'] = pd.cut(
    data['Total_Crimes'], bins=bins, labels=labels, include_lowest=True
)

# Encode the target labels.
# LabelEncoder assigns codes in sorted label order; target_encoder.classes_
# holds the names needed to translate the codes back.
data['Crime_Rate_Category'] = data['Crime_Rate_Category'].astype(str)
target_encoder = LabelEncoder()
data['Crime_Rate_Category'] = target_encoder.fit_transform(data['Crime_Rate_Category'])

# Select the features and the target column.
# NOTE: the features are the same columns the target was derived from, so the
# model can largely reconstruct the label from its inputs; the accuracy
# reported for it is not evidence of real predictive skill.
X = data.drop(columns=['Crime_Rate_Category', 'Total_Crimes']).values  # Input: state code, year, crime attributes
y = data['Crime_Rate_Category'].values  # Output: crime rate category

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaling statistics are learned from the
# training rows only and then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a 100-tree Random Forest classifier with a fixed seed
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Make predictions on the training set and on the held-out test set
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# Evaluate the model on the training set
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Detailed classification report for the test set.
# The label set is pinned to every encoded class (LabelEncoder codes run from
# 0 to n_classes - 1). Without it, a test split that happens to miss a class
# makes the number of target_names disagree with the labels found in the data
# and classification_report raises; it also keeps the confusion matrix rows
# and columns in the same order as target_encoder.classes_.
class_ids = list(range(len(target_encoder.classes_)))
print("Classification Report:")
print(classification_report(
    y_test,
    y_test_pred,
    labels=class_ids,
    target_names=target_encoder.classes_,
))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred, labels=class_ids))

# Function to predict crime rate category given a state name
def predict_crime_rate_category(state_name):
    """Return the crime rate category the trained model predicts for a state.

    Every row stored for the state is scored with the fitted scaler and
    classifier, and the most frequent predicted category is returned. On a
    tie, the alphabetically first label wins.

    Args:
        state_name: State/UT name exactly as it appears in the dataset;
            matching is case-sensitive (e.g. 'BIHAR').

    Returns:
        The predicted category label, or a human-readable message when the
        state is unknown to the encoder or has no rows in the data.
    """
    if state_name not in label_encoder.classes_:
        return f"State name '{state_name}' not found in the dataset."

    # data['STATE/UT'] holds encoded ids, so translate the name before filtering
    state_encoded = label_encoder.transform([state_name])[0]
    state_rows = data.loc[data['STATE/UT'] == state_encoded].drop(
        columns=['Crime_Rate_Category', 'Total_Crimes']
    )

    if state_rows.empty:
        return f"No data available for state name '{state_name}'."

    # The scaler was fitted on a plain ndarray; passing the raw values (not the
    # DataFrame) avoids scikit-learn's feature-name mismatch warning.
    state_features = scaler.transform(state_rows.values)
    predicted_labels = target_encoder.inverse_transform(clf.predict(state_features))

    # A state has many rows; aggregate them by majority vote instead of
    # reporting whichever row happens to come first.
    return pd.Series(predicted_labels).mode().iloc[0]

# Demo: look up a single state and report the category the model assigns to it
state_name = 'BIHAR'
predicted_category = predict_crime_rate_category(state_name)
print(
    f"Predicted crime rate category for {state_name}: {predicted_category}"
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9017 entries, 0 to 9016
Data columns (total 18 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   STATE/UT                              9017 non-null   object
 1   YEAR                                  9017 non-null   int64 
 2   MURDER                                9017 non-null   int64 
 3   ATTEMPT TO MURDER                     9017 non-null   int64 
 4   RAPE                                  9017 non-null   int64 
 5   KIDNAPPING & ABDUCTION                9017 non-null   int64 
 6   DACOITY                               9017 non-null   int64 
 7   PREPARATION AND ASSEMBLY FOR DACOITY  9017 non-null   int64 
 8   ROBBERY                               9017 non-null   int64 
 9   BURGLARY                              9017 non-null   int64 
 10  THEFT                                 9017 non-null   int64 
 11  RIOTS                         

