# 📌 Phase 3: Model Training for Naxalite Hideout Prediction

In this notebook, we'll prepare the data and train a classification model to predict likely Naxalite hideout locations using the engineered features.

In [15]:
# ✅ Step 1: Load Feature-Enhanced Dataset
import pandas as pd

df = pd.read_csv("../Data/naxal_hideouts_features.csv")
df['label'] = 1  # Mark real hideouts as positive samples
df.head()


Unnamed: 0,name,latitude,longitude,date_range,elevation,distance_to_village,label
0,Vashudev rao@takkapali satish,19.12562,81.14555,2025-06-01_to_2025-06-10,479.0,32.243497,1
1,Ranita@Jaymati,19.38255,80.88472,2025-06-01_to_2025-06-10,475.0,7.747186,1
2,Ranita@Jaymati,19.37888,80.96871,2025-06-01_to_2025-06-10,788.0,3.752889,1
3,Ranita@Jaymati,19.42753,80.86589,2025-06-01_to_2025-06-10,365.0,12.317445,1
4,Vashudev rao@takkapali satish,19.1492,81.16334,2025-06-11_to_2025-06-20,553.0,31.57748,1


## ✅ Step 2: Generate Negative Samples
These are random nearby points that we assume are **not** hideouts.

In [16]:
pip install numpy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\manis\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [17]:
import numpy as np

def generate_negative_samples(df, n_samples=100):
    min_lat, max_lat = df['latitude'].min(), df['latitude'].max()
    min_lon, max_lon = df['longitude'].min(), df['longitude'].max()

    samples = []
    for _ in range(n_samples):
        lat = np.random.uniform(min_lat, max_lat)
        lon = np.random.uniform(min_lon, max_lon)
        samples.append([lat, lon])
    return pd.DataFrame(samples, columns=['latitude', 'longitude'])

neg_df = generate_negative_samples(df, n_samples=len(df))
neg_df['elevation'] = df['elevation'].sample(frac=1).values  # Optional: sample elevation
neg_df['distance_to_village'] = df['distance_to_village'].sample(frac=1).values
neg_df['label'] = 0
neg_df.head()

Unnamed: 0,latitude,longitude,elevation,distance_to_village,label
0,19.139896,80.850524,671.0,50.167818,0
1,19.765152,80.79925,553.0,50.036673,0
2,19.452025,80.815839,380.0,51.698886,0
3,19.299859,81.009749,620.0,7.747186,0
4,19.426366,80.743784,455.0,51.708635,0


## ✅ Step 3: Combine and Shuffle the Dataset

In [18]:
combined_df = pd.concat([df, neg_df], ignore_index=True).sample(frac=1).reset_index(drop=True)
combined_df.head()

Unnamed: 0,name,latitude,longitude,date_range,elevation,distance_to_village,label
0,,19.734473,81.019953,,479.0,8.204942,0
1,,19.299859,81.009749,,620.0,7.747186,0
2,Ranita@Jaymati,19.38255,80.88472,2025-06-01_to_2025-06-10,475.0,7.747186,1
3,,19.41998,80.918219,,563.0,32.243497,0
4,Ranita@Jaymati,19.375,80.87648,2025-06-30_to_2025-07-08,455.0,8.204942,1


## ✅ Step 4: Train-Test Split

In [19]:
from sklearn.model_selection import train_test_split

features = ['latitude', 'longitude', 'elevation', 'distance_to_village']
X = combined_df[features]
y = combined_df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## ✅ Step 5: Train Random Forest Model

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = RandomForestClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6666666666666666
[[1 1]
 [1 3]]
              precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.75      0.75      0.75         4

    accuracy                           0.67         6
   macro avg       0.62      0.62      0.62         6
weighted avg       0.67      0.67      0.67         6



## ✅ Step 6: Save the Trained Model

In [21]:
pip install joblib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\manis\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [22]:
import joblib

joblib.dump(model, "../Models/rf_model.pkl")
print("✅ Model saved to ../Models/rf_model.pkl")

✅ Model saved to ../Models/rf_model.pkl
