In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_csv('cleaned.csv')

In [2]:
df.head()

Unnamed: 0.1,Unnamed: 0,ID,aspect,strdist,basarea,basin,curvature,curve_cont,curve_prof,curves,...,elev,cohesion,slide,scarpdist,scarps,frictang,slope,slopeleg,woods,specwt
0,2,3,265.2971,644.9806,65600,1,-2.156464,0.636057,-1.520407,0.214373,...,1057.005,10,1,28.28427,0.0,23,15.3465,2024.026,1,19
1,3,4,267.3936,647.7654,65600,1,2.61673,-2.301352,0.315377,0.135763,...,1065.42,10,1,20.0,0.0,23,12.92921,2015.106,1,19
2,6,7,272.0991,625.14,65600,1,0.029022,-0.073801,-0.044779,0.164955,...,1058.138,10,1,20.0,0.0,23,16.34302,1985.669,1,19
3,7,8,272.9689,628.0128,65600,1,-1.793457,0.88196,-0.911497,0.215654,...,1063.369,10,1,20.0,0.0,23,16.76664,1976.543,1,19
4,8,9,261.2346,631.5062,65600,1,2.723572,-2.240553,0.483019,0.142164,...,1072.22,10,1,20.0,0.0,23,13.31464,1968.201,1,19


In [3]:
df = df.drop('Unnamed: 0', axis=1)

In [4]:
df = df.drop('ID', axis=1)

In [5]:
df.columns

Index(['aspect', 'strdist', 'basarea', 'basin', 'curvature', 'curve_cont',
       'curve_prof', 'curves', 'drop', 'rockdist', 'flowdir', 'fos', 'lith',
       'elev', 'cohesion', 'slide', 'scarpdist', 'scarps', 'frictang', 'slope',
       'slopeleg', 'woods', 'specwt'],
      dtype='object')

In [6]:
len(list(df.columns))

23

In [7]:
# Split the dataset into features (X) and target variable (y)
X = df.drop('slide', axis=1)
y = df['slide']

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [8]:
# Define and train the models
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'XGBoost': XGBClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42)
}

In [9]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    # Evaluate model performance on validation set
    accuracy = accuracy_score(y_val, y_pred)
    classification_rep = classification_report(y_val, y_pred)

    print(f"{name} Model:")
    print(f"Validation Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_rep)
    print("="*50)

Random Forest Model:
Validation Accuracy: 0.9252
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      2048
           1       0.88      0.90      0.89      1015

    accuracy                           0.93      3063
   macro avg       0.91      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063





XGBoost Model:
Validation Accuracy: 0.9252
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      2048
           1       0.88      0.89      0.89      1015

    accuracy                           0.93      3063
   macro avg       0.92      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063

Decision Tree Model:
Validation Accuracy: 0.8897
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      2048
           1       0.83      0.83      0.83      1015

    accuracy                           0.89      3063
   macro avg       0.88      0.88      0.88      3063
weighted avg       0.89      0.89      0.89      3063

Logistic Regression Model:
Validation Accuracy: 0.6709
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.86      0.78      2048
           1       0.

In [10]:
# Evaluate models on the test set
for name, model in models.items():
    y_pred_test = model.predict(X_test)

    # Evaluate model performance on test set
    accuracy_test = accuracy_score(y_test, y_pred_test)
    classification_rep_test = classification_report(y_test, y_pred_test)

    print(f"{name} Model (Test Set):")
    print(f"Test Accuracy: {accuracy_test:.4f}")
    print("Test Classification Report:")
    print(classification_rep_test)
    print("="*50)

Random Forest Model (Test Set):
Test Accuracy: 0.9305
Test Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2020
           1       0.90      0.90      0.90      1043

    accuracy                           0.93      3063
   macro avg       0.92      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063

XGBoost Model (Test Set):
Test Accuracy: 0.9321
Test Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2020
           1       0.90      0.90      0.90      1043

    accuracy                           0.93      3063
   macro avg       0.92      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063

Decision Tree Model (Test Set):
Test Accuracy: 0.8867
Test Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.92      0.91      20

In [11]:
import pickle

# XGBoost model
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)

# Save the model as a pickle file
with open('xgb_model.pkl', 'wb') as model_file:
    pickle.dump(xgb_model, model_file)

# Load the model from the pickle file
with open('xgb_model.pkl', 'rb') as model_file:
    loaded_xgb_model = pickle.load(model_file)

# Predict on the test data using the loaded model
y_pred_test_xgb = loaded_xgb_model.predict(X_test)

# Evaluate model performance on the test set
accuracy_test_xgb = accuracy_score(y_test, y_pred_test_xgb)
classification_rep_test_xgb = classification_report(y_test, y_pred_test_xgb)

print("XGBoost Model (Test Set):")
print(f"Test Accuracy: {accuracy_test_xgb:.4f}")
print("Test Classification Report:")
print(classification_rep_test_xgb)






XGBoost Model (Test Set):
Test Accuracy: 0.9321
Test Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2020
           1       0.90      0.90      0.90      1043

    accuracy                           0.93      3063
   macro avg       0.92      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063



In [12]:
import pickle

# XGBoost model
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)

# Save the model as a pickle file
with open('xgb_model.pkl', 'wb') as model_file:
    pickle.dump(xgb_model, model_file)

# Load the model from the pickle file
with open('xgb_model.pkl', 'rb') as model_file:
    loaded_xgb_model = pickle.load(model_file)

# Predict on the test data using the loaded model
y_pred_test_xgb = loaded_xgb_model.predict(X_test)

# Evaluate model performance on the test set
accuracy_test_xgb = accuracy_score(y_test, y_pred_test_xgb)
classification_rep_test_xgb = classification_report(y_test, y_pred_test_xgb)

print("XGBoost Model (Test Set):")
print(f"Test Accuracy: {accuracy_test_xgb:.4f}")
print("Test Classification Report:")
print(classification_rep_test_xgb)






XGBoost Model (Test Set):
Test Accuracy: 0.9321
Test Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2020
           1       0.90      0.90      0.90      1043

    accuracy                           0.93      3063
   macro avg       0.92      0.92      0.92      3063
weighted avg       0.93      0.93      0.93      3063



In [15]:
import pickle

# Load the model from the pickle file
with open('xgb_model.pkl', 'rb') as model_file:
    loaded_xgb_model = pickle.load(model_file)



import numpy as np

def yield_fn(features_list):
                int_features2 = np.array(features_list)

                int_features1 = int_features2.reshape(1, -1)


                tested1=loaded_xgb_model.predict(int_features1)



                print(tested1)

                return  tested1



In [16]:
yield_fn([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])

[1]


array([1], dtype=int64)