In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier

# Read the sample table; the target label lives in the 'Class' column and
# every remaining column except 'system:index' and '.geo' is a predictor.
data = pd.read_csv('/content/Dhaka_17_42.csv')
non_feature_columns = ['system:index', '.geo', 'Class']
X = data.drop(columns=non_feature_columns)
y = data['Class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate ensemble sizes to evaluate: 1 through 99 inclusive.
n_estimators_list = range(1, 100)

def _best_n_estimators(model_cls, candidates, X_fit, y_fit, X_val, y_val):
    """Return the ``n_estimators`` candidate with the highest validation accuracy.

    For every value in ``candidates`` a fresh ``model_cls`` instance is built
    with ``random_state=42``, fitted on ``(X_fit, y_fit)`` and scored with
    ``accuracy_score`` on ``(X_val, y_val)``.

    Ties are resolved in favour of the earliest candidate, because a later
    candidate only replaces the current best when it is strictly better.
    Returns 0 when ``candidates`` is empty.
    """
    best_n = 0
    # Start below any reachable accuracy so the first candidate is always
    # accepted, even if it scores 0.0 (a 0 sentinel would leave best_n at 0,
    # which is not a valid n_estimators value).
    best_accuracy = -1.0
    for n_estimators in candidates:
        model = model_cls(n_estimators=n_estimators, random_state=42)
        model.fit(X_fit, y_fit)
        accuracy = accuracy_score(y_val, model.predict(X_val))
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_n = n_estimators
    return best_n


# Tune on a validation subset carved out of the training data instead of on
# X_test: choosing n_estimators by test accuracy and then reporting accuracy
# on that same test set yields optimistically biased scores.
# 25% of the 80% training portion equals 20% of all rows, the same fraction
# that is held out for testing.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)

# Random Forest
best_n_estimators_rf = _best_n_estimators(
    RandomForestClassifier, n_estimators_list, X_fit, y_fit, X_val, y_val
)
print('best_n_estimators_rf = ',best_n_estimators_rf)

# AdaBoost
best_n_estimators_ab = _best_n_estimators(
    AdaBoostClassifier, n_estimators_list, X_fit, y_fit, X_val, y_val
)
print('best_n_estimators_ab = ',best_n_estimators_ab)

# Gradient Boosting
best_n_estimators_gb = _best_n_estimators(
    GradientBoostingClassifier, n_estimators_list, X_fit, y_fit, X_val, y_val
)
print('best_n_estimators_gb = ',best_n_estimators_gb)

# XGBoost
best_n_estimators_xgb = _best_n_estimators(
    XGBClassifier, n_estimators_list, X_fit, y_fit, X_val, y_val
)
print('best_n_estimators_xgb = ',best_n_estimators_xgb)

# # printing best_n_estimators
# print(
# 'best_n_estimators_rf = ',best_n_estimators_rf,
# 'best_n_estimators_ab = ',best_n_estimators_ab,
# 'best_n_estimators_gb = ',best_n_estimators_gb,
# 'best_n_estimators_xgb = ',best_n_estimators_xgb
# )

# Build the four base learners, each with its previously selected
# n_estimators value and a fixed seed.
rf = RandomForestClassifier(random_state=42, n_estimators=best_n_estimators_rf)
ada = AdaBoostClassifier(random_state=42, n_estimators=best_n_estimators_ab)
gb = GradientBoostingClassifier(random_state=42, n_estimators=best_n_estimators_gb)
xgb = XGBClassifier(random_state=42, n_estimators=best_n_estimators_xgb)


def _fit_and_score(model):
    """Fit ``model`` on the training split; return (test predictions, test accuracy)."""
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return predictions, accuracy_score(y_test, predictions)


# Fit and evaluate each base learner on the held-out test split.
rf_pred, rf_accuracy = _fit_and_score(rf)
ada_pred, ada_accuracy = _fit_and_score(ada)
xgb_pred, xgb_accuracy = _fit_and_score(xgb)
gb_pred, gb_accuracy = _fit_and_score(gb)

# UISEM (Urban Impervious Surface Ensemble Model): a soft-voting ensemble
# over the four base learners.
base_estimators = [('rf', rf), ('ada', ada), ('xgb', xgb), ('gb', gb)]
UISEM = VotingClassifier(estimators=base_estimators, voting='soft').fit(X_train, y_train)

# Score the ensemble on the held-out test split.
y_pred = UISEM.predict(X_test)
UISEM_accuracy = accuracy_score(y_test, y_pred)

# Report the test-set accuracy of every base model and of the ensemble.
for label, score in (
    ("Random Forest accuracy:", rf_accuracy),
    ("AdaBoost accuracy:", ada_accuracy),
    ("XGBoost accuracy:", xgb_accuracy),
    ("Gradient Boosting accuracy:", gb_accuracy),
    ("UISEM accuracy soft:", UISEM_accuracy),
):
    print(label, score)

best_n_estimators_rf =  87
best_n_estimators_ab =  57
best_n_estimators_gb =  55
best_n_estimators_xgb =  21
Random Forest accuracy: 0.9985734664764622
AdaBoost accuracy: 0.9985734664764622
XGBoost accuracy: 0.9985734664764622
Gradient Boosting accuracy: 0.9985734664764622
Ensemble accuracy soft: 0.9985734664764622
