# Import Packages

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report

# Loading Dataset

In [5]:
# Read the raw dataset from a CSV file; the relative path is resolved
# against the notebook's current working directory.
DATA_FILE = 'our_data.csv'
data = pd.read_csv(DATA_FILE)

# Data Preprocessing

In [6]:
# Feature Selection and Preprocessing
# Three numeric columns are used as predictors; 'smoking' is the target.
continuous_features = ['height(cm)', 'waist(cm)', 'age']
X = data[continuous_features]
y = data['smoking']

# Split the dataset BEFORE scaling so the scaler never sees held-out rows.
# Same test_size / random_state as before, so the row partition is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Normalize continuous features: learn min/max from the training rows only,
# then apply that same transform to the test rows (prevents test-set leakage).
# Test values outside the training range may therefore fall outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaler; kept so that any cell
# still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Bagging Classifier

In [7]:
# Fit a seeded bagging ensemble of 10 estimators on the training split,
# then report accuracy and per-class metrics on the held-out split.
bagging_model = BaggingClassifier(random_state=42, n_estimators=10).fit(X_train, y_train)
bagging_pred = bagging_model.predict(X_test)

print("\nBagging Classifier:")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=bagging_pred))
print(classification_report(y_true=y_test, y_pred=bagging_pred))


Bagging Classifier:
Accuracy: 0.701467233187517
              precision    recall  f1-score   support

           0       0.76      0.67      0.72     26686
           1       0.64      0.74      0.69     21091

    accuracy                           0.70     47777
   macro avg       0.70      0.71      0.70     47777
weighted avg       0.71      0.70      0.70     47777



# Random Forest

In [8]:
# Tune a random forest with an exhaustive grid search over n_estimators,
# max_depth and min_samples_split, selecting the combination with the best
# 5-fold cross-validated accuracy on the training split.
rf_model = RandomForestClassifier(random_state=42)
rfc_params = {'n_estimators': [50, 100, 150], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]}
# 27 parameter combinations x 5 folds = 135 fits; n_jobs=-1 spreads them over
# all available cores. The forest is seeded, so the selected model is the same
# as with a serial search.
gs_rf = GridSearchCV(rf_model, rfc_params, cv=5, scoring='accuracy', n_jobs=-1)
gs_rf.fit(X_train, y_train)
print("\nBest Parameters for Random Forest:", gs_rf.best_params_)
# best_estimator_ is the winning configuration refit on the whole training
# split (GridSearchCV's default refit=True).
rf_best = gs_rf.best_estimator_
rf_pred = rf_best.predict(X_test)
# Evaluate the tuned model on the held-out split.
print("\nRandom Forest Classifier (Tuned):")
print("Accuracy:", accuracy_score(y_test, rf_pred))
print(classification_report(y_test, rf_pred))


Best Parameters for Random Forest: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 50}

Random Forest Classifier (Tuned):
Accuracy: 0.7171023714339536
              precision    recall  f1-score   support

           0       0.83      0.62      0.71     26686
           1       0.64      0.84      0.72     21091

    accuracy                           0.72     47777
   macro avg       0.73      0.73      0.72     47777
weighted avg       0.74      0.72      0.72     47777



# AdaBoost Classifier

In [9]:
# Tune AdaBoost with an exhaustive grid search over n_estimators and
# learning_rate, selecting the combination with the best 5-fold
# cross-validated accuracy on the training split.
boosting_model = AdaBoostClassifier(random_state=42)
abc_params = {'n_estimators': [50, 100, 150], 'learning_rate': [0.01, 0.1, 1]}
# 9 parameter combinations x 5 folds = 45 fits; n_jobs=-1 spreads them over
# all available cores. The booster is seeded, so the selected model is the
# same as with a serial search.
gs_abc = GridSearchCV(boosting_model, abc_params, cv=5, scoring='accuracy', n_jobs=-1)
gs_abc.fit(X_train, y_train)
print("\nBest Parameters for AdaBoost:", gs_abc.best_params_)
# best_estimator_ is the winning configuration refit on the whole training
# split (GridSearchCV's default refit=True).
abc_best = gs_abc.best_estimator_
boosting_pred = abc_best.predict(X_test)
# Evaluate the tuned model on the held-out split.
print("\nAdaBoost Classifier (Tuned):")
print("Accuracy:", accuracy_score(y_test, boosting_pred))
print(classification_report(y_test, boosting_pred))




Best Parameters for AdaBoost: {'learning_rate': 1, 'n_estimators': 150}

AdaBoost Classifier (Tuned):
Accuracy: 0.713376729388618
              precision    recall  f1-score   support

           0       0.82      0.62      0.71     26686
           1       0.63      0.83      0.72     21091

    accuracy                           0.71     47777
   macro avg       0.73      0.73      0.71     47777
weighted avg       0.74      0.71      0.71     47777

