In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, VotingRegressor, BaggingClassifier, BaggingRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, Ridge, ElasticNet, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, log_loss, classification_report
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.metrics import precision_score, accuracy_score, f1_score, mean_squared_error, mean_absolute_error
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from tqdm import tqdm
import os
# NOTE(review): hard-coded absolute Windows path. The relative read_csv("Glass.csv")
# in the following cell only resolves after this working-directory change.
os.chdir("D://meridianthe4//PML//Cases//Glass_Identification")

In [9]:
# Read the glass data; "Type" is the class label and every other column is a predictor.
glass = pd.read_csv("Glass.csv")
X = glass.drop(columns="Type")

# Replace the raw class labels with integer codes via LabelEncoder.
# NOTE(review): presumably done because XGBClassifier (used later) expects 0..n-1 labels — confirm.
le = LabelEncoder()
y = le.fit_transform(glass["Type"])

# 70/30 hold-out split, stratified on the encoded label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)

In [5]:
# Exhaustive sweep over n_estimators x learning_rate x max_depth for scikit-learn's
# GradientBoostingClassifier, ranked by macro-averaged F1.
# NOTE(review): every configuration is scored on X_test / y_test, so the top row is
# chosen on the same data it is evaluated on — read it as an optimistic estimate.
trees = [50, 100, 150, 200]
rates = [0.01, 0.1, 0.2, 0.5, 0.8]
# max_depth=None lets scikit-learn expand nodes until leaves are pure (or too small to split).
depths = [None, 2, 3, 4, 5]
# Same visiting order as looping over depths inside rates.
rate_depth_grid = [(lr, depth) for lr in rates for depth in depths]

scores = []
for n_trees in tqdm(trees):  # the progress bar advances once per n_estimators value
    for lr, depth in rate_depth_grid:
        gbm = GradientBoostingClassifier(
            n_estimators=n_trees,
            learning_rate=lr,
            max_depth=depth,
            random_state=25,
        )
        y_pred = gbm.fit(X_train, y_train).predict(X_test)
        f1 = f1_score(y_test, y_pred, average='macro')
        # Row layout has to match the column order handed to the DataFrame below.
        scores.append([n_trees, depth, lr, f1])

df_scores = pd.DataFrame(
    scores,
    columns=["n_estimators", "max_depth", "learning_rate", "score"],
)
# Highest macro-F1 first.
df_scores.sort_values(by="score", ascending=False)

100%|██████████| 4/4 [01:09<00:00, 17.46s/it]


Unnamed: 0,n_estimators,max_depth,learning_rate,score
31,100,2.0,0.10,0.779074
56,150,2.0,0.10,0.771909
81,200,2.0,0.10,0.771909
36,100,2.0,0.20,0.765440
61,150,2.0,0.20,0.757599
...,...,...,...,...
47,100,3.0,0.80,0.561329
72,150,3.0,0.80,0.561329
97,200,3.0,0.80,0.561329
1,50,2.0,0.01,0.495520


## XGBoost

In [8]:
from xgboost import XGBClassifier

In [10]:
# Same n_estimators x learning_rate x max_depth sweep, this time with XGBClassifier,
# ranked by macro-averaged F1.
# NOTE(review): configurations are compared on X_test / y_test, so the best row is
# selected on the evaluation data itself — read it as an optimistic estimate.
trees = [50, 100, 150, 200]
rates = [0.01, 0.1, 0.2, 0.5, 0.8]
# NOTE(review): max_depth=None presumably falls back to XGBoost's own default depth — confirm.
depths = [None, 2, 3, 4, 5]
# Same visiting order as looping over depths inside rates.
rate_depth_grid = [(lr, depth) for lr in rates for depth in depths]

scores = []
for n_trees in tqdm(trees):  # the progress bar advances once per n_estimators value
    for lr, depth in rate_depth_grid:
        # The variable keeps the name `gbm` even though the estimator is XGBoost.
        gbm = XGBClassifier(
            n_estimators=n_trees,
            learning_rate=lr,
            max_depth=depth,
            random_state=25,
        )
        y_pred = gbm.fit(X_train, y_train).predict(X_test)
        f1 = f1_score(y_test, y_pred, average='macro')
        # Row layout has to match the column order handed to the DataFrame below.
        scores.append([n_trees, depth, lr, f1])

df_scores = pd.DataFrame(
    scores,
    columns=["n_estimators", "max_depth", "learning_rate", "score"],
)
# Highest macro-F1 first.
df_scores.sort_values(by="score", ascending=False)

100%|██████████| 4/4 [00:06<00:00,  1.69s/it]


Unnamed: 0,n_estimators,max_depth,learning_rate,score
85,200,,0.20,0.832031
35,100,,0.20,0.824693
60,150,,0.20,0.824693
84,200,5.0,0.10,0.824693
49,100,5.0,0.80,0.817927
...,...,...,...,...
1,50,2.0,0.01,0.580505
28,100,4.0,0.01,0.573782
26,100,2.0,0.01,0.573671
3,50,4.0,0.01,0.565793


## LightGBM

In [13]:
! pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------- ----------- 1.0/1.5 MB 7.7 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 4.4 MB/s  0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0


In [14]:
from lightgbm import LGBMClassifier

In [18]:
# Same n_estimators x learning_rate x max_depth sweep, this time with LGBMClassifier,
# ranked by macro-averaged F1.
# NOTE(review): configurations are compared on X_test / y_test, so the best row is
# selected on the evaluation data itself — read it as an optimistic estimate.
trees = [50, 100, 150, 200]
rates = [0.01, 0.1, 0.2, 0.5, 0.8]
# NOTE(review): how LightGBM interprets max_depth=None is not visible here — confirm.
depths = [None, 2, 3, 4, 5]
# Same visiting order as looping over depths inside rates.
rate_depth_grid = [(lr, depth) for lr in rates for depth in depths]

scores = []
for n_trees in tqdm(trees):  # the progress bar advances once per n_estimators value
    for lr, depth in rate_depth_grid:
        # verbose=-1 presumably silences LightGBM's per-fit log output — confirm.
        gbm = LGBMClassifier(
            n_estimators=n_trees,
            learning_rate=lr,
            max_depth=depth,
            random_state=25,
            verbose=-1,
        )
        y_pred = gbm.fit(X_train, y_train).predict(X_test)
        f1 = f1_score(y_test, y_pred, average='macro')
        # Row layout has to match the column order handed to the DataFrame below.
        scores.append([n_trees, depth, lr, f1])

df_scores = pd.DataFrame(
    scores,
    columns=["n_estimators", "max_depth", "learning_rate", "score"],
)
# Highest macro-F1 first.
df_scores.sort_values(by="score", ascending=False)

100%|██████████| 4/4 [00:04<00:00,  1.12s/it]


Unnamed: 0,n_estimators,max_depth,learning_rate,score
40,100,,0.50,0.854124
65,150,,0.50,0.846927
90,200,,0.50,0.846927
66,150,2.0,0.50,0.839529
87,200,3.0,0.20,0.831932
...,...,...,...,...
0,50,,0.01,0.378053
4,50,5.0,0.01,0.378053
3,50,4.0,0.01,0.378053
2,50,3.0,0.01,0.364575


## CatBoost

In [19]:
! pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp313-cp313-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting plotly (from catboost)
  Downloading plotly-6.5.0-py3-none-any.whl.metadata (8.5 kB)
Collecting narwhals>=1.15.1 (from plotly->catboost)
  Downloading narwhals-2.13.0-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.8-cp313-cp313-win_amd64.whl (102.4 MB)
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   - -------------------------------------- 4.2/102.4 MB 23.3 MB/s eta 0:00:05
   -------- ------------------------------- 21.0/102.4 MB 53.3 MB/s eta 0:00:02
   --------------- ------------------------ 38.5/102.4 MB 65.1 MB/s eta 0:00:01
   --------------------- ------------------ 55.8/102.4 MB 69.9 MB/s eta 0:00:01
   ---------------------------- ----------- 73.7/102.4 MB 73.0 MB/s eta 0:00:01
   --------------------------------- ------ 86.0/102.4 MB 71.0 

In [20]:
from catboost import CatBoostClassifier

In [21]:
# Same n_estimators x learning_rate x max_depth sweep, this time with CatBoostClassifier,
# ranked by macro-averaged F1.
# NOTE(review): configurations are compared on X_test / y_test, so the best row is
# selected on the evaluation data itself — read it as an optimistic estimate.
trees = [50, 100, 150, 200]
rates = [0.01, 0.1, 0.2, 0.5, 0.8]
# NOTE(review): max_depth=None presumably falls back to CatBoost's own default depth — confirm.
depths = [None, 2, 3, 4, 5]
# Same visiting order as looping over depths inside rates.
rate_depth_grid = [(lr, depth) for lr in rates for depth in depths]

scores = []
for n_trees in tqdm(trees):  # the progress bar advances once per n_estimators value
    for lr, depth in rate_depth_grid:
        # The variable keeps the name `gbm` even though the estimator is CatBoost.
        gbm = CatBoostClassifier(
            n_estimators=n_trees,
            learning_rate=lr,
            max_depth=depth,
            random_state=25,
            verbose=False,
        )
        y_pred = gbm.fit(X_train, y_train).predict(X_test)
        f1 = f1_score(y_test, y_pred, average='macro')
        # Row layout has to match the column order handed to the DataFrame below.
        scores.append([n_trees, depth, lr, f1])

df_scores = pd.DataFrame(
    scores,
    columns=["n_estimators", "max_depth", "learning_rate", "score"],
)
# Highest macro-F1 first.
df_scores.sort_values(by="score", ascending=False)

100%|██████████| 4/4 [00:34<00:00,  8.58s/it]


Unnamed: 0,n_estimators,max_depth,learning_rate,score
20,50,,0.80,0.853940
95,200,,0.80,0.853940
70,150,,0.80,0.853940
45,100,,0.80,0.853940
99,200,5.0,0.80,0.845659
...,...,...,...,...
28,100,4.0,0.01,0.365543
77,200,3.0,0.01,0.365543
2,50,3.0,0.01,0.364659
27,100,3.0,0.01,0.363680


## Gradient Boosting Regressor

In [22]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

In [24]:
# Switch to the concrete-strength case folder so the relative CSV name below resolves.
# NOTE(review): hard-coded absolute Windows path; it also changes the working directory
# for every cell that runs afterwards.
os.chdir("D://meridianthe4//PML//Cases//Concrete_Strength")
conc = pd.read_csv("Concrete_Data.csv")

# "Strength" is the regression target; all remaining columns are predictors.
# This rebinds X, y and the four split variables previously holding the glass data.
X = conc.drop(columns="Strength")
y = conc["Strength"]

# 70/30 hold-out split with a fixed seed (no stratification).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=25,
)

In [28]:
# n_estimators x learning_rate x max_depth sweep for scikit-learn's
# GradientBoostingRegressor, ranked by mean absolute error (lower is better).
# NOTE(review): configurations are compared on X_test / y_test, so the best row is
# selected on the evaluation data itself — read it as an optimistic estimate.
trees = [50, 100, 150, 200]
rates = [0.01, 0.1, 0.2, 0.5, 0.8]
# max_depth=None lets scikit-learn expand nodes until leaves are pure (or too small to split).
depths = [None, 2, 3, 4, 5]
# Same visiting order as looping over depths inside rates.
rate_depth_grid = [(lr, depth) for lr in rates for depth in depths]

scores = []
for n_trees in tqdm(trees):  # the progress bar advances once per n_estimators value
    for lr, depth in rate_depth_grid:
        gbm = GradientBoostingRegressor(
            n_estimators=n_trees,
            learning_rate=lr,
            max_depth=depth,
            random_state=25,
            verbose=False,
        )
        y_pred = gbm.fit(X_train, y_train).predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        # Row layout has to match the column order handed to the DataFrame below.
        scores.append([n_trees, depth, lr, mae])

df_scores = pd.DataFrame(
    scores,
    columns=["n_estimators", "max_depth", "learning_rate", "score"],
)
# Smallest error first.
df_scores.sort_values(by="score", ascending=True)

100%|██████████| 4/4 [00:21<00:00,  5.31s/it]


Unnamed: 0,n_estimators,max_depth,learning_rate,score
89,200,5.0,0.20,2.952430
64,150,5.0,0.20,2.987907
39,100,5.0,0.20,3.049501
88,200,4.0,0.20,3.059607
63,150,4.0,0.20,3.082901
...,...,...,...,...
26,100,2.0,0.01,9.520785
4,50,5.0,0.01,9.588882
3,50,4.0,0.01,9.849491
2,50,3.0,0.01,10.365419
