In [1]:
import pandas as pd
import numpy as np
import os
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

In [2]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV


In [3]:
# filepath = "merged_u1-50_w001_resampled.csv"
# Locations of the pre-split TRAIN / TEST CSV files (absolute, machine-specific paths).
filepath_train = "D:/online dataset csv files preprocessing/MEAN_SD_FRAMED_RESAMPLED_merged_u1-50_w001_resampled_differentFrames6_2_NewSplitManual_TRAIN.csv"
filepath_test = "D:/online dataset csv files preprocessing/MEAN_SD_FRAMED_RESAMPLED_merged_u1-50_w001_resampled_differentFrames6_2_NewSplitManual_TEST.csv"

# Read both splits into DataFrames, train first and then test.
df_original_train, df_original_test = (
    pd.read_csv(csv_path) for csv_path in (filepath_train, filepath_test)
)

In [4]:
# Sanity check: (rows, columns) of the train and test frames.
df_original_train.shape, df_original_test.shape

((7059, 13), (2949, 13))

In [5]:
# The 'name' column is the classification target; every other column is a feature.
# Copies are taken so later changes never touch the frames as loaded from disk.
X_train_val = df_original_train.drop(columns='name').copy()
y_train_val = df_original_train['name'].copy()

# Same feature / label separation for the held-out test frame.
X_test = df_original_test.drop(columns='name').copy()
y_test = df_original_test['name'].copy()

In [6]:
# Shift the class labels down by one so they start at 0 (XGBoost's multi-class
# objectives expect labels in [0, num_class)).
# NOTE(review): this presumes the 'name' column holds 1-based ids - confirm against the data.
#
# The shifted labels are derived from the source frames rather than from
# y_train_val / y_test themselves, so re-running this cell is idempotent and
# cannot subtract 1 a second time.
y_train_val = df_original_train['name'] - 1
y_test = df_original_test['name'] - 1

In [7]:
# Carve a 10% validation set out of the training data. The split is stratified
# on the label and uses a fixed seed so it is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.1,
    random_state=0,
    stratify=y_train_val,
)
# Show the resulting shapes of every split.
X_train.shape, X_test.shape, X_val.shape, y_train.shape, y_test.shape, y_val.shape

((6353, 12), (2949, 12), (706, 12), (6353,), (2949,), (706,))

In [8]:
# X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0, stratify = y)

# X_train, X_validation, y_train, y_validation = train_test_split(X_train_val, y_train_val, test_size = 0.1, random_state = 0, stratify = y_train_val)
# X_train.shape, X_test.shape, X_validation.shape, y_train.shape, y_test.shape, y_validation.shape

In [9]:
# Number of distinct labels present in the training split (used as XGBoost's num_class).
num_classes = np.unique(y_train).size

In [10]:
# Wrap each split in XGBoost's DMatrix container for the native training API.
# Train and validation matrices carry labels; the test matrix holds features only.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dval = xgb.DMatrix(data=X_val, label=y_val)
dtest = xgb.DMatrix(data=X_test)

In [11]:
# Display the three DMatrix objects (only their object reprs are shown).
dtrain, dtest, dval

(<xgboost.core.DMatrix at 0x2717023b4c0>,
 <xgboost.core.DMatrix at 0x2717023a770>,
 <xgboost.core.DMatrix at 0x2717023bd30>)

In [75]:
# Booster configuration for the native xgb.train API.
params = dict(
    max_depth=7,
    learning_rate=0.3,
    objective='multi:softmax',
    num_class=num_classes,
)
# Number of boosting rounds passed to xgb.train.
epochs = 25
# Filled in by xgb.train with the per-round metrics of each evaluation set.
evals_result = {}

In [76]:
# Train a booster for `epochs` rounds with no evaluation sets.
# NOTE(review): `model` is reassigned by the following cell, which trains again
# with evaluation sets attached - this run looks redundant; confirm and drop if so.
model = xgb.train(params, dtrain, num_boost_round=epochs, verbose_eval=1)

In [77]:
# Train for `epochs` rounds while tracking the metric on both the training and
# validation matrices; the per-round values are logged every round and also
# collected into `evals_result`.
model = xgb.train(
    params,
    dtrain,
    num_boost_round=epochs,
    evals=[(dtrain, 'train'), (dval, 'valid')],
    evals_result=evals_result,
    verbose_eval=1,
)

[0]	train-mlogloss:0.66475	valid-mlogloss:0.83132
[1]	train-mlogloss:0.50659	valid-mlogloss:0.70953
[2]	train-mlogloss:0.40238	valid-mlogloss:0.62727
[3]	train-mlogloss:0.32942	valid-mlogloss:0.56560
[4]	train-mlogloss:0.27580	valid-mlogloss:0.52221
[5]	train-mlogloss:0.23552	valid-mlogloss:0.48363
[6]	train-mlogloss:0.20439	valid-mlogloss:0.45560
[7]	train-mlogloss:0.18021	valid-mlogloss:0.43537
[8]	train-mlogloss:0.16120	valid-mlogloss:0.41606
[9]	train-mlogloss:0.14640	valid-mlogloss:0.40112
[10]	train-mlogloss:0.13445	valid-mlogloss:0.39123
[11]	train-mlogloss:0.12507	valid-mlogloss:0.38198
[12]	train-mlogloss:0.11752	valid-mlogloss:0.37569
[13]	train-mlogloss:0.11142	valid-mlogloss:0.37169
[14]	train-mlogloss:0.10648	valid-mlogloss:0.36903
[15]	train-mlogloss:0.10245	valid-mlogloss:0.36670
[16]	train-mlogloss:0.09924	valid-mlogloss:0.36547
[17]	train-mlogloss:0.09649	valid-mlogloss:0.36394
[18]	train-mlogloss:0.09426	valid-mlogloss:0.36338
[19]	train-mlogloss:0.09235	valid-mloglos

In [78]:
# Predict on the test matrix. With the 'multi:softmax' objective configured in
# `params`, the booster returns one predicted class id per row (as floats, per
# the printed output below).
predictions = model.predict(dtest)

In [79]:
# Quick look at the predicted class ids (NumPy abbreviates long arrays when printing).
print(predictions)

[23. 17. 17. ... 48. 10. 19.]


In [80]:
# Fraction of test rows whose predicted class id equals the zero-based true label.
accuracy_score(y_true=y_test, y_pred=predictions)

0.8501186842997627

In [18]:
# Default-configured scikit-learn style classifier; it is used as the base
# estimator of the grid search, which supplies the hyperparameters to try.
xgb_model = xgb.XGBClassifier()

In [19]:
# Search space for the grid search: 3 depths x 3 learning rates, with the
# objective and class count held fixed (9 combinations in total).
param_grid = dict(
    max_depth=[1, 3, 5],
    learning_rate=[0.1, 0.05, 0.01],
    objective=['multi:softmax'],
    num_class=[num_classes],
)


In [20]:
# Exhaustively evaluate every combination in `param_grid` with 5-fold
# cross-validation on the training split (the validation split is not used here).
grid_search = GridSearchCV(xgb_model, param_grid, cv=5)
grid_search.fit(X=X_train, y=y_train)


In [21]:
# Pull the winning configuration, the estimator built with it, and its
# cross-validation score out of the finished search.
best_params, best_model, best_score = (
    grid_search.best_params_,
    grid_search.best_estimator_,
    grid_search.best_score_,
)

In [22]:
# Parameter combination selected by the grid search.
best_params

{'learning_rate': 0.1,
 'max_depth': 1,
 'num_class': 50,
 'objective': 'multi:softmax'}

In [23]:
# Cross-validation score of the selected combination, taken from grid_search.best_score_
# (presumably mean accuracy, since no `scoring` was passed - confirm).
best_score

0.8835198275274598

In [24]:
# Score the best grid-search estimator on the held-out test split.
y_pred = best_model.predict(X_test)

# Share of test rows classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.8531705662936588

In [25]:
#2:24 0.2 , 3

In [26]:
# xgb_model2 = xgb.XGBClassifier()

In [27]:
# param_grid2 = {
#     'max_depth': [7, 9, 11],
#     'learning_rate': [ 0.3, 0.4, 0.5],
#     'n_estimators': [ 300, 400, 500],
#     'objective': ['multi:softmax'],
#     'num_class': [50] 
# }

In [28]:
# grid_search2 = GridSearchCV(estimator=xgb_model2, param_grid=param_grid2, cv=5)
# grid_search2.fit(X_train, y_train)  # X and y are your training data


In [29]:
# best_params2 = grid_search2.best_params_
# best_model2 = grid_search2.best_estimator_
# best_score2 = grid_search2.best_score_

In [30]:
# best_params2

In [31]:
# best_score2

In [32]:
# y_pred2 = best_model2.predict(X_test)

# # Evaluate the predictions
# accuracy2 = accuracy_score(y_test, y_pred2)
# accuracy2