Data preparation

In [None]:
import pandas as pd

# Load the merged measurements table; its column headers carry a trailing colon.
df = pd.read_csv("/content/merged_final.csv")

# Columns used as model inputs. 'File:' is deliberately left out: the original
# cell selected it and immediately dropped it again.
feature_columns = ['Blockiness:', 'SA:', 'Blockloss:', 'Blur:', 'TA:',
                   'Exposure(bri):', 'Contrast:', 'Noise:', 'Slice:', 'Flickering:']

# Selecting with a list and resetting the index builds a new frame, so nothing
# below mutates a slice of `df` in place (no SettingWithCopyWarning).
X = df[feature_columns].reset_index(drop=True)
# Standardise every feature column to zero mean and unit standard deviation.
X = (X - X.mean()) / X.std()

# Encode the tag column as integers: 'P' -> 1, 'U' -> 0.
# The mapping is applied to the whole column at once. Assigning into the rows
# yielded by DataFrame.iterrows() is not guaranteed to write back to the frame,
# which would leave the letters in place and make the int conversion fail.
# Any value other than 'P'/'U' is passed through unchanged, so the final
# astype(int) still raises on tags that are not integer-like.
tag_to_label = {'P': 1, 'U': 0}
Y = (
    df['Tag:']
    .map(lambda tag: tag_to_label.get(tag, tag))
    .reset_index(drop=True)
    .astype(str)
    .astype(int)
)

Hyperparameter tuning (grid search)

In [None]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
# Candidate values for each XGBoost hyper-parameter; GridSearchCV evaluates
# every combination of them.
param_grid = dict(
    max_depth=[10, 11, 12],
    scale_pos_weight=[1, 1.2, 1.3],
    learning_rate=[0.22, 0.3, 0.35],
    reg_lambda=[0.5],
    colsample_bytree=[0.4, 0.5, 0.6],
    gamma=[0.2, 0.3, 0.4],
)

# Binary classifier whose remaining settings are filled in by the search.
xgb_cl = xgb.XGBClassifier(objective="binary:logistic")

# 3-fold cross-validated search scored by ROC AUC, using all available cores.
grid_cv = GridSearchCV(
    estimator=xgb_cl,
    param_grid=param_grid,
    scoring="roc_auc",
    cv=3,
    n_jobs=-1,
)
_ = grid_cv.fit(X, Y)

In [None]:
# Show the best cross-validated score (scoring="roc_auc") found by the grid search above.
grid_cv.best_score_

In [None]:
# Show the hyper-parameter combination that achieved the best score in the grid search above.
grid_cv.best_params_

Model testing, random data split

In [None]:
from sklearn.metrics import roc_auc_score
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
import random

# Ten distinct random_state values, one per train/test split below.
# They are drawn from a dedicated, seeded generator so that the same ten splits
# (and therefore the same accuracy figures) are produced on every run; the
# unseeded module-level random.sample gave different splits each time.
split_seed = 0
randomlist = random.Random(split_seed).sample(range(1, 1000), 10)

# Every list holds a single value, so the "grid" is one fixed configuration;
# GridSearchCV is only used to cross-validate it and expose best_params_.
param_grid = {
    "max_depth": [10],
    "scale_pos_weight": [1.2],
    "learning_rate": [0.22],
    "reg_lambda": [0.5],
    "colsample_bytree": [0.7],
    "gamma": [0.3],
}

xgb_cl = xgb.XGBClassifier(objective="binary:logistic")
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=3, scoring="roc_auc")
_ = grid_cv.fit(X, Y)

# Fresh classifier configured with the selected hyper-parameters.
final_cl = xgb.XGBClassifier(
    **grid_cv.best_params_,
    objective="binary:logistic",
)

# Repeated hold-out evaluation: for each seed, split 80/20, refit from scratch
# on the training part and record the accuracy on the held-out part.
accuracy_list = []
for i in randomlist:
  X_train, X_test, y_train, y_test = train_test_split(
      X, Y, test_size=0.2, random_state=i
  )
  _ = final_cl.fit(X_train, y_train)

  preds = final_cl.predict(X_test)
  accuracy = accuracy_score(y_test, preds)
  accuracy_list.append(accuracy)

# Mean accuracy over the ten splits, followed by the individual values.
print(np.mean(accuracy_list))
print(accuracy_list)

Saving the model

In [None]:
from sklearn.metrics import roc_auc_score
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
import random
import pickle

# Every list holds a single value, so the "grid" is one fixed configuration;
# GridSearchCV is only used to cross-validate it and expose best_params_.
param_grid = {
    "max_depth": [10],
    "scale_pos_weight": [0.7],
    "learning_rate": [0.12],
    "reg_lambda": [0.7],
    "colsample_bytree": [0.9],
    "gamma": [0.3],
}

xgb_cl = xgb.XGBClassifier(objective="binary:logistic")
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=3, scoring="roc_auc")
_ = grid_cv.fit(X, Y)

# Fresh classifier configured with the selected hyper-parameters.
final_cl = xgb.XGBClassifier(
    **grid_cv.best_params_,
    objective="binary:logistic",
)

# Fixed 80/20 split used only to report a hold-out accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1
)

# Score the configuration on rows the model has not seen. The previous version
# fitted on all of X, Y before predicting on X_test, so the printed figure was
# training accuracy rather than a hold-out estimate.
_ = final_cl.fit(X_train, y_train)
preds = final_cl.predict(X_test)
accuracy = accuracy_score(y_test, preds)
print(accuracy)

# Refit on the complete data set; this is the model that gets saved.
_ = final_cl.fit(X, Y)
# config path
# NOTE: despite the ".json" extension the file is written by pickle and must be
# read back with pickle.load. The path is left unchanged so existing consumers
# keep working.
with open("/content/12k_all_set_2f.json", 'wb') as model_file:
  pickle.dump(final_cl, model_file)