In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
import xgboost as xgb

In [None]:
# Read the test set; its column index is kept in `feat`.
test = pd.read_csv('test.csv')
feat = test.columns

# Read the training set and keep only the rows whose `sii` value is present.
train = pd.read_csv('train.csv')
train = train[train['sii'].notna()]

# Re-type every object-dtype column of the training frame as 'category'.
object_columns = train.select_dtypes(include=['object']).columns
dtype_dict = dict.fromkeys(object_columns, 'category')
train = train.astype(dtype_dict)

# Display the resulting dtypes as the cell output.
train.dtypes

In [None]:
# Features are the columns listed in `feat`; the target is the `sii` column.
X, y = train[feat], train['sii']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define quadratic weighted kappa objective function
def qwk_objective(y_true, y_pred):
    """Return the negated quadratic weighted kappa of the predictions.

    Args:
        y_true: True class labels, one per sample.
        y_pred: Per-class scores as a numpy array, either shaped
            (n_samples, n_classes) or flattened row by row to
            (n_samples * n_classes,).

    Returns:
        The tuple ``('qwk', -kappa)`` where ``kappa`` is ``cohen_kappa_score``
        with ``weights='quadratic'``. The sign is flipped so that better
        agreement yields a smaller value.

    NOTE(review): despite its name this returns a (name, value) pair, not
    per-sample gradients and hessians -- confirm the intended use before
    passing it to XGBoost as a training objective.
    """
    # Use the number of labels as the row count: reshaping by len(y_pred)
    # would turn a flattened score vector into a single column and make
    # argmax return one (always zero) class per score instead of per sample.
    scores = y_pred.reshape(len(y_true), -1)
    predicted_classes = np.argmax(scores, axis=1)
    kappa = cohen_kappa_score(y_true, predicted_classes, weights='quadratic')
    return 'qwk', -kappa

# Define evaluation metric
def qwk_metric(y_true, y_pred):
    """Return the quadratic weighted kappa of the predictions.

    Args:
        y_true: True class labels, one per sample.
        y_pred: Per-class scores as a numpy array, either shaped
            (n_samples, n_classes) or flattened row by row to
            (n_samples * n_classes,).

    Returns:
        The tuple ``('qwk', kappa)`` where ``kappa`` is ``cohen_kappa_score``
        with ``weights='quadratic'``.

    NOTE(review): XGBoost's native custom-metric hook is documented as being
    called with (predictions, DMatrix); this function takes
    (labels, predictions) -- confirm the calling convention before wiring it
    into ``xgb.train``.
    """
    # Use the number of labels as the row count: reshaping by len(y_pred)
    # would turn a flattened score vector into a single column and make
    # argmax return one (always zero) class per score instead of per sample.
    scores = y_pred.reshape(len(y_true), -1)
    predicted_classes = np.argmax(scores, axis=1)
    return 'qwk', cohen_kappa_score(y_true, predicted_classes, weights='quadratic')

# Wrap both splits in DMatrix objects; enable_categorical is switched on so
# the frames' 'category' columns are accepted.
dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
dtest = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

# Booster configuration: multi-class soft-probability output, monitored with
# multi-class log loss.
params = {
    'max_depth': 3,
    'eta': 0.1,
    'subsample': 0.2,
    'objective': 'multi:softprob',
    'num_class': len(np.unique(y)),  # number of distinct target values
    'eval_metric': 'mlogloss',
}

# Train for at most `num_rounds` rounds. With several evaluation sets XGBoost
# uses the last one ('test') for early stopping, so training halts once its
# mlogloss has not improved for 50 rounds.
num_rounds = 200
watchlist = [(dtrain, 'train'), (dtest, 'test')]
model = xgb.train(
    params,
    dtrain,
    num_rounds,
    watchlist,
    early_stopping_rounds=50,
    verbose_eval=2,
)

# The booster returned by xgb.train contains every round that was trained,
# including those after the best one, and Booster.predict uses all of them by
# default. Restrict prediction to the rounds up to the best iteration.
best_rounds = (0, model.best_iteration + 1)
y_train_pred = model.predict(dtrain, iteration_range=best_rounds)
y_test_pred = model.predict(dtest, iteration_range=best_rounds)

# Turn the per-class probabilities into class labels.
y_train_pred = np.argmax(y_train_pred, axis=1)
y_test_pred = np.argmax(y_test_pred, axis=1)

# Quadratic weighted kappa on both splits.
# NOTE(review): the 'test' split also drove early stopping above, so the test
# figure is likely optimistic -- consider a separate hold-out set.
train_qwk = cohen_kappa_score(y_train, y_train_pred, weights='quadratic')
test_qwk = cohen_kappa_score(y_test, y_test_pred, weights='quadratic')

print(f"Final Train QWK: {train_qwk:.4f}")
print(f"Final Test QWK: {test_qwk:.4f}")