In [1]:
import tensorflow as tf

# Check if a GPU is available
if tf.config.list_physical_devices('GPU'):
    print("GPU is available")
    gpu_device_name = tf.test.gpu_device_name()
    print(f"GPU Device: {gpu_device_name}")
    gpu_info = !nvidia-smi --query-gpu=gpu_name --format=csv,noheader
    print(f"GPU Model: {gpu_info[0]}")
else:
    print("GPU is not available")


!pip install catboost
!pip install optuna


GPU is available
GPU Device: /device:GPU:0
GPU Model: NVIDIA L4
Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.5
Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.svm import SVR
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.base import clone
from sklearn.metrics import mean_squared_error, cohen_kappa_score, make_scorer
import optuna

In [3]:
!unzip /content/processsed_feats.csv.zip

Archive:  /content/processsed_feats.csv.zip
  inflating: processsed_feats.csv    


In [4]:
features = pd.read_csv('/content/processsed_feats.csv')

In [5]:
features

Unnamed: 0.1,Unnamed: 0,essay_id,ts_count_sentence,ts_count_word,ts_count_paragraph,ts_count_symbol,ts_count_punctuation,ts_count_stop_words,ts_count_ner,ts_CARDINAL,...,tfidf_you do,tfidf_you get,tfidf_you have,tfidf_you have to,tfidf_you should,tfidf_you think,tfidf_you to,tfidf_you want,tfidf_you will,tfidf_you would
0,0,000d118,13,498,1,2677,31,287,27,2,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
1,1,000fe60,20,334,9,1669,35,233,12,1,...,0.00000,0.0,0.000000,0.0,0.162533,0.16986,0.0,0.0,0.000000,0.0
2,2,001ab80,25,551,7,3077,53,344,2,1,...,0.07913,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
3,3,001bdc0,21,444,9,2701,54,234,29,2,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
4,4,002ba53,16,373,11,2208,40,206,22,2,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17302,17302,ffd378d,9,156,5,856,12,88,2,0,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
17303,17303,ffddf1f,26,568,11,3354,62,309,14,2,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0
17304,17304,fff016d,15,215,5,1122,15,149,3,0,...,0.19060,0.0,0.164996,0.0,0.290851,0.00000,0.0,0.0,0.115735,0.0
17305,17305,fffb49b,11,229,1,1427,34,113,26,2,...,0.00000,0.0,0.000000,0.0,0.000000,0.00000,0.0,0.0,0.000000,0.0


In [6]:

required_columns = ['essay_id', 'word_12_cnt', 'word_14_cnt', 'word_len_max', 'word_len_std', 'sentence_150_cnt', 'sentence_250_cnt', 'sentence_word_cnt_max', 'sentence_len_min', 'paragraph_100_cnt', 'paragraph_200_cnt', 'paragraph_300_cnt', 'paragraph_400_cnt', 'paragraph_600_cnt', 'paragraph_len_max', 'paragraph_sentence_cnt_max', 'paragraph_sentence_cnt_min', 'dw_score', 'dc_score', 'ar_score', 'cl_score', 'gi_score', 'si_score', 'lw_score', 'pos_ADV', 'pos_PART', 'pos_SCONJ', 'pos_CCONJ', 'pos_PRON', 'pos_PROPN', 'pos_.', 'pos_,', 'average_polarity', 'average_subjectivity', 'average_hapax_dislegomena', 'tfidf_. this', 'tfidf_get to', 'tfidf_, the', 'tfidf_just a', 'tfidf_. with', 'tfidf_able to', 'tfidf_. one', 'tfidf_, and', 'tfidf_. i', 'tfidf_of a', 'tfidf_. you', 'tfidf_. i think', 'tfidf_. we', 'tfidf_there are', 'tfidf_not only', 'tfidf_. people', 'tfidf_that the', 'tfidf_the time', 'tfidf_way to', 'tfidf_the first', 'tfidf_. the', 'tfidf_is a', 'tfidf_to go', 'tfidf_and the', 'tfidf_on the', 'tfidf_in a', 'tfidf_in the', 'tfidf_to the', 'tfidf_of the', 'tfidf_. in', 'tfidf_be a', 'tfidf_on a', 'tfidf_it is', 'tfidf_to be', 'tfidf_it .', 'tfidf_with the', 'tfidf_is the', 'tfidf_for the', 'tfidf_would be', 'tfidf_for a', 'tfidf_this is', 'tfidf_the same', 'tfidf_. there', 'tfidf_it would', 'tfidf_they are', 'tfidf_there is', 'tfidf_, it', 'tfidf_that is', 'tfidf_that they', 'tfidf_and it', 'tfidf_it would be', 'tfidf_can be', 'tfidf_i think', 'tfidf_have a', 'tfidf_such as', 'tfa_noun_verb_diff', 'tfa_neutral_sentiment', 'tfa_noun_to_verb_ratio', 'tfa_avg_word_to_sentence_length_ratio', 'tfa_char_per_paragraph', 'tfa_flesch_smog_product', 'tfa_ttr', 'tfa_adjective_to_adverb_ratio', 'tfa_lexical_diversity', 'tfa_positive_negative_sentiment_diff', 'tfa_transition_to_sentence_ratio', 'tfa_complex_sentence_indicator', 'tfa_transition_to_paragraph_ratio', 'ts_count_ner', 'ts_PERCENT', 'ts_DATE', 'ts_ORG', 'ts_CARDINAL', 'ts_PERSON', 'ts_ORDINAL', 'ts_LOC', 'ts_mean_ner_sentence', 'ts_max_ner_per_sentence', 'ts_independent_clauses', 'ts_simple_sentence_count', 'ts_has_intro', 'ts_coherence', 'ts_JJR', 'ts_JJS', 'ts_MD', 'ts_PRP$', 'ts_RP', 'ts_UH', 'ts_VBD', 'ts_VBG', 'ts_VBN', 'ts_VBZ', 'ts_WDT', 'ts_WRB', 'ts_HYPH', 'ts__SP', 'ts_acomp', 'ts_acl', 'ts_appos', 'ts_attr', 'ts_ccomp', 'ts_compound', 'ts_csubj', 'ts_neg', 'ts_nmod', 'ts_nsubjpass', 'ts_oprd', 'ts_pcomp', 'ts_relcl', 'ts_xcomp']

features = features.loc[:, [col for col in required_columns if col in features.columns]]

In [7]:
features

Unnamed: 0,essay_id,word_12_cnt,word_14_cnt,word_len_max,word_len_std,sentence_150_cnt,sentence_250_cnt,sentence_word_cnt_max,sentence_len_min,paragraph_100_cnt,...,ts_ccomp,ts_compound,ts_csubj,ts_neg,ts_nmod,ts_nsubjpass,ts_oprd,ts_pcomp,ts_relcl,ts_xcomp
0,000d118,6,5,25,2.538495,7,3,126,36,1,...,19,22,1,6,3,7,2,3,12,6
1,000fe60,0,0,11,2.060968,2,0,48,26,5,...,11,3,1,6,0,1,0,2,10,2
2,001ab80,14,5,15,2.604621,7,0,46,58,4,...,11,8,1,12,2,2,0,6,12,13
3,001bdc0,9,1,17,2.767791,6,3,64,25,5,...,11,12,1,2,0,2,0,5,3,2
4,002ba53,9,2,14,2.861249,6,2,88,17,4,...,3,19,1,3,1,4,0,9,9,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17302,ffd378d,2,0,12,2.447906,1,0,36,39,3,...,5,7,0,1,0,0,0,1,1,2
17303,ffddf1f,3,0,12,2.483199,7,2,61,37,6,...,19,14,4,0,0,3,0,6,15,12
17304,fff016d,0,0,11,1.889665,0,0,23,28,3,...,4,1,0,5,0,0,0,0,7,2
17305,fffb49b,4,0,12,2.653395,3,0,38,63,1,...,3,7,2,0,1,2,1,1,4,0


In [8]:
existing_columns = features.columns

missing_columns = [col for col in required_columns if col not in existing_columns]

# Output missing columns
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    print("All required columns are present.")

All required columns are present.


In [9]:
features = features.drop(['essay_id'],axis=1)

In [10]:
labels = pd.read_csv('/content/y_train.csv')
labels = labels['score']

In [11]:
from sklearn.model_selection import train_test_split

# Split off a test set
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [12]:
def qwk_mse_score(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    qwk = cohen_kappa_score(y_true, y_pred.round().astype(int), weights='quadratic')
    return qwk - mse

def objective_catboost(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'depth': trial.suggest_int('depth', 4, 8),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'grow_policy': trial.suggest_categorical('grow_policy', ['SymmetricTree', 'Depthwise', 'Lossguide']),
        'task_type': 'GPU',
        'random_seed': 42
    }

    # Split data for hyperparameter tuning
    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    model = CatBoostRegressor(**params, verbose=0)
    model.fit(X_train_opt, y_train_opt, eval_set=(X_val_opt, y_val_opt), early_stopping_rounds=50, verbose=False)

    y_pred = model.predict(X_val_opt)
    score = qwk_mse_score(y_val_opt, y_pred)

    return -score  # Negative because Optuna minimizes

def optimize_model(objective, n_trials=30):
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials)
    return study.best_params

def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    cv_scores = cross_val_score(model, X_train, y_train, cv=3, scoring=lambda estimator, X, y: qwk_mse_score(y, estimator.predict(X)))

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    qwk = cohen_kappa_score(y_test, y_pred.round().astype(int), weights='quadratic')

    return np.mean(cv_scores), rmse, qwk

print("Optimizing CatBoost...")
best_params_catboost = optimize_model(objective_catboost, n_trials=40)
print("Best parameters for CatBoost:", best_params_catboost)

[I 2024-07-01 21:41:08,349] A new study created in memory with name: no-name-d0d1e8a9-a206-4b0b-9e90-7d008be9516b


Optimizing CatBoost...


[I 2024-07-01 21:41:09,522] Trial 0 finished with value: -0.33205147791899087 and parameters: {'iterations': 367, 'learning_rate': 0.13849089623599722, 'depth': 7, 'l2_leaf_reg': 0.002430316203334117, 'bagging_temperature': 0.6287486980131421, 'border_count': 90, 'grow_policy': 'Depthwise'}. Best is trial 0 with value: -0.33205147791899087.
[I 2024-07-01 21:41:11,422] Trial 1 finished with value: -0.36072856708280654 and parameters: {'iterations': 234, 'learning_rate': 0.08625340799378714, 'depth': 5, 'l2_leaf_reg': 0.01187829510538352, 'bagging_temperature': 0.6723701340070187, 'border_count': 215, 'grow_policy': 'Lossguide'}. Best is trial 1 with value: -0.36072856708280654.
[I 2024-07-01 21:41:13,841] Trial 2 finished with value: -0.3697636559879305 and parameters: {'iterations': 492, 'learning_rate': 0.03537170719397901, 'depth': 6, 'l2_leaf_reg': 0.00023849564710853462, 'bagging_temperature': 0.3526580852558845, 'border_count': 125, 'grow_policy': 'Depthwise'}. Best is trial 2 wit

Best parameters for CatBoost: {'iterations': 390, 'learning_rate': 0.051625965735031594, 'depth': 5, 'l2_leaf_reg': 1.4727238279939334, 'bagging_temperature': 0.5205213221018217, 'border_count': 119, 'grow_policy': 'Depthwise'}


In [13]:
catboost_model_op = CatBoostRegressor(**best_params_catboost, verbose=False)

# Unoptimized model
unop_params = {
    'loss_function': 'RMSE',
    'iterations': 500,
    'learning_rate': 0.03,
    'depth': 6,
    'l2_leaf_reg': 3,
    'random_seed': 42,
    'task_type': 'GPU'
}
catboost_model_unop = CatBoostRegressor(**unop_params, verbose=False)

# Train and evaluate both models
cv_score_unop, rmse_unop, qwk_unop = train_and_evaluate(catboost_model_unop, X_train, y_train, X_test, y_test)
cv_score_op, rmse_op, qwk_op = train_and_evaluate(catboost_model_op, X_train, y_train, X_test, y_test)

print(f"Unoptimized CatBoost - CV Score: {cv_score_unop:.4f}, RMSE: {rmse_unop:.4f}, QWK: {qwk_unop:.4f}")
print(f"Optimized CatBoost - CV Score: {cv_score_op:.4f}, RMSE: {rmse_op:.4f}, QWK: {qwk_op:.4f}")

Unoptimized CatBoost - CV Score: 0.3691, RMSE: 0.5973, QWK: 0.7665
Optimized CatBoost - CV Score: 0.3941, RMSE: 0.5816, QWK: 0.7792


In [14]:
best_params_catboost

{'iterations': 390,
 'learning_rate': 0.051625965735031594,
 'depth': 5,
 'l2_leaf_reg': 1.4727238279939334,
 'bagging_temperature': 0.5205213221018217,
 'border_count': 119,
 'grow_policy': 'Depthwise'}

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class OrdinalNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(OrdinalNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes)
        )
        self.num_labels = num_classes
        self.dist_matrix = torch.tensor([[abs(i - j) for j in range(num_classes)] for i in range(num_classes)], dtype=torch.float32)

    def forward(self, x):
        return self.layers(x)

def ordinal_log_loss(model, logits, labels):
    num_classes = model.num_labels
    dist_matrix = model.dist_matrix.to(logits.device)
    probas = F.softmax(logits, dim=1)

    true_labels = labels.unsqueeze(1).expand(-1, num_classes)
    label_ids = torch.arange(num_classes).expand(labels.size(0), -1).to(labels.device)

    distances = dist_matrix[true_labels, label_ids]

    err = -torch.log(1 - probas + 1e-7) * distances.pow(2)
    loss = err.sum(dim=1).mean()

    return loss

def objective_nn(trial):
    params = {
        'hidden_size': trial.suggest_int('hidden_size', 32, 256),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64, 128]),
        'epochs': trial.suggest_int('epochs', 50, 200),
    }

    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_opt)
    X_val_scaled = scaler.transform(X_val_opt)

    input_size = X_train.shape[1]
    num_classes = len(np.unique(y_train))

    model = OrdinalNN(input_size, params['hidden_size'], num_classes)
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

    for epoch in range(params['epochs']):
        model.train()
        for i in range(0, len(X_train_scaled), params['batch_size']):
            batch_X = torch.FloatTensor(X_train_scaled[i:i+params['batch_size']])
            batch_y = torch.LongTensor(y_train_opt[i:i+params['batch_size']])

            optimizer.zero_grad()
            logits = model(batch_X)
            loss = ordinal_log_loss(model, logits, batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        val_logits = model(torch.FloatTensor(X_val_scaled))
        val_probas = F.softmax(val_logits, dim=1)
        val_preds = val_probas.argmax(dim=1).numpy()

    return -qwk_mse_score(y_val_opt, val_preds)

# Update the train_and_evaluate function for neural network
def train_and_evaluate(model, X_train, y_train, X_test, y_test, is_nn=False):
    if is_nn:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        optimizer = optim.Adam(model.parameters(), lr=1e-3)  # You might want to use the optimized learning rate here

        # Train the model
        model.train()
        for epoch in range(100):  # You might want to use the optimized number of epochs here
            optimizer.zero_grad()
            logits = model(torch.FloatTensor(X_train_scaled))
            loss = ordinal_log_loss(model, logits, torch.LongTensor(y_train))
            loss.backward()
            optimizer.step()

        # Evaluate the model
        model.eval()
        with torch.no_grad():
            test_logits = model(torch.FloatTensor(X_test_scaled))
            test_probas = F.softmax(test_logits, dim=1)
            y_pred = test_probas.argmax(dim=1).numpy()
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    qwk = cohen_kappa_score(y_test, y_pred.astype(int), weights='quadratic')

    return rmse, qwk

In [None]:
import numpy as np
import pandas as pd
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, cohen_kappa_score
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.svm import SVR

# Assuming X_train, y_train, X_test, y_test are already defined

def qwk_mse_score(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    qwk = cohen_kappa_score(y_true, y_pred.round().astype(int), weights='quadratic')
    return qwk - mse

# Neural Network definition
class OrdinalNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(OrdinalNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes - 1)
        )

    def forward(self, x):
        return self.layers(x)

# Objective functions for each model
def objective_lgbm(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 20, 3000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
    }

    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    model = LGBMRegressor(**params, n_estimators=1000, random_state=42)
    model.fit(X_train_opt, y_train_opt, eval_set=[(X_val_opt, y_val_opt)], early_stopping_rounds=100, verbose=False)

    y_pred = model.predict(X_val_opt)
    return -qwk_mse_score(y_val_opt, y_pred)

def objective_xgb(trial):
    params = {
        'max_depth': trial.suggest_int('max_depth', 1, 9),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
    }

    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    model = XGBRegressor(**params, random_state=42)
    model.fit(X_train_opt, y_train_opt, eval_set=[(X_val_opt, y_val_opt)], early_stopping_rounds=100, verbose=False)

    y_pred = model.predict(X_val_opt)
    return -qwk_mse_score(y_val_opt, y_pred)


def objective_nn(trial):
    params = {
        'hidden_size': trial.suggest_int('hidden_size', 32, 256),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64, 128]),
        'epochs': trial.suggest_int('epochs', 50, 200),
    }

    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_opt)
    X_val_scaled = scaler.transform(X_val_opt)

    input_size = X_train.shape[1]
    num_classes = len(np.unique(y_train))

    model = OrdinalNN(input_size, params['hidden_size'], num_classes)
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

    for epoch in range(params['epochs']):
        model.train()
        for i in range(0, len(X_train_scaled), params['batch_size']):
            batch_X = torch.FloatTensor(X_train_scaled[i:i+params['batch_size']])
            batch_y = torch.LongTensor(y_train_opt[i:i+params['batch_size']])

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = nn.BCEWithLogitsLoss()(outputs, batch_y.float().unsqueeze(1))
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(torch.FloatTensor(X_val_scaled))
        val_preds = torch.sigmoid(val_outputs).numpy()

    return -qwk_mse_score(y_val_opt, val_preds)

def objective_svr(trial):
    params = {
        'C': trial.suggest_float('C', 1e-2, 100, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
        'gamma': trial.suggest_float('gamma', 1e-3, 1.0, log=True),
    }

    X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_opt)
    X_val_scaled = scaler.transform(X_val_opt)

    model = SVR(**params)
    model.fit(X_train_scaled, y_train_opt)

    y_pred = model.predict(X_val_scaled)
    return -qwk_mse_score(y_val_opt, y_pred)

def optimize_model(objective, n_trials=100):
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials)
    return study.best_params

def train_and_evaluate(model, X_train, y_train, X_test, y_test, is_nn=False):
    if is_nn:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model.eval()
        with torch.no_grad():
            y_pred = torch.sigmoid(model(torch.FloatTensor(X_test_scaled))).numpy()
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    qwk = cohen_kappa_score(y_test, y_pred.round().astype(int), weights='quadratic')

    return rmse, qwk

# Optimize and evaluate each model
models = ['LightGBM', 'XGBoost', 'CatBoost', 'Neural Network', 'SVR']
objectives = [objective_lgbm, objective_xgb, objective_catboost, objective_nn, objective_svr]

for model_name, objective in zip(models, objectives):
    print(f"\nOptimizing {model_name}...")
    best_params = optimize_model(objective, n_trials=100)
    print(f"Best parameters for {model_name}:", best_params)

    if model_name == 'LightGBM':
        model = LGBMRegressor(**best_params, random_state=42)
    elif model_name == 'XGBoost':
        model = XGBRegressor(**best_params, random_state=42)
    elif model_name == 'CatBoost':
        model = CatBoostRegressor(**best_params, random_state=42)
    elif model_name == 'Neural Network':
        input_size = X_train.shape[1]
        num_classes = len(np.unique(y_train))
        model = OrdinalNN(input_size, best_params['hidden_size'], num_classes)
    else:  # SVR
        model = SVR(**best_params)

    rmse, qwk = train_and_evaluate(model, X_train, y_train, X_test, y_test, is_nn=(model_name == 'Neural Network'))
    print(f"Optimized {model_name} - RMSE: {rmse:.4f}, QWK: {qwk:.4f}")



In [1]:
######XGBOOST HYPERPARAMETER TUNING#######
def objective(trial):
  n_estimators = trial.suggest_int('n_estimators',200,600)
  max_depth = trial.suggest_int('max_depth',2,10)
  max_leaves = trial.suggest_int('max_leaves',2,10)
  learning_rate = trial.suggest_int('learning_rate',2,10)
  verbosity = 3


SyntaxError: incomplete input (<ipython-input-1-5dbc68967b42>, line 2)