In [72]:
import DataProcess
from scipy.stats import norm
import math
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.optim import Adam
import optuna
from sklearn.model_selection import train_test_split
EPOCHES = 5
BATCH_SIZE = 500
NUM_OF_LAYERS = 3

In [56]:
class Model(nn.Module):
    in_features = 136
    def __init__(self, params):
        super(Model, self).__init__()
        self.layer1_linear = nn.Linear(self.in_features, params['n1'])
        self.layer1_activation = nn.ReLU()
        self.layer2_linear = nn.Linear(params['n1'], params['n2'])
        self.layer2_activation = nn.ReLU()
        self.layer3_linear = nn.Linear(params['n2'], 1)
        self.layer3_activation = nn.Sigmoid()
    def forward(self, x):
        x = self.layer1_linear(x)
        x = self.layer1_activation(x)
        x = self.layer2_linear(x)
        x = self.layer2_activation(x)
        x = self.layer3_linear(x)
        x = self.layer3_activation(x)
        return x

In [3]:
q = DataProcess.getQuery("D:\\MSLR-WEB10K\Fold1\\train.txt",10)
test_set = DataProcess.getQuery("D:\\MSLR-WEB10K\Fold1\\test.txt",10)

In [85]:
def train_and_evaluate(params):
    model = Model(params)
    model.layer1_linear.requires_grad_(requires_grad=True)
    model.layer2_linear.requires_grad_(requires_grad=True)
    model.layer3_linear.requires_grad_(requires_grad=True)
    opt = getattr(optim, params['optimizer'])(model.parameters(), lr= params['learning_rate'])
    #train
    for i in range(100):
        PI = q[i].claculate_pi()
        P = q[i].prob_all(PI)
        dG_ds = q[i].G_derivative_by_score(PI, P)

        # model predictions
        score = []
        dG_dw = []
        for param in model.parameters():
            dG_dw.append(torch.zeros(param.shape))
        for idx,x in enumerate(q[i].documents):
            temp = model.forward(x.feature)
            score.append(temp)
            temp.backward()
            for idx1,param in enumerate(model.parameters()):
                dG_dw[idx1] += param.grad*dG_ds[idx]/10
                param = torch.zeros(param.shape)
        for idx,param in enumerate(model.parameters()):
            param.grad = dG_dw[idx]
        q[i].score = score
        # loss

        # gradient
        # Update Parameters
        opt.zero_grad()
        opt.step()
        model.layer1_linear.weight.grad.zero_()
        model.layer1_linear.bias.grad.zero_()
        model.layer2_linear.weight.grad.zero_()
        model.layer2_linear.bias.grad.zero_()
        model.layer3_linear.weight.grad.zero_()
        model.layer3_linear.bias.grad.zero_()
    #test
    res = []
    with torch.no_grad():
        for i in range(100):
            # model predictions
            score = []
            for x in test_set[i].documents:
                temp = model.forward(x.feature)
                score.append(temp)
            test_set[i].score = score
            res.append(test_set[i].NDCG())

    print(f"Mean of Test set NDCG :{np.mean(res)*100} %")
    return np.mean(res)

In [86]:
def objective(trial):
    params = {
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-7, 1e-1),
        'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]),
        'n1': trial.suggest_int("n_unit", 50, 70),
        'n2': trial.suggest_int("n_unit",50,70)
    }

    meanNDCG = train_and_evaluate(params)

    return meanNDCG

In [87]:
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=30)

[32m[I 2022-12-29 01:12:49,114][0m A new study created in memory with name: no-name-e1c9ae15-ecfe-4b3d-a664-de5d82489b37[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-7, 1e-1),
[32m[I 2022-12-29 01:12:58,672][0m Trial 0 finished with value: 0.8141693536545379 and parameters: {'learning_rate': 0.00030289256042013166, 'optimizer': 'Adam', 'n_unit': 60}. Best is trial 0 with value: 0.8141693536545379.[0m


Mean of Test set NDCG :0.8141693536545379 %


[32m[I 2022-12-29 01:13:08,139][0m Trial 1 finished with value: 0.833345742360745 and parameters: {'learning_rate': 1.8616864444296277e-06, 'optimizer': 'RMSprop', 'n_unit': 61}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.833345742360745 %


[32m[I 2022-12-29 01:13:18,510][0m Trial 2 finished with value: 0.8124454388763508 and parameters: {'learning_rate': 0.024133429242262857, 'optimizer': 'Adam', 'n_unit': 56}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.8124454388763508 %


[32m[I 2022-12-29 01:13:27,731][0m Trial 3 finished with value: 0.81491832509823 and parameters: {'learning_rate': 0.005254358776641404, 'optimizer': 'Adam', 'n_unit': 58}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.81491832509823 %


[32m[I 2022-12-29 01:13:37,105][0m Trial 4 finished with value: 0.7946705284673634 and parameters: {'learning_rate': 0.00016567800572475508, 'optimizer': 'SGD', 'n_unit': 65}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.7946705284673634 %


[32m[I 2022-12-29 01:13:46,824][0m Trial 5 finished with value: 0.791019973968684 and parameters: {'learning_rate': 0.0029020664925183868, 'optimizer': 'RMSprop', 'n_unit': 51}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.791019973968684 %


[32m[I 2022-12-29 01:13:56,123][0m Trial 6 finished with value: 0.8206524543473183 and parameters: {'learning_rate': 0.06051005334820072, 'optimizer': 'Adam', 'n_unit': 53}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.8206524543473183 %


[32m[I 2022-12-29 01:14:05,542][0m Trial 7 finished with value: 0.799101240317712 and parameters: {'learning_rate': 0.007736001446549905, 'optimizer': 'Adam', 'n_unit': 52}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.799101240317712 %


[32m[I 2022-12-29 01:14:14,848][0m Trial 8 finished with value: 0.7873272102355467 and parameters: {'learning_rate': 0.00045363910550669565, 'optimizer': 'SGD', 'n_unit': 52}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.7873272102355467 %


[32m[I 2022-12-29 01:14:24,094][0m Trial 9 finished with value: 0.8106370348765505 and parameters: {'learning_rate': 4.066472273560783e-05, 'optimizer': 'RMSprop', 'n_unit': 64}. Best is trial 1 with value: 0.833345742360745.[0m


Mean of Test set NDCG :0.8106370348765505 %


[32m[I 2022-12-29 01:14:33,272][0m Trial 10 finished with value: 0.841736078220058 and parameters: {'learning_rate': 6.591195057359098e-07, 'optimizer': 'RMSprop', 'n_unit': 69}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.841736078220058 %


[32m[I 2022-12-29 01:14:42,592][0m Trial 11 finished with value: 0.8115218181623689 and parameters: {'learning_rate': 3.9090277300794426e-07, 'optimizer': 'RMSprop', 'n_unit': 68}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8115218181623689 %


[32m[I 2022-12-29 01:14:52,223][0m Trial 12 finished with value: 0.833080851949549 and parameters: {'learning_rate': 4.2731968890281427e-07, 'optimizer': 'RMSprop', 'n_unit': 70}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.833080851949549 %


[32m[I 2022-12-29 01:15:02,085][0m Trial 13 finished with value: 0.8145691300881316 and parameters: {'learning_rate': 4.828429176721745e-06, 'optimizer': 'RMSprop', 'n_unit': 63}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8145691300881316 %


[32m[I 2022-12-29 01:15:12,040][0m Trial 14 finished with value: 0.8156282317173098 and parameters: {'learning_rate': 5.038734530042507e-06, 'optimizer': 'RMSprop', 'n_unit': 67}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8156282317173098 %


[32m[I 2022-12-29 01:15:21,244][0m Trial 15 finished with value: 0.8107903239469618 and parameters: {'learning_rate': 1.3357703725842462e-07, 'optimizer': 'RMSprop', 'n_unit': 61}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8107903239469618 %


[32m[I 2022-12-29 01:15:30,398][0m Trial 16 finished with value: 0.815967237245575 and parameters: {'learning_rate': 4.016073151801102e-06, 'optimizer': 'RMSprop', 'n_unit': 55}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.815967237245575 %


[32m[I 2022-12-29 01:15:39,543][0m Trial 17 finished with value: 0.8292362816425828 and parameters: {'learning_rate': 2.4917826694262927e-05, 'optimizer': 'SGD', 'n_unit': 70}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8292362816425828 %


[32m[I 2022-12-29 01:15:48,662][0m Trial 18 finished with value: 0.8102354661750311 and parameters: {'learning_rate': 7.419530500860646e-07, 'optimizer': 'RMSprop', 'n_unit': 61}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8102354661750311 %


[32m[I 2022-12-29 01:15:58,250][0m Trial 19 finished with value: 0.8166764729724331 and parameters: {'learning_rate': 2.0380376612144293e-06, 'optimizer': 'RMSprop', 'n_unit': 66}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8166764729724331 %


[32m[I 2022-12-29 01:16:07,519][0m Trial 20 finished with value: 0.8034594973518145 and parameters: {'learning_rate': 1.1841953236118073e-07, 'optimizer': 'SGD', 'n_unit': 58}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8034594973518145 %


[32m[I 2022-12-29 01:16:17,098][0m Trial 21 finished with value: 0.8245236697752988 and parameters: {'learning_rate': 8.191944723190583e-07, 'optimizer': 'RMSprop', 'n_unit': 70}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8245236697752988 %


[32m[I 2022-12-29 01:16:26,342][0m Trial 22 finished with value: 0.812469715606299 and parameters: {'learning_rate': 1.2640335943368407e-05, 'optimizer': 'RMSprop', 'n_unit': 68}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.812469715606299 %


[32m[I 2022-12-29 01:16:35,515][0m Trial 23 finished with value: 0.8155266731473242 and parameters: {'learning_rate': 5.241141853030843e-07, 'optimizer': 'RMSprop', 'n_unit': 70}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8155266731473242 %


[32m[I 2022-12-29 01:16:44,682][0m Trial 24 finished with value: 0.8130749515070357 and parameters: {'learning_rate': 1.5591911250765889e-06, 'optimizer': 'RMSprop', 'n_unit': 63}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8130749515070357 %


[32m[I 2022-12-29 01:16:54,194][0m Trial 25 finished with value: 0.8035639770545899 and parameters: {'learning_rate': 2.3998516560973604e-07, 'optimizer': 'RMSprop', 'n_unit': 68}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8035639770545899 %


[32m[I 2022-12-29 01:17:03,770][0m Trial 26 finished with value: 0.8105695075514627 and parameters: {'learning_rate': 1.354573750481175e-05, 'optimizer': 'RMSprop', 'n_unit': 66}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8105695075514627 %


[32m[I 2022-12-29 01:17:13,058][0m Trial 27 finished with value: 0.7958913819020033 and parameters: {'learning_rate': 1.851495466517629e-06, 'optimizer': 'RMSprop', 'n_unit': 69}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.7958913819020033 %


[32m[I 2022-12-29 01:17:22,435][0m Trial 28 finished with value: 0.8354519289150467 and parameters: {'learning_rate': 7.075181851800666e-05, 'optimizer': 'RMSprop', 'n_unit': 65}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.8354519289150467 %


[32m[I 2022-12-29 01:17:31,573][0m Trial 29 finished with value: 0.792123049194 and parameters: {'learning_rate': 0.0006981896481138222, 'optimizer': 'SGD', 'n_unit': 62}. Best is trial 10 with value: 0.841736078220058.[0m


Mean of Test set NDCG :0.792123049194 %


In [88]:
best_trial = study.best_trial

for key, value in best_trial.params.items():
    print("{}: {}".format(key, value))

learning_rate: 6.591195057359098e-07
optimizer: RMSprop
n_unit: 69


In [91]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_learning_rate,params_n_unit,params_optimizer,state
0,0,0.814169,2022-12-29 01:12:49.116209,2022-12-29 01:12:58.671573,0 days 00:00:09.555364,0.0003028926,60,Adam,COMPLETE
1,1,0.833346,2022-12-29 01:12:58.672574,2022-12-29 01:13:08.139024,0 days 00:00:09.466450,1.861686e-06,61,RMSprop,COMPLETE
2,2,0.812445,2022-12-29 01:13:08.140027,2022-12-29 01:13:18.509560,0 days 00:00:10.369533,0.02413343,56,Adam,COMPLETE
3,3,0.814918,2022-12-29 01:13:18.510523,2022-12-29 01:13:27.730852,0 days 00:00:09.220329,0.005254359,58,Adam,COMPLETE
4,4,0.794671,2022-12-29 01:13:27.731793,2022-12-29 01:13:37.104012,0 days 00:00:09.372219,0.000165678,65,SGD,COMPLETE
5,5,0.79102,2022-12-29 01:13:37.105013,2022-12-29 01:13:46.823105,0 days 00:00:09.718092,0.002902066,51,RMSprop,COMPLETE
6,6,0.820652,2022-12-29 01:13:46.824103,2022-12-29 01:13:56.123332,0 days 00:00:09.299229,0.06051005,53,Adam,COMPLETE
7,7,0.799101,2022-12-29 01:13:56.124346,2022-12-29 01:14:05.541104,0 days 00:00:09.416758,0.007736001,52,Adam,COMPLETE
8,8,0.787327,2022-12-29 01:14:05.542058,2022-12-29 01:14:14.848765,0 days 00:00:09.306707,0.0004536391,52,SGD,COMPLETE
9,9,0.810637,2022-12-29 01:14:14.849787,2022-12-29 01:14:24.094967,0 days 00:00:09.245180,4.066472e-05,64,RMSprop,COMPLETE
