# template

In [1]:
## template
import math

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler

from typing import List


class ListNet(torch.nn.Module):
    """Template of the scoring network: the architecture is still a placeholder."""

    def __init__(self, num_input_features: int, hidden_dim: int):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Define the architecture of the simple model here.
        self.model = None

    def forward(self, input_1: torch.Tensor) -> torch.Tensor:
        """Pass ``input_1`` through ``self.model`` and return whatever it yields."""
        return self.model(input_1)


class Solution:
    """Assignment skeleton: most methods below are placeholders to be filled in."""

    def __init__(self, n_epochs: int = 5, listnet_hidden_dim: int = 30,
                 lr: float = 0.001, ndcg_top_k: int = 10):
        # _prepare_data() is expected to set self.X_train, which is read on the next line.
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.ndcg_top_k = ndcg_top_k
        self.n_epochs = n_epochs

        self.model = self._create_model(
            self.num_input_features, listnet_hidden_dim)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    def _get_data(self) -> List[np.ndarray]:
        """Return features, column 0 and integer column 1 for the train and test frames."""
        train_df, test_df = msrank_10k()

        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]

    def _prepare_data(self) -> None:
        (X_train, y_train, self.query_ids_train,
            X_test, y_test, self.query_ids_test) = self._get_data()
        # TODO: add your code here
        pass

    def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                        inp_query_ids: np.ndarray) -> np.ndarray:
        # TODO: add your code here
        pass

    def _create_model(self, listnet_num_input_features: int,
                      listnet_hidden_dim: int) -> torch.nn.Module:
        # Fixed seed so that the model initialisation is reproducible.
        torch.manual_seed(0)
        # TODO: add your code here
        net = ...
        return net

    def fit(self) -> List[float]:
        # TODO: add your code here
        pass

    def _calc_loss(self, batch_ys: torch.FloatTensor,
                   batch_pred: torch.FloatTensor) -> torch.FloatTensor:
        # TODO: add your code here
        pass

    def _train_one_epoch(self) -> None:
        self.model.train()
        # TODO: add your code here
        pass

    def _eval_test_set(self) -> float:
        """Return the mean of the values collected in ``ndcgs`` (nothing is collected yet)."""
        with torch.no_grad():
            self.model.eval()
            ndcgs = []
            # TODO: add your code here
            return np.mean(ndcgs)

    def _ndcg_k(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
                ndcg_top_k: int) -> float:
        # TODO: add your code here
        pass


# code to send

In [282]:
## template
import math

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler

from typing import List


class ListNet(torch.nn.Module):
    """Two-layer perceptron that maps each feature row to a single score.

    The network is ``Linear -> ReLU -> Linear`` with one output unit.
    ``out_activation`` is created but is not applied in ``forward``.
    """

    def __init__(self, num_input_features: int, hidden_dim: int):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Layers are created in the same order as they are stacked so that
        # parameter initialisation consumes the random generator identically.
        hidden_layer = torch.nn.Linear(num_input_features, self.hidden_dim)
        score_layer = torch.nn.Linear(self.hidden_dim, 1)
        self.model = torch.nn.Sequential(
            hidden_layer,
            torch.nn.ReLU(),
            score_layer,
        )
        self.out_activation = torch.nn.Sigmoid()

    def forward(self, input_1: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-activated) scores produced by ``self.model``."""
        return self.model(input_1)
    
class Solution:
    def __init__(self, n_epochs: int = 5, listnet_hidden_dim: int = 30,
                 lr: float = 0.001, ndcg_top_k: int = 10):
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.ndcg_top_k = ndcg_top_k
        self.n_epochs = n_epochs

        self.model = self._create_model(
            self.num_input_features, listnet_hidden_dim)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    def _get_data(self) -> List[np.ndarray]:
        train_df, test_df = msrank_10k()

        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]

    def _prepare_data(self) -> None:
        (X_train, y_train, self.query_ids_train,
            X_test, y_test, self.query_ids_test) = self._get_data()
        
        self.X_train = torch.FloatTensor(self._scale_features_in_query_groups(
            inp_feat_array=X_train, inp_query_ids=self.query_ids_train))
        self.ys_train = torch.FloatTensor(y_train)
        
        
        self.X_test = torch.FloatTensor(self._scale_features_in_query_groups(
            inp_feat_array=X_test, inp_query_ids=self.query_ids_test))
        self.ys_test = torch.FloatTensor(y_test)

    def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                    inp_query_ids: np.ndarray) -> np.ndarray:
    
        inp_query_ids_uniq = np.unique(inp_query_ids)

        for id_i in inp_query_ids_uniq:
            scaler = StandardScaler()
            inp_feat_array[inp_query_ids == id_i, :] = \
                scaler.fit_transform(inp_feat_array[inp_query_ids == id_i, :])

        return inp_feat_array

    def _create_model(self, listnet_num_input_features: int,
                      listnet_hidden_dim: int) -> torch.nn.Module:
        torch.manual_seed(0)
        # допишите ваш код здесь
        net = ListNet(num_input_features=listnet_num_input_features,\
                      hidden_dim=listnet_hidden_dim)
        return net

    
    def fit(self) -> List[float]:
        # допишите ваш код здесь
#         self.model.train()
        ndcg_k = []
        for epoche in range(5):
            self._train_one_epoch()
            ndcg_k.append(self._eval_test_set())
#             print('epoche {} finished'.format(epoche + 1))
#             print('mean test nDCG top 10 score {}'.format(self._eval_test_set()))
        return ndcg_k
    
    def _calc_loss(self, batch_ys: torch.FloatTensor,
                   batch_pred: torch.FloatTensor) -> torch.FloatTensor:
        # допишите ваш код здесь
        #         def listnet_ce_loss(y_i, z_i):
        #     """
        #     y_i: (n_i, 1) GT
        #     z_i: (n_i, 1) preds
        #     """

        P_y_i = torch.softmax(batch_ys, dim=0)
        P_z_i = torch.softmax(batch_pred, dim=0)
        
#         return -torch.sum(P_y_i * torch.log(P_z_i))
        return -torch.sum(P_y_i * torch.log(P_z_i/P_y_i))

    def _train_one_epoch(self) -> None:
        # допишите ваш код здесь
        self.model.train()
        ids_uniq = np.unique(self.query_ids_train)
#         ids_uniq_rand = ids_uniq
        ids_uniq_rand = ids_uniq[torch.randperm(len(ids_uniq))]
        
        for n_id in ids_uniq_rand:
        
            X_train_id = self.X_train[self.query_ids_train == n_id]
            ys_train_id = self.ys_train[self.query_ids_train == n_id]
            
#             N_train = np.shape(X_train_id)[0]
#             idx_rand = torch.randperm(N_train)
#             X_train_id = X_train_id[idx_rand]
#             ys_train_id = ys_train_id[idx_rand]
            
#             batch_X = X_train_id
#             batch_ys = ys_train_id

#             self.optimizer.zero_grad()
#             # self.optimizer.no_grad()
#             # torch.no_grad()
            
# #             if len(batch_X) > 0:
#             batch_pred = self.model(batch_X).reshape(-1,)
# #                 print('batch_pred')
# #                 print(batch_pred)
#             batch_loss = self._calc_loss(batch_ys=batch_ys, batch_pred=batch_pred)
#             batch_loss.backward(retain_graph=False)
# #             batch_loss.backward(retain_graph=True)
#             self.optimizer.step()
            
            batch_size = 25
            cur_batch = 0
            N_train = np.shape(X_train_id)[0]

            for n_batch in range(N_train // batch_size):
                batch_X = X_train_id[cur_batch: cur_batch + batch_size]
                batch_ys = ys_train_id[cur_batch: cur_batch + batch_size]
                cur_batch += batch_size

                self.optimizer.zero_grad()
#                 if len(batch_X) > 0:
                batch_pred = self.model(batch_X).reshape(-1,)
                batch_loss = self._calc_loss(batch_ys=batch_ys, batch_pred=batch_pred)
                batch_loss.backward(retain_graph=False)
                self.optimizer.step()

                
    def _eval_test_set(self) -> float:
        with torch.no_grad():
            self.model.eval()
            ndcgs = []
            # допишите ваш код здесь
            with torch.no_grad():
                
                ids_test_uniq = np.unique(self.query_ids_test)
                
                for id_test in ids_test_uniq:
                    cur_X_test = self.X_test[self.query_ids_test == id_test]
                    valid_pred = self.model(cur_X_test)
#                     print('self.ys_test[self.query_ids_test == id_test]')
#                     print(self.ys_test[self.query_ids_test == id_test])
                    cur_ndcg_k = self._ndcg_k(\
                        ys_true=self.ys_test[self.query_ids_test == id_test],\
                        ys_pred=valid_pred,\
                        ndcg_top_k=10)
        
                    ndcgs.append(cur_ndcg_k)
                
                #  ndcgs_avg = sum(ndcgs) / len(ndcgs)
                #  print('mean test nDCG top 10 score {}'.format(ndcgs_avg))
            
            return np.mean(ndcgs)
        
        
    def _ndcg_k(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
                ndcg_top_k: int) -> float:
        # допишите ваш код здесь
        ys_true = torch.reshape(ys_true, (-1,))
        ys_pred = torch.reshape(ys_pred, (-1,))
        
        ys_pred_sorted = torch.sort(ys_pred, descending=True)
        ys_true_sorted = ys_true[ys_pred_sorted[1]]
        ys_true_sorted_sep = torch.sort(ys_true, descending=True)[0]
        
#         print('ys_true')
#         print(np.shape(ys_true_sorted[:ndcg_top_k]))
#         print(ys_true_sorted[:ndcg_top_k])
#         print('ys_pred')
#         print(np.shape(ys_pred_sorted[0][:ndcg_top_k]))
#         print(ys_pred_sorted[0][:ndcg_top_k])
        
#         ndcg_k = self._ndcg(ys_true=ys_true_sorted[:self.ndcg_top_k],\
#                             ys_pred=ys_pred_sorted[0][:self.ndcg_top_k],\
#                             gain_scheme='exp2')
        dcg_act = self._dcg(ys_true=ys_true_sorted[:self.ndcg_top_k],\
                            ys_pred=ys_pred_sorted[0][:self.ndcg_top_k],\
                            gain_scheme='exp2')
        
        dcg_max = self._dcg(ys_true=ys_true_sorted_sep[:self.ndcg_top_k],\
                            ys_pred=ys_true_sorted_sep[:self.ndcg_top_k],\
                            gain_scheme='exp2')
        
        try:
            ndcg_k = dcg_act / dcg_max
        except:
            ndcg_k = 0
            
        if np.isnan(ndcg_k):
            ndcg_k = 0
        
        return ndcg_k


    def _ndcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str = 'const') -> float:
#     def _ndcg(self, ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str) -> float:
        try:
            ndcg_val = self._dcg(ys_true=ys_true, ys_pred=ys_pred, gain_scheme=gain_scheme) / \
                self._dcg(ys_true=ys_true, ys_pred=ys_true, gain_scheme=gain_scheme)
        except:
            ndcg_val = 0.
        
        return ndcg_val
    
    def _compute_gain(self, y_value: float, gain_scheme: str) -> float:
#         if gain_scheme == 'exp2':
#             return 2 ** y_value - 1.
#         else:
#             return y_value + 0.
        return 2 ** y_value - 1.

    def _dcg(self, ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str) -> float:
        ys_pred_sorted = torch.sort(ys_pred, descending=True)
        log2_list = [math.log2(x) for x in range(2, len(ys_pred) + 2)]

        dcg_val = 0.
        for i in range(len(log2_list)):
            dcg_val += self._compute_gain(ys_true[ys_pred_sorted[1]][i].item(), \
                                          gain_scheme=gain_scheme) / log2_list[i]

        return dcg_val
     
        

In [283]:
# Build the solver with default hyper-parameters; the constructor loads the data and creates the model.
tst_sol1 = Solution()

In [284]:
%%time
tst_sol1.fit()

CPU times: user 6.28 s, sys: 20.2 ms, total: 6.3 s
Wall time: 2.75 s


[0.4383475148550584,
 0.4460355182606021,
 0.43370513192682997,
 0.43956455185826937,
 0.44388108952724925]

In [253]:
%%time
tst_sol1.fit()

CPU times: user 11.7 s, sys: 140 ms, total: 11.9 s
Wall time: 1.66 s


[0.4159571081953855,
 0.4348203274238781,
 0.4177221612058919,
 0.4354188390581321,
 0.4312588827113882]

In [126]:
# Only the last expression of a cell is echoed, so the tensor shown below is the
# set of distinct test labels; the train-label result on the first line is discarded.
torch.unique(tst_sol1.ys_train)
torch.unique(tst_sol1.ys_test)

tensor([0., 1., 2., 3., 4.])

In [125]:
np.shape(tst_sol1.X_train)[0]

10000

# solution

In [None]:
def _get_data(self) -> List[np.ndarray]:
    """Fetch the train/test frames and split each into three arrays.

    Returns:
        ``[X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]``
        where, per frame, X is every column except 0 and 1, y is column 0
        and the query ids are column 1 cast to ``int``.
    """
    pieces = []
    # msrank_10k() yields the (train, test) pair; both are unpacked the same way.
    for frame in msrank_10k():
        pieces.append(frame.drop([0, 1], axis=1).values)
        pieces.append(frame[0].values)
        pieces.append(frame[1].values.astype(int))
    return pieces

In [2]:
train_df, test_df = msrank_10k()
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,128,129,130,131,132,133,134,135,136,137
0,2.0,1,3,3,0,0,3,1.0,1.0,0.0,...,62,11089534,2,116,64034,13,3,0,0,0.0
1,2.0,1,3,0,3,0,3,1.0,0.0,1.0,...,54,11089534,2,124,64034,1,2,0,0,0.0
2,0.0,1,3,0,2,0,3,1.0,0.0,0.666667,...,45,3,1,124,3344,14,67,0,0,0.0
3,2.0,1,3,0,3,0,3,1.0,0.0,1.0,...,56,11089534,13,123,63933,1,3,0,0,0.0
4,1.0,1,3,0,3,0,3,1.0,0.0,1.0,...,64,5,7,256,49697,1,13,0,0,0.0


In [10]:
X_train = train_df.drop([0, 1], axis=1).values
np.shape(X_train)

(10000, 136)

In [29]:
X_train

array([[3., 3., 0., ..., 0., 0., 0.],
       [3., 0., 3., ..., 0., 0., 0.],
       [3., 0., 2., ..., 0., 0., 0.],
       ...,
       [2., 0., 2., ..., 0., 0., 0.],
       [2., 0., 1., ..., 0., 0., 0.],
       [2., 1., 1., ..., 0., 0., 0.]])

In [13]:
y_train = train_df[0].values
np.shape(y_train)

(10000,)

In [16]:
query_ids_train = train_df[1].values.astype(int)

In [17]:
X_test = test_df.drop([0, 1], axis=1).values
y_test = test_df[0].values
query_ids_test = test_df[1].values.astype(int)

In [18]:
train_df[1].values.astype(int)

array([   1,    1,    1, ..., 1291, 1291, 1291])

In [75]:
def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                    inp_query_ids: np.ndarray) -> np.ndarray:
    """Standardize the feature rows separately inside every query group.

    Args:
        inp_feat_array: 2-D feature matrix, one row per document.
        inp_query_ids: query id of each row, aligned with ``inp_feat_array``.

    Returns:
        A new float array of the same shape in which the rows of each query
        id have been passed through a freshly fitted ``StandardScaler``.
        The input array is left untouched.
    """
    # Work on a float copy: writing the scaled values back into the caller's
    # array would overwrite the raw data and, for an integer array, truncate
    # the standardized values.
    scaled = np.array(inp_feat_array, dtype=float)

    for query_id in np.unique(inp_query_ids):
        in_group = inp_query_ids == query_id
        scaled[in_group, :] = StandardScaler().fit_transform(scaled[in_group, :])

    return scaled

In [22]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,128,129,130,131,132,133,134,135,136,137
0,2.0,1,3,3,0,0,3,1.0,1.0,0.0,...,62,11089534,2,116,64034,13,3,0,0,0.0
1,2.0,1,3,0,3,0,3,1.0,0.0,1.0,...,54,11089534,2,124,64034,1,2,0,0,0.0
2,0.0,1,3,0,2,0,3,1.0,0.0,0.666667,...,45,3,1,124,3344,14,67,0,0,0.0
3,2.0,1,3,0,3,0,3,1.0,0.0,1.0,...,56,11089534,13,123,63933,1,3,0,0,0.0
4,1.0,1,3,0,3,0,3,1.0,0.0,1.0,...,64,5,7,256,49697,1,13,0,0,0.0


In [26]:
scaler = StandardScaler()

In [27]:
dir(scaler)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_n_features',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_reset',
 '_validate_data',
 'copy',
 'fit',
 'fit_transform',
 'get_params',
 'inverse_transform',
 'partial_fit',
 'set_params',
 'transform',
 'with_mean',
 'with_std']

In [30]:
scaler.fit(X_train)

StandardScaler()

In [31]:
# NOTE: without parentheses this only echoes the bound method; call scaler.get_params() to see the parameters.
scaler.get_params

<bound method BaseEstimator.get_params of StandardScaler()>

In [35]:
scaler.fit_transform(X_train)

array([[ 0.96937326,  4.72565641, -1.21789047, ..., -0.05501908,
        -0.04255521, -0.01363624],
       [ 0.96937326, -0.37232238,  1.79644119, ..., -0.05501908,
        -0.04255521, -0.01363624],
       [ 0.96937326, -0.37232238,  0.79166397, ..., -0.05501908,
        -0.04255521, -0.01363624],
       ...,
       [ 0.10246818, -0.37232238,  0.79166397, ..., -0.05501908,
        -0.04255521, -0.01363624],
       [ 0.10246818, -0.37232238, -0.21311325, ..., -0.05501908,
        -0.04255521, -0.01363624],
       [ 0.10246818,  1.32700388, -0.21311325, ..., -0.05501908,
        -0.04255521, -0.01363624]])

In [43]:
np.shape(scaler.fit_transform(X_train)[0])

(136,)

In [50]:
# `[:][0]` selects row 0 (the full slice changes nothing), while `[:, 0]` selects column 0.
# Only the second line's shape is echoed below; the first result is discarded.
np.shape(scaler.fit_transform(X_train)[:][0])
np.shape(scaler.fit_transform(X_train)[:, 0])

(10000,)

In [65]:
# The shape on the first line is computed but not displayed; only the last
# expression (the first 100 rows of X_train) is echoed.
np.shape(scaler.fit_transform(X_train)[:100, 0])
X_train[:100, :]

array([[3., 3., 0., ..., 0., 0., 0.],
       [3., 0., 3., ..., 0., 0., 0.],
       [3., 0., 2., ..., 0., 0., 0.],
       ...,
       [6., 2., 2., ..., 0., 0., 0.],
       [3., 0., 3., ..., 0., 0., 0.],
       [6., 0., 3., ..., 0., 0., 0.]])

In [56]:
query_ids_train_uniq = np.unique(query_ids_train)
query_ids_train_uniq

array([   1,   16,   31,   46,   61,   76,   91,  106,  121,  136,  151,
        166,  181,  196,  211,  226,  241,  256,  271,  286,  301,  316,
        331,  346,  361,  376,  391,  406,  421,  436,  451,  466,  481,
        496,  511,  526,  541,  556,  571,  586,  601,  616,  631,  646,
        661,  676,  691,  706,  721,  736,  751,  766,  781,  796,  811,
        826,  841,  856,  871,  886,  901,  916,  931,  946,  961,  976,
        991, 1006, 1021, 1036, 1051, 1066, 1081, 1096, 1111, 1126, 1141,
       1156, 1171, 1186, 1201, 1216, 1231, 1246, 1261, 1276, 1291])

In [71]:
np.shape(X_train[query_ids_train == query_ids_train_uniq[0], :])

(86, 136)

In [74]:
# Standardize the features separately within each query group, fitting a new scaler per group.
# NOTE: the result is written back into X_train, so the raw feature values are
# replaced for every cell executed after this one.
for id_i in query_ids_train_uniq:
    scaler = StandardScaler()
    X_train[query_ids_train == id_i, :] = \
        scaler.fit_transform(X_train[query_ids_train == id_i, :])

In [83]:
X_train[query_ids_train == query_ids_train_uniq[0], :]

array([[ 0.31606376,  4.81705177, -2.17593133, ..., -0.11175774,
        -0.19593518, -0.26622504],
       [ 0.31606376, -0.23497813,  0.61705515, ..., -0.11175774,
        -0.19593518, -0.26622504],
       [ 0.31606376, -0.23497813, -0.31394034, ..., -0.11175774,
        -0.19593518, -0.26622504],
       ...,
       [ 0.31606376,  4.81705177,  0.61705515, ..., -0.11175774,
        -0.19593518, -0.26622504],
       [-1.28284703, -0.23497813, -0.31394034, ..., -0.11175774,
        -0.07383065,  1.91992331],
       [ 0.31606376, -0.23497813,  0.61705515, ..., -0.11175774,
        -0.19593518, -0.26622504]])

In [86]:
# dtype=float (the Python builtin) yields a torch.float64 tensor, as the output below shows.
torch.tensor(X_train[query_ids_train == query_ids_train_uniq[0], :], dtype=float)

tensor([[ 0.3161,  4.8171, -2.1759,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350,  0.6171,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350, -0.3139,  ..., -0.1118, -0.1959, -0.2662],
        ...,
        [ 0.3161,  4.8171,  0.6171,  ..., -0.1118, -0.1959, -0.2662],
        [-1.2828, -0.2350, -0.3139,  ..., -0.1118, -0.0738,  1.9199],
        [ 0.3161, -0.2350,  0.6171,  ..., -0.1118, -0.1959, -0.2662]],
       dtype=torch.float64)

In [88]:
torch.tensor(X_train[query_ids_train == query_ids_train_uniq[0], :], dtype=float).dtype

torch.float64

In [99]:
# The first line's result is discarded; the tensor displayed below is the whole
# X_train matrix converted on the second line.
torch.FloatTensor(X_train[query_ids_train == query_ids_train_uniq[0], :])
X_train_tens = torch.FloatTensor(X_train)
X_train_tens

tensor([[ 0.3161,  4.8171, -2.1759,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350,  0.6171,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350, -0.3139,  ..., -0.1118, -0.1959, -0.2662],
        ...,
        [ 0.4786, -0.4121,  1.3276,  ...,  0.0000, -0.1552, -0.3721],
        [ 0.4786, -0.4121, -0.0675,  ...,  0.0000, -0.1552, -0.3721],
        [ 0.4786,  1.7170, -0.0675,  ...,  0.0000, -0.1552, -0.3721]])

In [100]:
X_train_tens[query_ids_train == query_ids_train_uniq[0], :]

tensor([[ 0.3161,  4.8171, -2.1759,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350,  0.6171,  ..., -0.1118, -0.1959, -0.2662],
        [ 0.3161, -0.2350, -0.3139,  ..., -0.1118, -0.1959, -0.2662],
        ...,
        [ 0.3161,  4.8171,  0.6171,  ..., -0.1118, -0.1959, -0.2662],
        [-1.2828, -0.2350, -0.3139,  ..., -0.1118, -0.0738,  1.9199],
        [ 0.3161, -0.2350,  0.6171,  ..., -0.1118, -0.1959, -0.2662]])

In [103]:
np.shape(X_train_tens[query_ids_train == query_ids_train_uniq[0], :])

torch.Size([86, 136])