In [None]:
import random

import torch
import torch.optim as optim
import pandas as pd
from sklearn import metrics
import seaborn as sns

In [None]:
# Enable IPython's autoreload extension so edited modules can be re-imported
# without restarting the kernel.
%load_ext autoreload
# Mode 1: before each cell runs, reload only the modules registered via %aimport.
%autoreload 1

In [None]:
# Import the local `listnet` module and register it for automatic reloading.
%aimport listnet

In [None]:
# Experiment configuration.
seed = 0          # fixed seed so that Restart & Run All gives repeatable results
n_train = 500     # forwarded to make_dataset; presumably the number of training records
n_valid = 100     # forwarded to make_dataset; presumably the number of validation records
x_dim = 50        # forwarded to make_dataset and to listnet.Net (input dimension, presumably)
n_epochs = 10     # number of passes of the training loop
query_size = 16   # forwarded to make_dataset; presumably the number of records per query

# Seed the RNGs this notebook uses directly (random.shuffle, torch.randperm,
# and PyTorch's default weight initialisation).
# NOTE(review): if listnet.make_dataset draws from another RNG (e.g. NumPy),
# seed that one as well.
random.seed(seed)
torch.manual_seed(seed)

# Build the train/validation tensors together with the per-query index lookups.
X_train, X_valid, ys_train, ys_valid, train_queries, valid_queries = listnet.make_dataset(n_train, n_valid, x_dim, query_size)

In [None]:
# Inspect the training inputs returned by listnet.make_dataset.
X_train

In [None]:
# Check the shape of the training targets returned by listnet.make_dataset.
ys_train.shape

In [None]:
# Declare the model (x_dim is the value that was also given to make_dataset)
net = listnet.Net(x_dim)
# Adam with the library's default hyperparameters, updating all of the model's parameters
optimizer = optim.Adam(params=net.parameters())

In [None]:
for epoch in range(n_epochs):
	# Training mode; this only matters if the model contains layers that behave
	# differently between training and evaluation (e.g. dropout).
	net.train()

	# Visit the queries in a new random order every epoch
	query_keys = list(train_queries.keys())
	random.shuffle(query_keys)

	for query_num in query_keys:
		# Select the records of this query. X_train / ys_train are deliberately
		# NOT permuted as a whole: train_queries indexes into them, so a global
		# shuffle would make every query select unrelated records (and would
		# mutate the notebook-level tensors on every run of this cell).
		batch_X = X_train[train_queries[query_num]]
		batch_ys = ys_train[train_queries[query_num]]

		# Nothing to learn from a query without records
		if len(batch_X) == 0:
			continue

		# Randomise the order of the records inside the query, applying the same
		# permutation to inputs and targets so they stay aligned
		perm = torch.randperm(len(batch_X))
		batch_X = batch_X[perm]
		batch_ys = batch_ys[perm]

		optimizer.zero_grad()

		# Predict with the current model weights
		batch_pred = net(batch_X)

		# Compute the loss between this batch's targets and predictions
		batch_loss = listnet.listnet_loss(batch_ys, batch_pred)

		# Compute gradients for every tensor that has requires_grad=True.
		# NOTE(review): retain_graph=True is kept from the original; it is only
		# needed if the tensors from make_dataset carry autograd history.
		# Confirm, and drop it if they do not.
		batch_loss.backward(retain_graph=True)

		# Update the model weights
		optimizer.step()

	# Evaluation mode for the validation pass (counterpart of net.train() above)
	net.eval()
	with torch.no_grad():		# no gradient tracking during validation, to reduce memory use
		valid_pred = net(X_valid)
		valid_swapped = listnet.swapped_pairs(valid_pred, ys_valid)
		# reshape(1, -1) hands the whole validation set to ndcg_score as a single ranking
		ndcg_score = metrics.ndcg_score(ys_valid.numpy().reshape(1, -1), valid_pred.numpy().reshape(1, -1))
		print('epoch: {}, swapped: {}, ndcg: {}'.format(epoch, valid_swapped, ndcg_score))

In [None]:
# Put the validation targets and the last epoch's predictions side by side.
# The tensors are converted with .numpy() explicitly (as the ndcg computation in
# the training loop already does) rather than relying on pandas to coerce torch
# tensors, which can otherwise produce object columns holding tensor elements.
df_ys_valid = pd.DataFrame(ys_valid.numpy())
df_valid_pred = pd.DataFrame(valid_pred.numpy())
df_valid = pd.concat([df_ys_valid, df_valid_pred], axis=1)
# Exactly two columns are expected: one for the targets, one for the predictions
df_valid.columns = ['ys', 'pred']
df_valid

In [None]:
# Scatter plot of the validation targets (x axis) against the model's predictions (y axis)
sns.scatterplot(data=df_valid, x='ys', y='pred')