In [1]:
import pandas as pd

# Load the four input tables from CSV files in the current working directory.
# Each read is independent of the others.
train_data = pd.read_csv(filepath_or_buffer='train.csv')
test_data = pd.read_csv(filepath_or_buffer='test.csv')
anime_data = pd.read_csv(filepath_or_buffer='anime.csv')
profile_data = pd.read_csv(filepath_or_buffer='profile.csv')

In [2]:
# Clamp scores into the closed range [1, 10]: anything below 1 becomes 1 and
# anything above 10 becomes 10. Values already inside the range (and NaN) are
# left untouched. A single vectorized clip replaces two row-by-row
# `.apply(lambda ...)` passes and yields the same values.
train_data['score'] = train_data['score'].clip(lower=1, upper=10)

In [3]:
# Keep one row per anime id so the left join below cannot multiply
# rating rows.
anime_data = anime_data.drop_duplicates(subset=['id'])

# Attach anime metadata to every rating row (left join: all ratings are kept,
# ratings without a matching anime get NaN metadata), then keep only the three
# columns used downstream.
# NOTE: this rebinds `train_data`; re-running the cell without reloading the
# data will fail because 'anime_id' is no longer present.
train_data = (
    train_data
    .merge(anime_data, how='left', left_on='anime_id', right_on='id')
    .loc[:, ['user', 'title', 'score']]
)

train_data.head()

Unnamed: 0,user,title,score
0,cd931c240b,Hanamaru Youchien,9
1,bf666ac921,Sakamichi no Apollon,8
2,24a73e9958,Glasslip,5
3,d45057b3ec,Ore no Imouto ga Konnani Kawaii Wake ga Nai,7
4,9b33e2839e,Shining Hearts: Shiawase no Pan,2


In [9]:
from scipy.sparse import csr_matrix

# Build the user x title rating matrix. Repeated (user, title) pairs are
# combined by pivot_table's default aggregation, and every pair without a
# rating is filled with 0.
anime_pivot = (
    train_data
    .pivot_table(values='score', index='user', columns='title')
    .fillna(value=0)
)

# Transpose so that each row is one title (rows follow the order of
# anime_pivot.columns) and store the result in CSR form.
sparse_anime_pivot = csr_matrix(anime_pivot.transpose())

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Title whose nearest neighbours are inspected at the end of this cell.
QUERY_TITLE = 'vivi'

# Rating vector of the query title as a (1, n_users) row, compared against
# every title row of the sparse title x user matrix.
anime1 = anime_pivot[QUERY_TITLE].values.reshape(1, -1)  # type: ignore
similarities = cosine_similarity(anime1, sparse_anime_pivot)
similarities_series = pd.Series(similarities[0], index=anime_pivot.columns)

# Sort by similarity, highest first. sort_values returns a new Series, so the
# result has to be kept; it is displayed as the cell's last expression.
similarities_sorted = similarities_series.sort_values(ascending=False)

# Similarity of every title against every other title, computed in one call.
# Row i of the result corresponds to anime_pivot.columns[i], because
# sparse_anime_pivot was built from anime_pivot.T. A per-title loop would
# re-normalise the whole matrix on every iteration and would also overwrite
# `similarities` from the query above.
sim_matrix = cosine_similarity(sparse_anime_pivot, dense_output=True)

# Map each title to its 1-D array of similarities against all titles.
sim_animes = dict(zip(anime_pivot.columns, sim_matrix))

similarities_sorted



title
Yasashii Fue, Tori, Ishi          1.0
Docchi ni Suru?                   1.0
Mori no Kuma-san (2009)           1.0
I Was King                        1.0
Survivor                          1.0
                                 ... 
High Score                        0.0
High School! Kimengumi (Movie)    0.0
High School Fleet                 0.0
High School DxD Specials          0.0
Kyousou Giga                      0.0
Length: 7573, dtype: float64

In [3]:
import numpy as np

# Round-trip check of the target transform on every integer score 1..10.
score_grid = np.arange(1, 11)

# Forward transform: log(11 - score). A score of 10 maps to log(1) = 0.
y_val_fold = np.log(11 - score_grid)
print(y_val_fold)

# Inverse transform: 11 - exp(value) should recover the original scores.
y_val_fold = 11 - np.exp(y_val_fold)
print(y_val_fold)


[2.30258509 2.19722458 2.07944154 1.94591015 1.79175947 1.60943791
 1.38629436 1.09861229 0.69314718 0.        ]
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
