In [29]:
import pandas as pd
import numpy as np
import warnings
import time
# Silence every warning for the rest of the session (this also hides pandas warnings)
warnings.filterwarnings("ignore")
# Make Jupyter display the result of every expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all' # the default is 'last'
# Show all columns
pd.set_option('display.max_columns', None) # otherwise some of the middle columns are elided in the display
# Show all rows
# pd.set_option('display.max_rows', None)
# Set the display width of cell values to 100 (the default is 50)
pd.set_option('max_colwidth',100)

In [30]:
# Load the ratings file. Column names are supplied explicitly, so no line is treated as a header.
# A multi-character separator such as "::" is only supported by pandas' Python parser;
# request that engine explicitly instead of relying on the silent fallback.
rating_df = pd.read_csv(
    '~/Dataset/ml-1m/ratings.dat',
    sep="::",
    names=["userid", "itemid", "rating", "timestamp"],
    engine="python",
)
rating_df
# Fraction of missing values per column, largest first
(rating_df.isna().sum()/rating_df.shape[0]).sort_values(ascending=False)

Unnamed: 0,userid,itemid,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


userid       0.0
itemid       0.0
rating       0.0
timestamp    0.0
dtype: float64

In [31]:
# Load the users file. Column names are supplied explicitly, so no line is treated as a header.
# A multi-character separator such as "::" is only supported by pandas' Python parser;
# request that engine explicitly instead of relying on the silent fallback.
user_df = pd.read_csv(
    '~/Dataset/ml-1m/users.dat',
    sep="::",
    names=["userid", "gender", "age", "occupation", "zip_code"],
    engine="python",
)
user_df
# Fraction of missing values per column, largest first
(user_df.isna().sum()/user_df.shape[0]).sort_values(ascending=False)

Unnamed: 0,userid,gender,age,occupation,zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


userid        0.0
gender        0.0
age           0.0
occupation    0.0
zip_code      0.0
dtype: float64

In [32]:
# Load the movies file (decoded as latin-1). Column names are supplied explicitly,
# so no line is treated as a header.
# A multi-character separator such as "::" is only supported by pandas' Python parser;
# request that engine explicitly instead of relying on the silent fallback.
movie_df = pd.read_csv(
    '~/Dataset/ml-1m/movies.dat',
    sep="::",
    names=["itemid", "title", "genres"],
    encoding="latin-1",
    engine="python",
)
movie_df
# Fraction of missing values per column, largest first
(movie_df.isna().sum()/movie_df.shape[0]).sort_values(ascending=False)

Unnamed: 0,itemid,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


itemid    0.0
title     0.0
genres    0.0
dtype: float64

In [33]:
# Drop ratings whose item id does not appear in the movie table (i.e. movies without a title)
movie_title_exist_idset = movie_df["itemid"].tolist()
has_known_movie = rating_df["itemid"].isin(movie_title_exist_idset)
rating_df_fliter = rating_df.loc[has_known_movie].reset_index(drop=True)
rating_df_fliter

Unnamed: 0,userid,itemid,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [34]:
# Turn the rating into a 0/1 label: 1 when the rating is at least 4, otherwise 0
rating_df_fliter["label"] = (rating_df_fliter["rating"] >= 4).astype("int64")
rating_df_fliter

Unnamed: 0,userid,itemid,rating,timestamp,label
0,1,1193,5,978300760,1
1,1,661,3,978302109,0
2,1,914,3,978301968,0
3,1,3408,4,978300275,1
4,1,2355,5,978824291,1
...,...,...,...,...,...
1000204,6040,1091,1,956716541,0
1000205,6040,1094,5,956704887,1
1000206,6040,562,5,956704746,1
1000207,6040,1096,4,956715648,1


In [35]:
# Group the interactions into one sequence per user.
# Sort by user and then by time first, so every aggregated list is in chronological order.
rating_df_fliter.sort_values(by=["userid", "timestamp"], ascending=True, inplace=True)
seq_aggregations = {
    "itemid_seq": ("itemid", list),
    "rating_seq": ("rating", list),
    "timestamp_seq": ("timestamp", list),
    "label_seq": ("label", list),
}
sequence_df = rating_df_fliter.groupby(['userid']).agg(**seq_aggregations).reset_index()
sequence_df

Unnamed: 0,userid,itemid_seq,rating_seq,timestamp_seq,label_seq
0,1,"[3186, 1270, 1721, 1022, 2340, 1836, 3408, 2804, 1207, 1193, 720, 260, 919, 608, 2692, 1961, 202...","[4, 5, 4, 5, 3, 5, 4, 5, 4, 5, 3, 4, 4, 4, 4, 5, 5, 5, 4, 5, 4, 4, 5, 5, 4, 3, 5, 4, 4, 4, 3, 4,...","[978300019, 978300055, 978300055, 978300055, 978300103, 978300172, 978300275, 978300719, 9783007...","[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,..."
1,2,"[1198, 1210, 1217, 2717, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1213, 1945, 1207, 593, 3095, 3...","[4, 4, 3, 3, 5, 4, 5, 5, 5, 4, 4, 2, 5, 4, 5, 4, 5, 4, 5, 2, 5, 4, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5,...","[978298124, 978298151, 978298151, 978298196, 978298261, 978298372, 978298391, 978298413, 9782984...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."
2,3,"[593, 2858, 3534, 1968, 1431, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 31...","[3, 4, 3, 4, 3, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5, 5, 4, 4, 4, 5, 3, 1, 4, 5, 4, 5, 4, 4, 4, 4, 4, 2,...","[978297018, 978297039, 978297068, 978297068, 978297095, 978297095, 978297396, 978297419, 9782974...","[0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,..."
3,4,"[1210, 1097, 3468, 480, 3527, 260, 1196, 1198, 1387, 2028, 2366, 1201, 2692, 2947, 1214, 3418, 3...","[3, 4, 5, 4, 1, 5, 2, 5, 5, 5, 4, 5, 5, 5, 4, 4, 4, 5, 4, 4, 5]","[978293924, 978293964, 978294008, 978294008, 978294008, 978294199, 978294199, 978294199, 9782941...","[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
4,5,"[2717, 908, 919, 1250, 356, 2858, 1127, 2188, 2683, 3051, 2997, 2770, 2355, 2908, 3786, 3016, 27...","[1, 4, 4, 5, 1, 4, 1, 1, 3, 2, 5, 4, 5, 4, 3, 4, 3, 2, 5, 1, 3, 3, 4, 3, 4, 3, 4, 2, 4, 4, 5, 4,...","[978241072, 978241072, 978241072, 978241112, 978241112, 978241390, 978241390, 978241390, 9782414...","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,..."
...,...,...,...,...,...
6035,6036,"[1721, 2428, 3438, 1883, 2376, 2492, 2826, 2827, 2858, 2572, 2683, 2699, 2706, 2707, 3005, 2842,...","[4, 2, 2, 4, 2, 1, 1, 2, 5, 2, 3, 2, 2, 2, 2, 2, 1, 2, 4, 1, 1, 5, 2, 3, 3, 4, 2, 3, 3, 2, 2, 2,...","[956709349, 956709350, 956709350, 956709350, 956709350, 956709453, 956709453, 956709453, 9567094...","[1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,..."
6036,6037,"[1882, 3508, 702, 1267, 2028, 3148, 858, 562, 912, 3543, 1221, 923, 2804, 2858, 2132, 1193, 318,...","[1, 3, 2, 5, 4, 4, 5, 4, 4, 3, 5, 4, 4, 4, 3, 4, 5, 3, 5, 4, 4, 4, 5, 3, 4, 5, 5, 4, 4, 4, 3, 4,...","[956708997, 956708997, 956708997, 956708997, 956708997, 956709118, 956709118, 956709118, 9567091...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,..."
6037,6038,"[920, 3396, 1210, 2146, 356, 1387, 1079, 1148, 3548, 1276, 2716, 3088, 232, 1136, 1223, 1296, 13...","[3, 3, 4, 4, 4, 2, 5, 5, 4, 3, 3, 5, 4, 4, 5, 5, 3, 4, 1, 5]","[956706827, 956706827, 956706876, 956706909, 956707005, 956707005, 956707547, 956707604, 9567076...","[0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1]"
6038,6039,"[282, 111, 2067, 930, 1230, 3022, 947, 3088, 3133, 1294, 3421, 2804, 1269, 955, 1244, 1276, 2622...","[4, 4, 4, 4, 3, 4, 3, 5, 3, 3, 2, 4, 3, 4, 2, 4, 3, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 4, 3,...","[956705158, 956705158, 956705178, 956705202, 956705202, 956705281, 956705281, 956705281, 9567052...","[1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,..."


In [37]:
def get_seq(row, pos_or_neg):
    """Collect the item ids whose label equals ``pos_or_neg``.

    Args:
        row: object exposing ``label_seq`` and ``itemid_seq``; the two are read
            position by position.
        pos_or_neg: label value to keep (this notebook passes 1 or 0).

    Returns:
        A list with the ``itemid_seq`` entries found at the positions where
        ``label_seq`` matches ``pos_or_neg``, in their original order.
    """
    return [
        row.itemid_seq[position]
        for position, label in enumerate(row.label_seq)
        if label == pos_or_neg
    ]


# Split every user's items by label, then record the length of each sequence
for seq_col, wanted_label in (("pos_seq", 1), ("neg_seq", 0)):
    sequence_df[seq_col] = sequence_df.apply(get_seq, axis=1, pos_or_neg=wanted_label)
for length_col, seq_col in (
    ("seq_length", "itemid_seq"),
    ("pos_seq_length", "pos_seq"),
    ("neg_seq_length", "neg_seq"),
):
    sequence_df[length_col] = sequence_df[seq_col].map(len)
sequence_df

Unnamed: 0,userid,itemid_seq,rating_seq,timestamp_seq,label_seq,pos_seq,neg_seq,seq_length,pos_seq_length,neg_seq_length
0,1,"[3186, 1270, 1721, 1022, 2340, 1836, 3408, 2804, 1207, 1193, 720, 260, 919, 608, 2692, 1961, 202...","[4, 5, 4, 5, 3, 5, 4, 5, 4, 5, 3, 4, 4, 4, 4, 5, 5, 5, 4, 5, 4, 4, 5, 5, 4, 3, 5, 4, 4, 4, 3, 4,...","[978300019, 978300055, 978300055, 978300055, 978300103, 978300172, 978300275, 978300719, 9783007...","[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,...","[3186, 1270, 1721, 1022, 1836, 3408, 2804, 1207, 1193, 260, 919, 608, 2692, 1961, 2028, 3105, 93...","[2340, 720, 914, 661, 2321, 1197, 2687, 745]",53,45,8
1,2,"[1198, 1210, 1217, 2717, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1213, 1945, 1207, 593, 3095, 3...","[4, 4, 3, 3, 5, 4, 5, 5, 5, 4, 4, 2, 5, 4, 5, 4, 5, 4, 5, 2, 5, 4, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5,...","[978298124, 978298151, 978298151, 978298196, 978298261, 978298372, 978298391, 978298413, 9782984...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1198, 1210, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1945, 1207, 593, 3095, 3468, 1873, 515, 25...","[1217, 2717, 1213, 1090, 1084, 3654, 3735, 2728, 1968, 1103, 902, 2852, 2312, 1253, 1244, 3699, ...",129,73,56
2,3,"[593, 2858, 3534, 1968, 1431, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 31...","[3, 4, 3, 4, 3, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5, 5, 4, 4, 4, 5, 3, 1, 4, 5, 4, 5, 4, 4, 4, 4, 4, 2,...","[978297018, 978297039, 978297068, 978297068, 978297095, 978297095, 978297396, 978297419, 9782974...","[0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[2858, 1968, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 3168, 1210, 1291, 2...","[593, 3534, 1431, 1580, 1261, 2617, 648, 3114, 2997, 3619, 1270, 1265, 1641, 3868]",51,37,14
3,4,"[1210, 1097, 3468, 480, 3527, 260, 1196, 1198, 1387, 2028, 2366, 1201, 2692, 2947, 1214, 3418, 3...","[3, 4, 5, 4, 1, 5, 2, 5, 5, 5, 4, 5, 5, 5, 4, 4, 4, 5, 4, 4, 5]","[978293924, 978293964, 978294008, 978294008, 978294008, 978294199, 978294199, 978294199, 9782941...","[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1097, 3468, 480, 260, 1198, 1387, 2028, 2366, 1201, 2692, 2947, 1214, 3418, 3702, 1240, 2951, 1...","[1210, 3527, 1196]",21,18,3
4,5,"[2717, 908, 919, 1250, 356, 2858, 1127, 2188, 2683, 3051, 2997, 2770, 2355, 2908, 3786, 3016, 27...","[1, 4, 4, 5, 1, 4, 1, 1, 3, 2, 5, 4, 5, 4, 3, 4, 3, 2, 5, 1, 3, 3, 4, 3, 4, 3, 4, 2, 4, 4, 5, 4,...","[978241072, 978241072, 978241072, 978241112, 978241112, 978241390, 978241390, 978241390, 9782414...","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,...","[908, 919, 1250, 2858, 2997, 2770, 2355, 2908, 3016, 2599, 2959, 2333, 2580, 2318, 2390, 913, 27...","[2717, 356, 1127, 2188, 2683, 3051, 3786, 2759, 1093, 3113, 3408, 2428, 3409, 2716, 2607, 2734, ...",198,82,116
...,...,...,...,...,...,...,...,...,...,...
6035,6036,"[1721, 2428, 3438, 1883, 2376, 2492, 2826, 2827, 2858, 2572, 2683, 2699, 2706, 2707, 3005, 2842,...","[4, 2, 2, 4, 2, 1, 1, 2, 5, 2, 3, 2, 2, 2, 2, 2, 1, 2, 4, 1, 1, 5, 2, 3, 3, 4, 2, 3, 3, 2, 2, 2,...","[956709349, 956709350, 956709350, 956709350, 956709350, 956709453, 956709453, 956709453, 9567094...","[1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","[1721, 1883, 2858, 2997, 2710, 223, 3203, 1093, 3408, 2575, 2599, 2336, 3159, 2580, 2712, 2757, ...","[2428, 3438, 2376, 2492, 2826, 2827, 2572, 2683, 2699, 2706, 2707, 3005, 2842, 2555, 2975, 3285,...",888,399,489
6036,6037,"[1882, 3508, 702, 1267, 2028, 3148, 858, 562, 912, 3543, 1221, 923, 2804, 2858, 2132, 1193, 318,...","[1, 3, 2, 5, 4, 4, 5, 4, 4, 3, 5, 4, 4, 4, 3, 4, 5, 3, 5, 4, 4, 4, 5, 3, 4, 5, 5, 4, 4, 4, 3, 4,...","[956708997, 956708997, 956708997, 956708997, 956708997, 956709118, 956709118, 956709118, 9567091...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,...","[1267, 2028, 3148, 858, 562, 912, 1221, 923, 2804, 2858, 1193, 318, 527, 3006, 908, 3196, 1225, ...","[1882, 3508, 702, 3543, 2132, 1250, 1299, 1237, 1949, 2728, 3362, 1233, 2289, 3270, 2890, 2919, ...",202,120,82
6037,6038,"[920, 3396, 1210, 2146, 356, 1387, 1079, 1148, 3548, 1276, 2716, 3088, 232, 1136, 1223, 1296, 13...","[3, 3, 4, 4, 4, 2, 5, 5, 4, 3, 3, 5, 4, 4, 5, 5, 3, 4, 1, 5]","[956706827, 956706827, 956706876, 956706909, 956707005, 956707005, 956707547, 956707604, 9567076...","[0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1]","[1210, 2146, 356, 1079, 1148, 3548, 3088, 232, 1136, 1223, 1296, 1419, 1183]","[920, 3396, 1387, 1276, 2716, 1354, 2700]",20,13,7
6038,6039,"[282, 111, 2067, 930, 1230, 3022, 947, 3088, 3133, 1294, 3421, 2804, 1269, 955, 1244, 1276, 2622...","[4, 4, 4, 4, 3, 4, 3, 5, 3, 3, 2, 4, 3, 4, 2, 4, 3, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 4, 3,...","[956705158, 956705158, 956705178, 956705202, 956705202, 956705281, 956705281, 956705281, 9567052...","[1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,...","[282, 111, 2067, 930, 3022, 3088, 2804, 955, 1276, 2791, 2300, 2396, 1028, 1197, 3548, 951, 1211...","[1230, 947, 3133, 1294, 3421, 1269, 1244, 2622, 2863, 3072, 1066, 935, 2671, 1014, 909, 1265, 21...",123,90,33


In [38]:
# Count how many users remain under several candidate length filters
pos_len = sequence_df["pos_seq_length"]
neg_len = sequence_df["neg_seq_length"]
int(((pos_len >= 6) & (neg_len >= 6)).sum())
int(((pos_len >= 11) & (neg_len >= 11)).sum())
int(((pos_len >= 21) & (neg_len >= 4)).sum())
int(((pos_len >= 100) & (neg_len >= 100)).sum())
int((pos_len >= 11).sum())

5707

4882

4988

1050

5923

In [39]:
# Keep the users that have at least 11 positive and at least 11 negative interactions.
# Why: 1. for the pick-the-positive task, 1 positive and 9 negatives form a set of 10 candidates;
#      2. for the reverse test, 1 negative and 9 positives form a set of 10 candidates;
#      3. in addition, up to 10 history items can be supplied.
enough_pos = sequence_df["pos_seq_length"].ge(11)
enough_neg = sequence_df["neg_seq_length"].ge(11)
sequence_df_fliter = sequence_df.loc[enough_pos & enough_neg].reset_index(drop=True)
sequence_df_fliter

Unnamed: 0,userid,itemid_seq,rating_seq,timestamp_seq,label_seq,pos_seq,neg_seq,seq_length,pos_seq_length,neg_seq_length
0,2,"[1198, 1210, 1217, 2717, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1213, 1945, 1207, 593, 3095, 3...","[4, 4, 3, 3, 5, 4, 5, 5, 5, 4, 4, 2, 5, 4, 5, 4, 5, 4, 5, 2, 5, 4, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5,...","[978298124, 978298151, 978298151, 978298196, 978298261, 978298372, 978298391, 978298413, 9782984...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1198, 1210, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1945, 1207, 593, 3095, 3468, 1873, 515, 25...","[1217, 2717, 1213, 1090, 1084, 3654, 3735, 2728, 1968, 1103, 902, 2852, 2312, 1253, 1244, 3699, ...",129,73,56
1,3,"[593, 2858, 3534, 1968, 1431, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 31...","[3, 4, 3, 4, 3, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5, 5, 4, 4, 4, 5, 3, 1, 4, 5, 4, 5, 4, 4, 4, 4, 4, 2,...","[978297018, 978297039, 978297068, 978297068, 978297095, 978297095, 978297396, 978297419, 9782974...","[0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[2858, 1968, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 3168, 1210, 1291, 2...","[593, 3534, 1431, 1580, 1261, 2617, 648, 3114, 2997, 3619, 1270, 1265, 1641, 3868]",51,37,14
2,5,"[2717, 908, 919, 1250, 356, 2858, 1127, 2188, 2683, 3051, 2997, 2770, 2355, 2908, 3786, 3016, 27...","[1, 4, 4, 5, 1, 4, 1, 1, 3, 2, 5, 4, 5, 4, 3, 4, 3, 2, 5, 1, 3, 3, 4, 3, 4, 3, 4, 2, 4, 4, 5, 4,...","[978241072, 978241072, 978241072, 978241112, 978241112, 978241390, 978241390, 978241390, 9782414...","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,...","[908, 919, 1250, 2858, 2997, 2770, 2355, 2908, 3016, 2599, 2959, 2333, 2580, 2318, 2390, 913, 27...","[2717, 356, 1127, 2188, 2683, 3051, 3786, 2759, 1093, 3113, 3408, 2428, 3409, 2716, 2607, 2734, ...",198,82,116
3,6,"[3072, 2006, 912, 1043, 3534, 1210, 377, 17, 1441, 3685, 1296, 2081, 3699, 2100, 1674, 2802, 195...","[4, 4, 4, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 4, 4, 3, 5, 4, 5, 5, 3, 3, 4, 1, 4, 3, 4, 3, 3, 4, 3,...","[978236075, 978236122, 978236122, 978236219, 978236219, 978236219, 978236383, 978236383, 9782363...","[1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,...","[3072, 2006, 912, 1043, 3534, 17, 1441, 2081, 3699, 1674, 2802, 2406, 1101, 3501, 1088, 1188, 23...","[1210, 377, 3685, 1296, 2100, 1959, 2469, 3524, 2858, 1806, 2506, 2082, 2321, 590, 296, 3600, 20...",71,50,21
4,8,"[1210, 111, 908, 3481, 1573, 480, 589, 393, 2028, 3265, 3418, 2692, 2916, 2571, 110, 377, 2427, ...","[4, 5, 5, 4, 4, 5, 5, 2, 5, 5, 3, 5, 5, 5, 5, 4, 5, 4, 5, 5, 5, 3, 2, 5, 4, 3, 3, 5, 4, 5, 5, 2,...","[978228789, 978228832, 978228882, 978228882, 978228960, 978228960, 978229138, 978229138, 9782291...","[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,...","[1210, 111, 908, 3481, 1573, 480, 589, 2028, 3265, 2692, 2916, 2571, 110, 377, 2427, 555, 3267, ...","[393, 3418, 2278, 2490, 733, 2006, 2600, 39, 1059, 1277, 3260, 3386, 3252, 2908, 2336, 3250, 300...",139,85,54
...,...,...,...,...,...,...,...,...,...,...
4877,6035,"[902, 25, 898, 1007, 1193, 2827, 2628, 589, 3175, 1584, 2571, 32, 480, 1748, 2046, 1591, 1909, 2...","[3, 1, 1, 3, 5, 1, 5, 1, 5, 4, 5, 4, 2, 1, 4, 1, 2, 1, 3, 4, 1, 1, 5, 4, 4, 5, 4, 1, 2, 4, 1, 2,...","[956710846, 956710846, 956710846, 956710846, 956710879, 956711024, 956711024, 956711064, 9567110...","[0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,...","[1193, 2628, 3175, 1584, 2571, 32, 2046, 1372, 1527, 1580, 1356, 1396, 329, 2393, 1196, 1210, 12...","[902, 25, 898, 1007, 2827, 589, 480, 1748, 1591, 1909, 2916, 1653, 504, 1876, 3156, 788, 512, 20...",280,94,186
4878,6036,"[1721, 2428, 3438, 1883, 2376, 2492, 2826, 2827, 2858, 2572, 2683, 2699, 2706, 2707, 3005, 2842,...","[4, 2, 2, 4, 2, 1, 1, 2, 5, 2, 3, 2, 2, 2, 2, 2, 1, 2, 4, 1, 1, 5, 2, 3, 3, 4, 2, 3, 3, 2, 2, 2,...","[956709349, 956709350, 956709350, 956709350, 956709350, 956709453, 956709453, 956709453, 9567094...","[1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","[1721, 1883, 2858, 2997, 2710, 223, 3203, 1093, 3408, 2575, 2599, 2336, 3159, 2580, 2712, 2757, ...","[2428, 3438, 2376, 2492, 2826, 2827, 2572, 2683, 2699, 2706, 2707, 3005, 2842, 2555, 2975, 3285,...",888,399,489
4879,6037,"[1882, 3508, 702, 1267, 2028, 3148, 858, 562, 912, 3543, 1221, 923, 2804, 2858, 2132, 1193, 318,...","[1, 3, 2, 5, 4, 4, 5, 4, 4, 3, 5, 4, 4, 4, 3, 4, 5, 3, 5, 4, 4, 4, 5, 3, 4, 5, 5, 4, 4, 4, 3, 4,...","[956708997, 956708997, 956708997, 956708997, 956708997, 956709118, 956709118, 956709118, 9567091...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,...","[1267, 2028, 3148, 858, 562, 912, 1221, 923, 2804, 2858, 1193, 318, 527, 3006, 908, 3196, 1225, ...","[1882, 3508, 702, 3543, 2132, 1250, 1299, 1237, 1949, 2728, 3362, 1233, 2289, 3270, 2890, 2919, ...",202,120,82
4880,6039,"[282, 111, 2067, 930, 1230, 3022, 947, 3088, 3133, 1294, 3421, 2804, 1269, 955, 1244, 1276, 2622...","[4, 4, 4, 4, 3, 4, 3, 5, 3, 3, 2, 4, 3, 4, 2, 4, 3, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 4, 3,...","[956705158, 956705158, 956705178, 956705202, 956705202, 956705281, 956705281, 956705281, 9567052...","[1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,...","[282, 111, 2067, 930, 3022, 3088, 2804, 955, 1276, 2791, 2300, 2396, 1028, 1197, 3548, 951, 1211...","[1230, 947, 3133, 1294, 3421, 1269, 1244, 2622, 2863, 3072, 1066, 935, 2671, 1014, 909, 1265, 21...",123,90,33


In [40]:
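# The functions below can build different datasets depending on the task.
# The example used here is the task of picking out the positive item.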
import random

def get_target(row, is_seq):
    '''
        Pick the positive item that serves as the prediction target.

        row: object exposing pos_seq, the positive item ids.
        is_seq: 1 means sequential recommendation (the target is the last item
            of pos_seq); any other value means general recommendation (the
            target is one item drawn at random from pos_seq).

        Returns a single item id in both modes.
    '''
    if is_seq == 1:
        return row.pos_seq[-1]
    # random.sample returns a one-element list; unwrap it so both branches
    # return an item id that int(...) and list.remove(...) can consume.
    return random.sample(list(row.pos_seq), 1)[0]


def get_candidate(row, n_canidadate):
    '''
        Build a shuffled candidate list made of the target plus sampled negatives.

        row: object exposing target and neg_seq.
        n_canidadate: total number of candidates; n_canidadate-1 items are
            sampled without replacement from neg_seq.

        Returns a list holding int(row.target) and the sampled negatives in random order.
    '''
    positive = int(row.target)
    negatives = random.sample(list(row.neg_seq), n_canidadate - 1)
    pool = [positive, *negatives]
    random.shuffle(pool)
    return pool

def get_history(row, is_seq, n_history):
    '''
        Select the positive items shown as interaction history.

        row: object exposing pos_seq and, for the non-sequential mode, target.
        is_seq: 1 means sequential recommendation (history is the n_history
            items of pos_seq that come right before its last item); any other
            value means general recommendation (history is n_history items
            sampled at random from pos_seq after removing the target).
        n_history: number of interacted positive items to provide.

        Returns a list of item ids. row.pos_seq itself is left unmodified.
    '''
    if is_seq == 1:
        return row.pos_seq[-1-n_history : -1]
    # Work on a copy so removing the target does not alter row.pos_seq.
    # A plain list copy replaces the previous copy.deepcopy call, which raised
    # NameError because the copy module is never imported.
    his_tmp = list(row.pos_seq)
    his_tmp.remove(row.target)
    return random.sample(his_tmp, n_history)

In [41]:
# Sequential recommendation with n_history=5 and 5 candidates per user.
# Seed the RNG first so the sampled negatives and their shuffled order are
# reproducible when the notebook is re-run from a fresh kernel.
random.seed(42)
sequence_df_fliter["target"] = sequence_df_fliter.apply(get_target, is_seq=1, axis=1)
sequence_df_fliter["candidate"] = sequence_df_fliter.apply(get_candidate, n_canidadate=5, axis=1)
sequence_df_fliter["history"] = sequence_df_fliter.apply(get_history, is_seq=1, n_history=5, axis=1)
sequence_df_fliter

Unnamed: 0,userid,itemid_seq,rating_seq,timestamp_seq,label_seq,pos_seq,neg_seq,seq_length,pos_seq_length,neg_seq_length,target,candidate,history
0,2,"[1198, 1210, 1217, 2717, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1213, 1945, 1207, 593, 3095, 3...","[4, 4, 3, 3, 5, 4, 5, 5, 5, 4, 4, 2, 5, 4, 5, 4, 5, 4, 5, 2, 5, 4, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5,...","[978298124, 978298151, 978298151, 978298196, 978298261, 978298372, 978298391, 978298413, 9782984...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1198, 1210, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1945, 1207, 593, 3095, 3468, 1873, 515, 25...","[1217, 2717, 1213, 1090, 1084, 3654, 3735, 2728, 1968, 1103, 902, 2852, 2312, 1253, 1244, 3699, ...",129,73,56,1544,"[1090, 2728, 3699, 1544, 498]","[1370, 648, 368, 736, 2002]"
1,3,"[593, 2858, 3534, 1968, 1431, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 31...","[3, 4, 3, 4, 3, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5, 5, 4, 4, 4, 5, 3, 1, 4, 5, 4, 5, 4, 4, 4, 4, 4, 2,...","[978297018, 978297039, 978297068, 978297068, 978297095, 978297095, 978297396, 978297419, 9782974...","[0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[2858, 1968, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 3168, 1210, 1291, 2...","[593, 3534, 1431, 1580, 1261, 2617, 648, 3114, 2997, 3619, 1270, 1265, 1641, 3868]",51,37,14,2081,"[1265, 1641, 3114, 2617, 2081]","[1079, 1259, 2355, 3552, 104]"
2,5,"[2717, 908, 919, 1250, 356, 2858, 1127, 2188, 2683, 3051, 2997, 2770, 2355, 2908, 3786, 3016, 27...","[1, 4, 4, 5, 1, 4, 1, 1, 3, 2, 5, 4, 5, 4, 3, 4, 3, 2, 5, 1, 3, 3, 4, 3, 4, 3, 4, 2, 4, 4, 5, 4,...","[978241072, 978241072, 978241072, 978241112, 978241112, 978241390, 978241390, 978241390, 9782414...","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,...","[908, 919, 1250, 2858, 2997, 2770, 2355, 2908, 3016, 2599, 2959, 2333, 2580, 2318, 2390, 913, 27...","[2717, 356, 1127, 2188, 2683, 3051, 3786, 2759, 1093, 3113, 3408, 2428, 3409, 2716, 2607, 2734, ...",198,82,116,2029,"[202, 1747, 3249, 2013, 2029]","[1580, 1921, 1897, 2427, 551]"
3,6,"[3072, 2006, 912, 1043, 3534, 1210, 377, 17, 1441, 3685, 1296, 2081, 3699, 2100, 1674, 2802, 195...","[4, 4, 4, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 4, 4, 3, 5, 4, 5, 5, 3, 3, 4, 1, 4, 3, 4, 3, 3, 4, 3,...","[978236075, 978236122, 978236122, 978236219, 978236219, 978236219, 978236383, 978236383, 9782363...","[1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,...","[3072, 2006, 912, 1043, 3534, 17, 1441, 2081, 3699, 1674, 2802, 2406, 1101, 3501, 1088, 1188, 23...","[1210, 377, 3685, 1296, 2100, 1959, 2469, 3524, 2858, 1806, 2506, 2082, 2321, 590, 296, 3600, 20...",71,50,21,597,"[1007, 3508, 597, 2321, 1296]","[3624, 3565, 3717, 920, 1569]"
4,8,"[1210, 111, 908, 3481, 1573, 480, 589, 393, 2028, 3265, 3418, 2692, 2916, 2571, 110, 377, 2427, ...","[4, 5, 5, 4, 4, 5, 5, 2, 5, 5, 3, 5, 5, 5, 5, 4, 5, 4, 5, 5, 5, 3, 2, 5, 4, 3, 3, 5, 4, 5, 5, 2,...","[978228789, 978228832, 978228882, 978228882, 978228960, 978228960, 978229138, 978229138, 9782291...","[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,...","[1210, 111, 908, 3481, 1573, 480, 589, 2028, 3265, 2692, 2916, 2571, 110, 377, 2427, 555, 3267, ...","[393, 3418, 2278, 2490, 733, 2006, 2600, 39, 1059, 1277, 3260, 3386, 3252, 2908, 2336, 3250, 300...",139,85,54,741,"[2429, 1476, 2268, 3386, 741]","[3528, 105, 1027, 1274, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4877,6035,"[902, 25, 898, 1007, 1193, 2827, 2628, 589, 3175, 1584, 2571, 32, 480, 1748, 2046, 1591, 1909, 2...","[3, 1, 1, 3, 5, 1, 5, 1, 5, 4, 5, 4, 2, 1, 4, 1, 2, 1, 3, 4, 1, 1, 5, 4, 4, 5, 4, 1, 2, 4, 1, 2,...","[956710846, 956710846, 956710846, 956710846, 956710879, 956711024, 956711024, 956711064, 9567110...","[0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,...","[1193, 2628, 3175, 1584, 2571, 32, 2046, 1372, 1527, 1580, 1356, 1396, 329, 2393, 1196, 1210, 12...","[902, 25, 898, 1007, 2827, 589, 480, 1748, 1591, 1909, 2916, 1653, 504, 1876, 3156, 788, 512, 20...",280,94,186,153,"[673, 1590, 153, 1918, 1513]","[2108, 471, 1377, 1689, 2266]"
4878,6036,"[1721, 2428, 3438, 1883, 2376, 2492, 2826, 2827, 2858, 2572, 2683, 2699, 2706, 2707, 3005, 2842,...","[4, 2, 2, 4, 2, 1, 1, 2, 5, 2, 3, 2, 2, 2, 2, 2, 1, 2, 4, 1, 1, 5, 2, 3, 3, 4, 2, 3, 3, 2, 2, 2,...","[956709349, 956709350, 956709350, 956709350, 956709350, 956709453, 956709453, 956709453, 9567094...","[1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","[1721, 1883, 2858, 2997, 2710, 223, 3203, 1093, 3408, 2575, 2599, 2336, 3159, 2580, 2712, 2757, ...","[2428, 3438, 2376, 2492, 2826, 2827, 2572, 2683, 2699, 2706, 2707, 3005, 2842, 2555, 2975, 3285,...",888,399,489,1924,"[1357, 1924, 407, 410, 2723]","[1200, 1206, 2363, 1584, 2140]"
4879,6037,"[1882, 3508, 702, 1267, 2028, 3148, 858, 562, 912, 3543, 1221, 923, 2804, 2858, 2132, 1193, 318,...","[1, 3, 2, 5, 4, 4, 5, 4, 4, 3, 5, 4, 4, 4, 3, 4, 5, 3, 5, 4, 4, 4, 5, 3, 4, 5, 5, 4, 4, 4, 3, 4,...","[956708997, 956708997, 956708997, 956708997, 956708997, 956709118, 956709118, 956709118, 9567091...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,...","[1267, 2028, 3148, 858, 562, 912, 1221, 923, 2804, 2858, 1193, 318, 527, 3006, 908, 3196, 1225, ...","[1882, 3508, 702, 3543, 2132, 1250, 1299, 1237, 1949, 2728, 3362, 1233, 2289, 3270, 2890, 2919, ...",202,120,82,968,"[1248, 968, 2529, 3543, 589]","[1210, 1584, 2640, 2527, 2641]"
4880,6039,"[282, 111, 2067, 930, 1230, 3022, 947, 3088, 3133, 1294, 3421, 2804, 1269, 955, 1244, 1276, 2622...","[4, 4, 4, 4, 3, 4, 3, 5, 3, 3, 2, 4, 3, 4, 2, 4, 3, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 4, 3,...","[956705158, 956705158, 956705178, 956705202, 956705202, 956705281, 956705281, 956705281, 9567052...","[1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,...","[282, 111, 2067, 930, 3022, 3088, 2804, 955, 1276, 2791, 2300, 2396, 1028, 1197, 3548, 951, 1211...","[1230, 947, 3133, 1294, 3421, 1269, 1244, 2622, 2863, 3072, 1066, 935, 2671, 1014, 909, 1265, 21...",123,90,33,1254,"[1265, 783, 1254, 2174, 471]","[858, 1148, 912, 922, 1204]"


In [None]:
import os

data_name = "ml-1m"
task_name = "seq_history@5_candidate@5"
# exist_ok=True creates the directory tree only when it is missing, so no separate
# existence check is needed.
# NOTE(review): the export cell writes to ./Dataset/ml-1m/processed/..., not into this
# directory - confirm which location is intended.
os.makedirs(f"data/{data_name}/{task_name}", exist_ok=True)

In [None]:
# Write the processed frame as a tab-separated file.
# to_csv does not create missing directories, so make sure the target folder exists first.
processed_path = "./Dataset/ml-1m/processed/seq1_candidate5_history5"
os.makedirs(os.path.dirname(processed_path), exist_ok=True)
sequence_df_fliter.to_csv(processed_path, sep="\t", index=False)

In [56]:
# Display the column labels of the processed frame
sequence_df_fliter.columns

Index(['userid', 'itemid_seq', 'rating_seq', 'timestamp_seq', 'label_seq',
       'pos_seq', 'neg_seq', 'seq_length', 'pos_seq_length', 'neg_seq_length',
       'target', 'candidate', 'history'],
      dtype='object')

In [57]:
import ast

# Read the exported file back. The list-valued columns come back as strings such as
# "[1, 2, 3]" and are parsed into Python lists again.
# ast.literal_eval only accepts Python literals, so unlike eval it cannot execute
# arbitrary code that might be present in the file.
sequence_df = pd.read_csv("./Dataset/ml-1m/processed/seq1_candidate5_history5", delimiter="\t")
for col in ['itemid_seq', 'rating_seq', 'timestamp_seq', 'label_seq',
       'pos_seq', 'neg_seq', 'candidate', 'history']:
    sequence_df[col] = sequence_df[col].apply(ast.literal_eval)

sequence_df

Unnamed: 0,userid,itemid_seq,rating_seq,timestamp_seq,label_seq,pos_seq,neg_seq,seq_length,pos_seq_length,neg_seq_length,target,candidate,history
0,2,"[1198, 1210, 1217, 2717, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1213, 1945, 1207, 593, 3095, 3...","[4, 4, 3, 3, 5, 4, 5, 5, 5, 4, 4, 2, 5, 4, 5, 4, 5, 4, 5, 2, 5, 4, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5,...","[978298124, 978298151, 978298151, 978298196, 978298261, 978298372, 978298391, 978298413, 9782984...","[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1198, 1210, 1293, 2943, 1225, 1193, 318, 3030, 2858, 1945, 1207, 593, 3095, 3468, 1873, 515, 25...","[1217, 2717, 1213, 1090, 1084, 3654, 3735, 2728, 1968, 1103, 902, 2852, 2312, 1253, 1244, 3699, ...",128,73,55,1544,"[292, 3809, 3654, 1544, 1217]","[1370, 648, 368, 736, 2002]"
1,3,"[593, 2858, 3534, 1968, 1431, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 31...","[3, 4, 3, 4, 3, 4, 5, 5, 4, 5, 4, 5, 4, 4, 5, 5, 4, 4, 4, 5, 3, 1, 4, 5, 4, 5, 4, 4, 4, 4, 4, 2,...","[978297018, 978297039, 978297068, 978297068, 978297095, 978297095, 978297396, 978297419, 9782974...","[0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[2858, 1968, 1961, 1266, 1378, 1379, 3671, 590, 260, 1196, 2871, 1197, 1198, 3168, 1210, 1291, 2...","[593, 3534, 1431, 1580, 1261, 2617, 648, 3114, 2997, 3619, 1270, 1265, 1641, 3868]",51,37,14,2081,"[3534, 2081, 1431, 1261, 1270]","[1079, 1259, 2355, 3552, 104]"
2,5,"[2717, 908, 919, 1250, 356, 2858, 1127, 2188, 2683, 3051, 2997, 2770, 2355, 2908, 3786, 3016, 27...","[1, 4, 4, 5, 1, 4, 1, 1, 3, 2, 5, 4, 5, 4, 3, 4, 3, 2, 5, 1, 3, 3, 4, 3, 4, 3, 4, 2, 4, 4, 5, 4,...","[978241072, 978241072, 978241072, 978241112, 978241112, 978241390, 978241390, 978241390, 9782414...","[0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,...","[908, 919, 1250, 2858, 2997, 2770, 2355, 2908, 3016, 2599, 2959, 2333, 2580, 2318, 2390, 913, 27...","[2717, 356, 1127, 2188, 2683, 3051, 3786, 2759, 1093, 3113, 3408, 2428, 3409, 2716, 2607, 2734, ...",198,82,116,2029,"[2029, 2058, 581, 1719, 3418]","[1580, 1921, 1897, 2427, 551]"
3,6,"[3072, 2006, 912, 1043, 3534, 1210, 377, 17, 1441, 3685, 1296, 2081, 3699, 2100, 1674, 2802, 195...","[4, 4, 4, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 4, 4, 3, 5, 4, 5, 5, 3, 3, 4, 1, 4, 3, 4, 3, 3, 4, 3,...","[978236075, 978236122, 978236122, 978236219, 978236219, 978236219, 978236383, 978236383, 9782363...","[1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,...","[3072, 2006, 912, 1043, 3534, 17, 1441, 2081, 3699, 1674, 2802, 2406, 1101, 3501, 1088, 1188, 23...","[1210, 377, 3685, 1296, 2100, 1959, 2469, 3524, 2858, 1806, 2506, 2082, 2321, 590, 296, 3600, 20...",71,50,21,597,"[1210, 2082, 377, 1007, 597]","[3624, 3565, 3717, 920, 1569]"
4,8,"[1210, 111, 908, 3481, 1573, 480, 589, 393, 2028, 3265, 3418, 2692, 2916, 2571, 110, 377, 2427, ...","[4, 5, 5, 4, 4, 5, 5, 2, 5, 5, 3, 5, 5, 5, 5, 4, 5, 4, 5, 5, 5, 3, 2, 5, 4, 3, 3, 5, 4, 5, 5, 2,...","[978228789, 978228832, 978228882, 978228882, 978228960, 978228960, 978229138, 978229138, 9782291...","[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,...","[1210, 111, 908, 3481, 1573, 480, 589, 2028, 3265, 2692, 2916, 2571, 110, 377, 2427, 555, 3267, ...","[393, 3418, 2278, 2490, 733, 2006, 2600, 39, 1059, 1277, 3260, 3386, 3252, 2908, 2336, 3250, 300...",139,85,54,741,"[1277, 741, 1476, 2490, 2006]","[3528, 105, 1027, 1274, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4844,6035,"[902, 25, 898, 1007, 1193, 2827, 2628, 589, 3175, 1584, 2571, 32, 480, 1748, 2046, 1591, 1909, 2...","[3, 1, 1, 3, 5, 1, 5, 1, 5, 4, 5, 4, 2, 1, 4, 1, 2, 1, 3, 4, 1, 1, 5, 4, 4, 5, 4, 1, 2, 4, 1, 2,...","[956710846, 956710846, 956710846, 956710846, 956710879, 956711024, 956711024, 956711064, 9567110...","[0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,...","[1193, 2628, 3175, 1584, 2571, 32, 2046, 1372, 1527, 1580, 1356, 1396, 329, 2393, 1196, 1210, 12...","[902, 25, 898, 1007, 2827, 589, 480, 1748, 1591, 1909, 2916, 1653, 504, 1876, 3156, 788, 512, 20...",280,94,186,153,"[153, 504, 1480, 1839, 4]","[2108, 471, 1377, 1689, 2266]"
4845,6036,"[1721, 2428, 3438, 1883, 2376, 2492, 2826, 2827, 2858, 2572, 2683, 2699, 2706, 2707, 3005, 2842,...","[4, 2, 2, 4, 2, 1, 1, 2, 5, 2, 3, 2, 2, 2, 2, 2, 1, 2, 4, 1, 1, 5, 2, 3, 3, 4, 2, 3, 3, 2, 2, 2,...","[956709349, 956709350, 956709350, 956709350, 956709350, 956709453, 956709453, 956709453, 9567094...","[1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...","[1721, 1883, 2858, 2997, 2710, 223, 3203, 1093, 3408, 2575, 2599, 2336, 3159, 2580, 2712, 2757, ...","[2428, 3438, 2376, 2492, 2826, 2827, 2572, 2683, 2699, 2706, 2707, 3005, 2842, 2555, 2975, 3285,...",888,399,489,1924,"[1924, 357, 504, 1625, 3363]","[1200, 1206, 2363, 1584, 2140]"
4846,6037,"[1882, 3508, 702, 1267, 2028, 3148, 858, 562, 912, 3543, 1221, 923, 2804, 2858, 2132, 1193, 318,...","[1, 3, 2, 5, 4, 4, 5, 4, 4, 3, 5, 4, 4, 4, 3, 4, 5, 3, 5, 4, 4, 4, 5, 3, 4, 5, 5, 4, 4, 4, 3, 4,...","[956708997, 956708997, 956708997, 956708997, 956708997, 956709118, 956709118, 956709118, 9567091...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,...","[1267, 2028, 3148, 858, 562, 912, 1221, 923, 2804, 2858, 1193, 318, 527, 3006, 908, 3196, 1225, ...","[1882, 3508, 702, 3543, 2132, 1250, 1299, 1237, 1949, 2728, 3362, 1233, 2289, 3270, 2890, 2919, ...",202,120,82,968,"[435, 1407, 2919, 968, 2728]","[1210, 1584, 2640, 2527, 2641]"
4847,6039,"[282, 111, 2067, 930, 1230, 3022, 947, 3088, 3133, 1294, 3421, 2804, 1269, 955, 1244, 1276, 2622...","[4, 4, 4, 4, 3, 4, 3, 5, 3, 3, 2, 4, 3, 4, 2, 4, 3, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 3, 3, 4, 3,...","[956705158, 956705158, 956705178, 956705202, 956705202, 956705281, 956705281, 956705281, 9567052...","[1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,...","[282, 111, 2067, 930, 3022, 3088, 2804, 955, 1276, 2791, 2300, 2396, 1028, 1197, 3548, 951, 1211...","[1230, 947, 3133, 1294, 3421, 1269, 1244, 2622, 2863, 3072, 1066, 935, 2671, 1014, 909, 1265, 21...",123,90,33,1254,"[1254, 2946, 1230, 3133, 2087]","[858, 1148, 912, 922, 1204]"
