In [203]:
import pandas as pd
import numpy as np

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [204]:
articles = pd.read_csv('./data/articles.csv')
customers = pd.read_csv('./data/customers.csv')
transactions_train = pd.read_csv('./data/sept_2020.csv')

In [205]:
print(transactions_train.shape)
transactions_train.head()

(798269, 8)


Unnamed: 0.1,Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,year,month
0,30990055,2020-09-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,777148006,0.013542,1,2020,9
1,30990056,2020-09-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,835801001,0.018627,1,2020,9
2,30990057,2020-09-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,923134005,0.012695,1,2020,9
3,30990058,2020-09-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,865929003,0.016932,1,2020,9
4,30990059,2020-09-01,0005ed68483efa39644c45185550a82dd09acb07622acb...,863646004,0.033881,1,2020,9


In [206]:
# rating as the number of purchases for the item
article_trans = pd.DataFrame(transactions_train.groupby(['customer_id', 'article_id']).count()['t_dat']).reset_index().rename(columns={'t_dat': 'article_rating'})
article_trans.head()

Unnamed: 0,customer_id,article_id,article_rating
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,568601043,1
1,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,794321007,1
2,0000757967448a6cb83efb3ea7a3fb9d418ac7adf2379d...,448509014,1
3,0000757967448a6cb83efb3ea7a3fb9d418ac7adf2379d...,719530003,1
4,000172a9c322560c849754ffbdfdb2180d408aa7176b94...,685814001,3


In [207]:
# transactions per item
trans_per_item = pd.DataFrame(transactions_train.groupby(['article_id']).count()['customer_id']).reset_index()
trans_per_item.head()

Unnamed: 0,article_id,customer_id
0,108775044,10
1,110065011,1
2,111565001,95
3,111586001,250
4,111593001,257


In [208]:
filtered_trans_per_item = trans_per_item[trans_per_item.customer_id >= 20]

In [209]:
popular_articles = filtered_trans_per_item.article_id.tolist()

In [210]:
# transactions per user
trans_per_user = pd.DataFrame(transactions_train.groupby(['customer_id']).count()['article_id']).reset_index()
trans_per_user.head()

Unnamed: 0,customer_id,article_id
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,1
1,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,1
2,0000757967448a6cb83efb3ea7a3fb9d418ac7adf2379d...,2
3,000172a9c322560c849754ffbdfdb2180d408aa7176b94...,3
4,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,10


In [211]:
filtered_trans_per_user = trans_per_user[trans_per_user.article_id >= 10]

In [212]:
busy_users = filtered_trans_per_user.customer_id.tolist()

In [213]:
# filter transactions to only popular articles and busy users
filtered_trans = article_trans[article_trans.article_id.isin(popular_articles)]
filtered_trans = filtered_trans[filtered_trans.customer_id.isin(busy_users)]

In [214]:
print(filtered_trans.shape)
filtered_trans.head()

(175483, 3)


Unnamed: 0,customer_id,article_id,article_rating
5,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,572998013,1
9,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,865929003,1
11,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,909869004,1
12,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,923134003,1
13,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,923134005,1


In [215]:
trans_matrix = filtered_trans.pivot_table(index='customer_id', columns='article_id', values='article_rating')
# replace NaN values with 0
trans_matrix = trans_matrix.fillna(0)
# display the top few rows
trans_matrix.head()

article_id,111565001,111586001,111593001,111609001,123173001,129085001,130035001,146730001,148033001,153115019,153115020,153115021,153115040,153115043,156224001,156224002,156227002,156231001,156231002,158340001,160442007,160442010,160442043,176754003,179123001,179208001,179950001,179950002,179950017,182909001,186262001,186262006,186266004,189616001,189616006,189616028,189616032,189626001,194037001,200182001,200182002,201219001,201219003,201219016,201219017,212629047,212629048,214844001,214844002,214844003,215589002,224606019,226959007,226959008,228257001,228257002,228257003,228257004,228257008,235596002,237347045,237347059,237347060,237347063,240561001,243937001,243937028,250099001,250099002,253448001,253448002,253448003,265630004,265630006,266875001,266875006,268305001,270375004,278811006,278811011,282832018,293433002,294008005,294008056,297067002,297078001,297078002,297078008,300024058,301227002,301227019,301656026,305931001,307239004,307239005,307239010,309864002,309864012,309864013,309864016,312878010,316085001,316085002,316441001,316441005,319906009,322017003,322017040,324946001,324963051,333323030,333323038,335037005,337991001,337991009,338016009,341129001,341186001,342060001,347200001,349080001,349301001,351484002,351484039,357751001,357751002,362219001,362219002,372860001,372860002,372860024,372860068,372860069,372860072,372860073,373506001,373506004,373506008,373506027,375917006,379963002,386678001,392938001,395127001,395127006,395127011,395127012,399061008,399061026,399061028,399061032,399061033,399087010,399087021,399087047,399087054,399136004,399136009,399136027,399136061,399201042,399223001,399223052,399256001,399256002,399256005,399256023,399256036,399256037,400285006,403448008,407653001,407653002,408571002,408875001,408875021,408875023,408875028,412370001,417951001,417951005,429313008,433414040,433414041,436083001,436083002,436261001,441386001,441386004,448509001,448509014,448509018,448509026,448509028,448509030,448509031,448509035,449570010,449570041,449570056,449570061,449570064,452618001,452818001,452818031,456163060,456163083,456163084,456163085,456163086,456163087,456163090,457466001,457466002,464297007,464297031,464679015,464679016,464927027,469039019,470789001,470789019,470789030,470789031,470985003,473954008,473954013,473954014,473954015,479167002,480093001,484398001,486639003,486639013,492892002,492897001,492897002,493103034,493103045,497640002,499334001,501722004,501914002,505882002,505882006,506098006,506098007,506098016,507909001,507909003,...,928111001,928152001,928152003,928157002,928158002,928158003,928171001,928206001,928210002,928216002,928331001,928331002,928332001,928351001,928446001,928461001,928461002,928461003,928719001,928723001,928802001,928808001,928813001,928820001,928824001,928835001,928839001,928845001,928857001,928858001,928892001,928892002,928898002,928900001,928905001,928907001,928910001,928912001,928915001,928917003,928995001,929001001,929042001,929165001,929165002,929226002,929226003,929275001,929388002,929397001,929508001,929508002,929511001,929591001,929594001,929599001,929603001,929673001,929689001,929695001,929744001,929745001,929872001,929938002,929980001,929980002,929980006,930058001,930058002,930350001,930350002,930380001,930380003,930533001,930533003,930578001,930829001,930866001,931282001,931335001,931419001,931437001,931696001,931720001,931729004,931769001,931769003,931869001,931981001,932107001,932238001,932243001,932243002,932243003,932365008,932383001,932426001,932578001,932642001,932698001,932798001,932798002,933032001,933032002,933032003,933214001,933327001,933373001,933404001,933408001,933408002,933409001,933409002,933476001,933706001,933802002,933882001,933885004,933889001,933891001,933910002,933928002,933932001,933963001,933989001,933989002,933990001,934053001,934054002,934072001,934114001,934128001,934135001,934211004,934212003,934296001,934312001,934312002,934380001,934536001,934727001,934727002,934793001,934835001,934873001,934873002,934981001,935092001,935196001,935357001,935541001,935547001,935548001,935618001,935635001,935635002,935689001,935694002,935694003,935787001,935840001,935840002,935858001,935858002,935892001,936012001,936012003,936057001,936057002,936099001,936217001,936282001,936428003,936610001,936622001,936862001,936979001,936990001,937052001,937066001,937138001,937249001,937252001,937466001,937466002,937915001,937915002,937915003,938182001,938190001,938190002,938208001,938667001,938804001,939492001,939501001,939503001,939812001,939927001,940098001,940532001,941005001,941005002,941005003,941005004,941005005,941326002,941454002,941658001,941976001,942058001,942058002,942064001,942069001,942090001,942187001,942188001,942596001,942733001,942863002,942937001,942941001,942955001,942955002,942955003,942955004,942955005,943097001,943212001,944241001,944506001,944989001,945995002,946095001,946282001,946387001,946748001,946764002,946764003,946795001,946827001,947060001,947509001,947934001,949198001,949551001,949551002,952267001,953763001,956217002
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1
0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed6396773839f6bf71a9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00077dbd5c4a4991e092e63893ccf29294a9d5c46e85010e95f2fc10bf9437a4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000fa1b80857fa40bf25990bc1b1b65afc63923a8e4b5762db34e4bb46032d29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000fb6e772c5d0023892065e659963da90b1866035558ec16fca51b0dcfb7e59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [216]:
u = list(filtered_trans[filtered_trans.customer_id=='0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94'].article_id)       
n = list(filtered_trans[filtered_trans.customer_id=='000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed6396773839f6bf71a9'].article_id)
len(list(set(n).intersection(u)))

0

In [217]:
def jaccard_index(user, other_users):
    '''
    input: G-graph, K-number of requested suggested links for each node
    output: .csv file consisting of n lines that have K comma separated values on each line indicating the node index of the
    nodes that are suggested new links that are currently not conncted to that node.
    '''
    similarities = []
    user = list(user.index)[0]
    other_users = list(other_users.index)
    
    for other_user in other_users:
        
        # neighbors of node u
        articles_u = list(filtered_trans[filtered_trans.customer_id==user].article_id)       
                
        # neighbors of node n
        articles_n = list(filtered_trans[filtered_trans.customer_id==other_user].article_id)
        
        # keep nodes that appear in both neighbor lists and get length of remaining nodes
        same_nodes = len(list(set(articles_n).intersection(articles_u)))

        # we divide the above number with the total number of neighbors
        total_neighbors = len(articles_u)

        # append this to the list (common neighbors, node j)
        similarities.append(same_nodes/total_neighbors)
    
    return similarities

In [218]:
from sklearn.metrics.pairwise import cosine_similarity
import operator

# function to find most similar customers (k most similar)
def similar_customers(customer_id, matrix, k=3, sim='cosine'):
    # create a df of just the current user
    user = matrix[matrix.index == customer_id]
    
    # and a df of all other users
    other_users = matrix[matrix.index != customer_id]
    
    if sim == 'cosine':
        # calc cosine similarity between user and each other user
        similarities = cosine_similarity(user,other_users)[0].tolist()
    
    elif sim == 'jaccard':
        similarities = jaccard_index(user, other_users)
    
    # create list of indices of these users
    indices = other_users.index.tolist()
    
    # create key/values pairs of user index and their similarity
    index_similarity = dict(zip(indices, similarities))
    
    # sort by similarity
    index_similarity_sorted = sorted(index_similarity.items(), key=operator.itemgetter(1))
    index_similarity_sorted.reverse()
    
    # grab k users off the top
    top_users_similarities = index_similarity_sorted[:k]
    users = [u[0] for u in top_users_similarities]
    scores = [u[1] for u in top_users_similarities]
    
    return users, scores
    
current_user = '0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94'

# try it out
similar_customer_indices, scores = similar_customers(current_user, trans_matrix)
print(similar_customer_indices)
print(scores)

['2a7da7083a8f2c313fe1832034823ab67873d29a26e494c8b5aad53db79eb66c', '02d3b2db21e8b096b453c5c8baa840adace232f7b72feabb1f0db460fd1b415a', 'ccf8134ab4b4fe45d4f58d9262e36f11170ac7f4c8c286db562c0b415f67bb7a']
[0.34403123102809335, 0.33968311024337877, 0.28097574347450816]


In [182]:
# load vectors for similar users
similar_customers = trans_matrix[trans_matrix.index.isin(similar_customer_indices)]
# calc avg ratings across the 3 similar users
similar_customers = similar_customers.mean(axis=0)
# convert to dataframe so its easy to sort and filter
similar_users_df = pd.DataFrame(similar_customers, columns=['mean'])
similar_users_df

# load vector for the current user
user_df = trans_matrix[trans_matrix.index == '0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94']
# transpose it so its easier to filter
user_df_transposed = user_df.transpose()
# rename the column as 'rating'
user_df_transposed.columns = ['rating']
# remove any rows without a 0 value. Article not purchased yet
#user_df_transposed = user_df_transposed[user_df_transposed['rating']==0]
# generate a list of articles the user has not purchased
articles_unpurchased = user_df_transposed.index.tolist()
articles_unpurchased

# filter avg ratings of similar users
similar_users_df_filtered = similar_users_df[similar_users_df.index.isin(articles_unpurchased)]
# order the dataframe
similar_users_df_ordered = similar_users_df_filtered.sort_values(by=['mean'], ascending=False)
similar_users_df_ordered

df = similar_users_df_ordered.merge(user_df_transposed, on='article_id', how='left')
df = df.reset_index()
df

item_rating = []
for item_i in range(len(df.article_id)):
    sum_neighbors_numerator = []
    sum_neighbors_denominator = []
    avg_rating_u = user_df_transposed.rating.mean()
    prediction = 0
    for n in range(len(similar_customer_indices)):
        test = trans_matrix[trans_matrix.index.isin([similar_customer_indices[n]])].T
        test.columns=['rating']
        test = test.reset_index()

        sum_neighbors_numerator.append(scores[n] * (test[test.article_id==test.article_id[item_i]].rating[item_i] - np.mean(test.rating)))
        sum_neighbors_denominator.append(scores[n])

    prediction += avg_rating_u + (np.sum(sum_neighbors_numerator)/np.sum(sum_neighbors_denominator))
    item_rating.append(prediction)

df['item_rating'] = item_rating
similar_users_df_ordered = df.sort_values(by='item_rating', ascending=False)

similar_users_df_ordered

# grab the top n articles   
top_n_articles = similar_users_df_ordered.head(items)
top_n_articles_indices = top_n_articles.article_id.tolist()

Unnamed: 0,article_id,mean,rating,item_rating
488,891322004,0.0,0.0,2.494987
3755,752689001,0.0,0.0,0.872403
2489,927530006,0.0,0.0,0.711869
4902,599580014,0.0,0.0,0.711869
6237,809411004,0.0,0.0,0.702855
...,...,...,...,...
2218,942955004,0.0,0.0,-0.001378
2217,942955002,0.0,0.0,-0.001378
2216,941005001,0.0,0.0,-0.001378
2215,941005002,0.0,0.0,-0.001378


In [219]:
# make article recommendations for that customer
def recommend_item(user_index, similar_customer_indices, scores, matrix, items=5):
    
    # load vectors for similar users
    similar_customers = matrix[matrix.index.isin(similar_customer_indices)]
    # calc avg ratings across the 3 similar users
    similar_customers = similar_customers.mean(axis=0)
    # convert to dataframe so its easy to sort and filter
    similar_users_df = pd.DataFrame(similar_customers, columns=['mean'])
    
    # load vector for the current user
    user_df = matrix[matrix.index == user_index]
    # transpose it so its easier to filter
    user_df_transposed = user_df.transpose()
    # rename the column as 'rating'
    user_df_transposed.columns = ['rating']
    # remove any rows without a 0 value. Article not purchased yet
    #user_df_transposed = user_df_transposed[user_df_transposed['rating']==0]
    # generate a list of articles the user has not purchased
    articles_unpurchased = user_df_transposed.index.tolist()
    
    # filter avg ratings of similar users
    similar_users_df_filtered = similar_users_df[similar_users_df.index.isin(articles_unpurchased)]
    # order the dataframe
    similar_users_df_ordered = similar_users_df_filtered.sort_values(by=['mean'], ascending=False)
    
    df = similar_users_df_ordered.merge(user_df_transposed, on='article_id', how='left')
    df = df.reset_index()
    
    item_rating = []
    for item_i in range(len(df.article_id)):
        sum_neighbors_numerator = []
        sum_neighbors_denominator = []
        avg_rating_u = user_df_transposed.rating.mean()
        prediction = 0
        for n in range(len(similar_customer_indices)):
            test = trans_matrix[trans_matrix.index.isin([similar_customer_indices[n]])].T
            test.columns=['rating']
            test = test.reset_index()

            sum_neighbors_numerator.append(scores[n] * (test[test.article_id==test.article_id[item_i]].rating[item_i] - np.mean(test.rating)))
            sum_neighbors_denominator.append(scores[n])
        
        prediction += avg_rating_u + (np.sum(sum_neighbors_numerator)/np.sum(sum_neighbors_denominator))
        item_rating.append(prediction)
    
    df['item_rating'] = item_rating
    similar_users_df_ordered = df.sort_values(by='item_rating', ascending=False)
    
    # grab the top n articles   
    top_n_articles = similar_users_df_ordered.head(items)
    top_n_articles_indices = top_n_articles.article_id.tolist()
    
    # lookup these articles in the other dataframe to find names
    #article_information = articles[articles['article_id'].isin(top_n_articles_indices)]
    
    df = df.dropna(axis=0)
    rmse = np.sqrt((1/df.shape[0]) * np.sum((df.item_rating-df.rating)**2))
    
    # maybe change this to return the list for each customer
    #return article_information #items
    return top_n_articles_indices, rmse

# try it out
customerx_recommend, customerx_rmse = recommend_item('0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94', similar_customer_indices, trans_matrix)
print(customerx_recommend)
print(customerx_rmse)

TypeError: recommend_item() missing 1 required positional argument: 'matrix'

In [193]:
customerx_purchases = filtered_trans[filtered_trans.customer_id=='0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94'].article_id.tolist()
customerx_purchases

[572998013, 865929003, 909869004, 923134003, 923134005, 935858001]

In [194]:
# MAP@5
len(set(customerx_purchases).intersection(set(customerx_recommend)))

0

number of recommendations that are relevant for this customer = 3

number of items we recommended = 5

MAP@5 = 3/5

# Calculate MAP@5

## Using Cosine

Having found that we can successfully recommend 5 items for an individual customer, let's loop through our customers list and find the precision for each customer. This will allow us to find the average of those precisions and see how well the system performs. We can then adjust our similarity scores to see if there is a similarity score that performs better.

In [195]:
list(trans_matrix.head().index)

['0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37e011580a479e80aa94',
 '000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed6396773839f6bf71a9',
 '00077dbd5c4a4991e092e63893ccf29294a9d5c46e85010e95f2fc10bf9437a4',
 '000fa1b80857fa40bf25990bc1b1b65afc63923a8e4b5762db34e4bb46032d29',
 '000fb6e772c5d0023892065e659963da90b1866035558ec16fca51b0dcfb7e59']

In [224]:
# map@5 for the filtered list of september 2020 customers
map_5 = []
customer_rmse = []
for i in list(trans_matrix.head().index):
    similar_customer_indices, scores = similar_customers(current_user, trans_matrix)
    customerx_recommend, customerx_rmse = recommend_item(i, similar_customer_indices, scores, trans_matrix)
    customerx_purchases = filtered_trans[filtered_trans.customer_id==i].article_id.tolist()
    map_5.append(len(set(customerx_purchases).intersection(set(customerx_recommend)))/5)

In [226]:
np.mean(customerx_rmse)

0.08672670440428748

In [225]:
np.mean(map_5)

0.0

## Using Jaccard

In [96]:
# map@5 for the filtered list of september 2020 customers
map_5 = []
for i in list(trans_matrix.head().index):
    similar_customer_indices = similar_customers(i, trans_matrix, sim = 'jaccard')
    customerx_recommend = recommend_item(i, similar_customer_indices, trans_matrix)
    customerx_purchases = filtered_trans[filtered_trans.customer_id==i].article_id.tolist()
    map_5.append(len(set(customerx_purchases).intersection(set(customerx_recommend)))/5)

In [97]:
np.mean(map_5)

0.4800000000000001