In [52]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse as sps
%matplotlib inline

#train_final.csv - the training set of interactions
train_final = pd.read_csv('input/train_final.csv', delimiter = "\t");

#tracks_final.csv - supplementary information about the items
tracks_final = pd.read_csv('input/tracks_final.csv', delimiter = "\t");

#playlists_final.csv - supplementary information about the users
playlists_final = pd.read_csv('input/playlists_final.csv', delimiter = "\t");

#target_playlists.csv - the set of target playlists that will receive recommendations
target_playlists = pd.read_csv('input/target_playlists.csv');

#target_tracks.csv - the set of target items (tracks) to be recommended
target_tracks = pd.read_csv('input/target_tracks.csv');

#Let's have a look at the train data. 
train_final.head()

Unnamed: 0,playlist_id,track_id
0,3271849,2801526
1,5616275,727878
2,11267488,2805283
3,10103900,1515105
4,3836898,2945623


In [53]:
#Now we need to remove some redundant stuff. 

#We will remove all song which are not: 1. occurring more than 10 times in train_final and 2. not in the target_tracks. 

popularity = train_final.groupby(by="track_id").playlist_id.nunique().to_frame()

#remove index name
popularity.reset_index(level = 0, inplace = True)

#Rename the columns
popularity.columns = ['track_id','occurrences']

#Remove all targeted tracks - TESTED, working as expected
tracks_relevant = popularity[~popularity['track_id'].isin(target_tracks['track_id'])]

#Remove tracks occurring less than 10 times
tracks_relevant = tracks_relevant[tracks_relevant['occurrences'] > 10]

#Add the targeteted tracks back again
tracks_relevant = pd.concat([tracks_relevant, target_tracks])

tracks_relevant.shape


(41756, 2)

In [54]:
#We will remove all playlists which are not: 1. containing more than 5 tracks and 2. not in the target_playlists.

playlists_sizes = train_final.groupby(by="playlist_id").track_id.nunique().to_frame()

#remove index name
playlists_sizes.reset_index(level = 0, inplace = True)

#Rename the columns
playlists_sizes.columns = ['playlist_id','size']

print(playlists_sizes.shape)

#Remove all targeted playlists TESTED works
playlists_relevant = playlist_sizes[~playlist_sizes['playlist_id'].isin(target_playlists['playlist_id'])]

#Remove playlists of size less than 10
playlists_relevant = playlists_relevant[playlists_relevant['size'] > 10]

#Add the targeteted playlists back again
playlists_relevant = pd.concat([playlists_relevant, target_playlists])

print(playlists_relevant.shape)


#WORKING! 

(45649, 2)
(23618, 2)


In [55]:
#Now we have to create a set of the relevant train data. 


print(train_final.shape)

train_relevant = train_final[train_final['track_id'].isin(tracks_relevant['track_id'])]

print(train_relevant.shape)

train_relevant = train_relevant[train_final['playlist_id'].isin(playlists_relevant['playlist_id'])]

print(train_relevant.shape)


(1040522, 2)
(731373, 2)
(667033, 2)


  # Remove the CWD from sys.path while we load stuff.


In [157]:
item_playlist_matrix = np.zeros([playlists_relevant.shape[0]+1, tracks_relevant.shape[0]+1]) 

In [158]:
#Very large matrix filled with zeros.
#Old size before removing used to be 5.756.100.000
#New size: 986.193.208
item_playlist_matrix.size

986258583

In [165]:
#If we translate each track_id to a track_index which will serve as matrix index, we can save a lot of time. 
#Same goes for playlist_id --> playlist_index. 


#We need a way to get from track_id to index in O(1).
#Let's create a dictionary

track_indexes = {}
counter = 1; 
for track_id in tracks_relevant['track_id']:
    item_playlist_matrix[0][counter] = track_id
    track_indexes[track_id] = counter
    counter += 1;
    
#and a way to get from playlist_id to index in O(1)


playlist_indexes = {}
counter = 1; 
for playlist_id in playlists_relevant['playlist_id']:
    item_playlist_matrix[counter][0] = playlist_id
    playlist_indexes[playlist_id] = counter
    counter += 1;

#felsökning
#print(playlists_relevant[playlists_relevant['playlist_id']==1515105])

#Now, in order to get a playlist_index we just go: playlist_index = playlist_indexes[playlist_id]
    
#How do we get it back in the end? We simply keep the tracks_relevant and playlist_relevant and access them by index in the end. 
# tracks_relevant[playlist_index] = playlist_id



7912
8268
8900
8954
9020
9213
9273
9346
10149
10510
12651
14277
14285
15387
15688
16792
16859
17223
18618
18755
18766
19155
19623
20160
20945
21052
21452
21567
22269
23041
23247
23421
23785
23956
24097
24532
25168
25606
25806
26469
27685
27904
28537
29407
29448
30606
30747
31495
32522
32543
32903
33795
33901
34706
35761
35966
38022
38226
38505
38923
39637
40086
40954
41392
41685
42736
42982
43169
43234
43823
44158
44442
44591
44987
45231
46019
46330
48673
48916
48992
49676
49873
50780
52080
52474
52576
53032
55235
55564
56885
57254
57292
57486
57779
59107
59872
60080
60284
61092
61413
63151
63599
63989
64097
64662
64948
65628
66866
67505
67994
69218
70026
71896
71897
75623
75776
76111
76434
76548
76572
76774
77382
77519
78206
81048
81195
81620
82704
83499
83713
85090
86884
88490
89083
89202
89624
91070
91768
92694
93140
93197
94364
94515
98334
99527
100428
100503
101075
102158
103062
103464
104170
104834
105578
107486
110530
110565
110754
110899
111023
112250
112455
112705
112802
11304

3119725
3120940
3121302
3121675
3122042
3122112
3123553
3126264
3126413
3126575
3126605
3126693
3128020
3129594
3130062
3130339
3131131
3131242
3132612
3133491
3133720
3135366
3135892
3136255
3137337
3137426
3138234
3139304
3140456
3143429
3143452
3143767
3144136
3144910
3145919
3146868
3147219
3147224
3147616
3147886
3148773
3150060
3150192
3151352
3151628
3151726
3152454
3152756
3153033
3153165
3153181
3153862
3153895
3153940
3154210
3155219
3157631
3157750
3157943
3159890
3161448
3161524
3161909
3161919
3163301
3163659
3163794
3163989
3164911
3165524
3166420
3166545
3166638
3167530
3167682
3167722
3167809
3167871
3168319
3168355
3168363
3168946
3169214
3172664
3172919
3173156
3174002
3174283
3174578
3174885
3174969
3175811
3177065
3177486
3177662
3177984
3178641
3180575
3180709
3180866
3180936
3181042
3181277
3181411
3181447
3181657
3181807
3181816
3183341
3183553
3186657
3187034
3187860
3190491
3193196
3193272
3194021
3194321
3194789
3195142
3195206
3195364
3195381
3195769
3195804


4182023
4182106
4182120
4182263
4182295
4182310
4182327
4182335
4182339
4182683
4182798
4183441
4183447
4183872
4185330
4185493
4186272
4187664
4187801
4188071
4188179
4188452
4188530
4189976
4190022
4190164
4192683
4193864
4194425
4194702
4194986
4195516
4196374
4196973
4197105
4197616
4197772
4198693
4199324
4199453
4202124
4202927
4202962
4202978
4203601
4204771
4205163
4205256
4205853
4206389
4206922
4209660
4211601
4211692
4213326
4217039
4217207
4217238
4218176
4218236
4218393
4220988
4221823
4223441
4223771
4223834
4223866
4224962
4225579
4225722
4226921
4230778
4232784
4232864
4233140
4233442
4233968
4235300
4235853
4236613
4238424
4239129
4240266
4240338
4240534
4242205
4242342
4242493
4243383
4243748
4244826
4245612
4246283
4246291
4247027
4249497
4249690
4251023
4251028
4251580
4252088
4252360
4252781
4253278
4254115
4255018
4255034
4255498
4255561
4257160
4257290
4259771
4260868
4261052
4262249
4262582
4263265
4264389
4266202
4266281
4267901
4268818
4272086
4272151
4273085


5298893
5299098
5299114
5299919
5300088
5301455
5301744
5301849
5302289
5302466
5302585
5302862
5305354
5305401
5305605
5305824
5306441
5306498
5306511
5307199
5307646
5308162
5308410
5310061
5310369
5310372
5310540
5310549
5312407
5314963
5315010
5315605
5316662
5317392
5318272
5318724
5318796
5318840
5319003
5320500
5320804
5321136
5321144
5321446
5321925
5323592
5324447
5324745
5328296
5328430
5329784
5330020
5330306
5330636
5330924
5331425
5332816
5333153
5333526
5333547
5333770
5336553
5337276
5341530
5341911
5342305
5342453
5342532
5343538
5343956
5344611
5346249
5346546
5346663
5346718
5347546
5347609
5349236
5349386
5349845
5350682
5350684
5352494
5353375
5355682
5356557
5356900
5357612
5357762
5357969
5358101
5358175
5358563
5358724
5359156
5363219
5364791
5365077
5365148
5365885
5366034
5367548
5367672
5368575
5369095
5369758
5373038
5373106
5373340
5375660
5377707
5377888
5378606
5379113
5379114
5380371
5380832
5381247
5381658
5382835
5382923
5382941
5383034
5383081
5386002


6285528
6286190
6287067
6287945
6288566
6289295
6289523
6290383
6292904
6293883
6294649
6294661
6298028
6298094
6298217
6298327
6298697
6300979
6301114
6301273
6301532
6302118
6303366
6304526
6305586
6306659
6306978
6307068
6308223
6308597
6309215
6311122
6311175
6312615
6312683
6313711
6313731
6314360
6314846
6315881
6320123
6320579
6321118
6321369
6321662
6322016
6322040
6322052
6322083
6322093
6322099
6322134
6322197
6322235
6322296
6323248
6323474
6323682
6325013
6325563
6327190
6327274
6328545
6328906
6330359
6330642
6331847
6331986
6332938
6333441
6333525
6335185
6336088
6336196
6337292
6338321
6338394
6338623
6339545
6340798
6341947
6343050
6343118
6343206
6345150
6345801
6347969
6347992
6351533
6353679
6354404
6355624
6355944
6356647
6358890
6359216
6359986
6362312
6362538
6363137
6363261
6365284
6367082
6371698
6371741
6372409
6372507
6372612
6373157
6375251
6376057
6376061
6376835
6378859
6378941
6379908
6381369
6382472
6383859
6383968
6384034
6384413
6384849
6385452
6385952


7337916
7338058
7339013
7339402
7339569
7340480
7340629
7340951
7342145
7342366
7342755
7342918
7342919
7343410
7344080
7345591
7345826
7347102
7348052
7348848
7349302
7349364
7349511
7349562
7352534
7354861
7356171
7357176
7357415
7357675
7357967
7358153
7358788
7360274
7361357
7363641
7365591
7367161
7368174
7368241
7368395
7369471
7369599
7370034
7371887
7372283
7372301
7372732
7374987
7375321
7375425
7375784
7375815
7375882
7376652
7379198
7380122
7380358
7380394
7382788
7383997
7385857
7385886
7385966
7386612
7387071
7388085
7388321
7388973
7389259
7390429
7390594
7390723
7391060
7392314
7393665
7393720
7393739
7394211
7394616
7394625
7394730
7394781
7394810
7395322
7396068
7396292
7396697
7397138
7397143
7397881
7398180
7398964
7400130
7400336
7401638
7401857
7401863
7401961
7402173
7402793
7402911
7404219
7404414
7404701
7405284
7405294
7406659
7406748
7406982
7407142
7407306
7407484
7409774
7409852
7410072
7410246
7416040
7416534
7416938
7417664
7418463
7418730
7418900
7418997


8216996
8217384
8217605
8217831
8218058
8218863
8221182
8221240
8221541
8221700
8221863
8221866
8222143
8222821
8223238
8223687
8224587
8224882
8228354
8228473
8228812
8228855
8229043
8230827
8230864
8230912
8231396
8232696
8236922
8237337
8238714
8239398
8239480
8240297
8240996
8241020
8243474
8244823
8245008
8245719
8245878
8245989
8247705
8248398
8249021
8249087
8249818
8249924
8250341
8250412
8250675
8251256
8251786
8252100
8252384
8252856
8253643
8254194
8254258
8254332
8254356
8254458
8254623
8256622
8256910
8257346
8257351
8258482
8258539
8261277
8261280
8261285
8261762
8263226
8263602
8263645
8263657
8263771
8264095
8266890
8266931
8267860
8268977
8269119
8269193
8269196
8269348
8269366
8269397
8269739
8270647
8270705
8270840
8271736
8272225
8272470
8272665
8272812
8273376
8275806
8275862
8275863
8275885
8276388
8276413
8278537
8278581
8280111
8280447
8281686
8283341
8283352
8283678
8283717
8283732
8284345
8284456
8284738
8285101
8285394
8285509
8286611
8287315
8287344
8287527


8974722
8975768
8978420
8978761
8981466
8981575
8981732
8982467
8982472
8982787
8982894
8983016
8983329
8983626
8984318
8985726
8986196
8986603
8987054
8987513
8988661
8988779
8989038
8989962
8990019
8990835
8991313
8991900
8992015
8992145
8993304
8994289
8994496
8994657
8994704
8996429
8996664
8997796
8999989
9000220
9000667
9001078
9001472
9001514
9002153
9003836
9004141
9004898
9005431
9005616
9006625
9006806
9006815
9007403
9008167
9008321
9008392
9008472
9009189
9009193
9009274
9009995
9010609
9010871
9011721
9011973
10012592
10012774
10013199
10015686
10016763
10019336
10019450
10019620
10019684
10019836
10020145
10020262
10021173
10022072
10022312
10022949
10023016
10023851
10023943
10027431
10027641
10027967
10028553
10029354
10030811
10031520
10031993
10032112
10032695
10033372
10033829
10034005
10035328
10035776
10037181
10037198
10037561
10038060
10038146
10038179
10038665
10039179
10039480
10040398
10040474
10040543
10040550
10040559
10040582
10040638
10040664
10041431
1004

10821922
10822650
10823307
10823531
10824233
10825207
10825212
10826148
10826417
10826933
10828222
10828753
10830481
10831058
10831610
10831624
10831720
10834705
10835168
10835682
10836203
10836598
10837147
10837350
10837880
10838631
10839283
10839450
10839837
10840064
10840426
10841248
10841674
10841834
10841885
10841964
10842280
10842812
10843252
10843332
10843829
10844060
10844695
10845460
10845502
10845755
10845893
10846235
10847020
10847023
10847762
10847766
10847814
10848987
10849170
10851722
10851764
10851943
10852241
10853177
10853458
10854203
10855569
10855607
10855674
10855801
10856342
10857357
10857576
10858541
10858673
10858775
10859103
10859136
10861057
10862614
10863297
10864050
10864098
10864109
10864145
10864348
10864552
10864737
10864774
10864825
10865017
10865275
10865692
10866883
10867725
10868281
10868449
10868868
10869336
10870677
10873106
10873367
10873625
10873628
10873635
10873921
10874104
10874146
10874635
10874725
10874733
10874977
10875355
10875903
10875906
1

5986784
4813887
5257246
8727207
10293391
6535385
5539635
8267113
17136
6127544
8542950
7578244
3092775
7445528
10203268
8407326
6926802
3210471
99608
4654378
6284697
7654797
3175120
1237120
4992377
11567923
2825522
7047552
6322279
10701030
3718052
3010951
3298268
11511853
11132162
1316704
8071471
8289475
8327408
3425837
122539
3485083
10808964
10585603
5591947
7075085
208633
10884430
6712708
5548630
8479915
7541916
3895464
7641288
6992939
7811127
7709624
3412034
5063302
8657731
1075540
6154149
2469861
8522392
10500655
8959590
5195699
3913158
8852004
577464
155151
10512400
5202941
10241476
4381991
5921943
7541798
6553236
3064774
184872
10989258
10839811
1577340
7423034
4761531
6081419
5332741
5813432
1245594
11450729
8412441
10660705
10595985
5939946
6424137
3005086
10216267
5973722
5144523
6616699
5881475
1713955
4329805
1689729
8913826
5130975
10154315
8139717
4098713
10313138
5433732
5234120
6900041
3099269
3492054
4067488
5500619
7968960
5823281
8166117
6093188
7699704
5797466
40981

72826
7739542
8851097
11396283
4352112
10676056
5449729
11158796
10161524
11404827
3306386
8450933
7826813
7270644
11436945
5829328
6311549
5654462
3435196
11639137
5039186
6888371
4272590
6243192
10914066
5386960
8249540
6480390
10092330
7597559
7333844
7541402
6405958
7792713
7482647
8993635
7948824
6001088
8419128
5760678
4706927
2741177
3460809
5558984
5286415
10178078
8714906
3617187
65535
1370133
5123555
11356594
4372571
892769
10645352
8960528
11571801
8378295
8273887
8987911
7468408
7786905
381957
10316130
8657812
7541856
8652652
10411812
11365998
1748497
3830724
10657098
7266479
5557957
6337235
56379
6231219
1020298
5185518
6322058
307419
2799753
10919241
4024294
10950773
4718310
7542183
8086140
3992613
8140492
3449610
3106386
11575442
10183397
10938607
4023906
8223708
10699791
11689258
10467175
4735981
8410886
11352591
6589080
11705758
8344555
11114146
10050014
220447
11537603
3103711
10668173
8843580
45305
7541684
566005
595999
8538298
10303447
5285242
7579406
7635037
435768

10122259
1387004
554048
6899985
6440161
10023840
6238756
7406991
1719170
2725210
3994312
4539781
7279498
5710198
6296936
1050636
10121899
876283
5719886
4286768
11042943
8696606
11210661
8825266
5016702
6578865
7841777
124650
8549423
8357674
10250550
8172288
10287297
8414985
3321133
6720338
4014894
5961266
10023275
8746018
3058448
7231780
10633849
11137098
10118489
5827516
10301135
1778307
439580
2607412
10175594
5200235
170514
6601487
2467491
7853801
6521158
2986135
8698555
4979851
5503609
6736510
3821416
71636
2966044
3125605
6088337
4739561
6689375
8636
3090630
7433057
11507549
3504432
10466744
3660337
5661608
2618341
11038970
7795943
4501994
1373647
6148455
1625176
7463251
2931449
10444299
6579375
10914338
4716527
10349316
9003605
6603896
7272395
78538
5745657
3415990
4308736
5770839
8425572
5909081
2806165
1958344
5764824
8758040
5224316
5864606
7637631
5903226
10342185
10393195
11662504
6106474
4630475
10212944
5705023
4963059
11746230
7751126
5850290
3006493
8991797
3357388
7541

10446504
1151087
7598295
6986303
8388310
8988970
3873786
11534833
4054294
7541847
3383824
8389876
11513050
7686187
10090754
10744628
3162171
4641507
5273517
5465989
1931489
11074304
3070276
10568956
6925864
6122904
97534
6968912
8552431
3316385
6570637
10283261
2963978
8694484
51884
7123795
7013679
8863325
7541432
7074613
5836512
11761555
6033826
29654
5249673
10377969
3564326
4230919
6899324
10259525
3412048
7182357
1523962
4732772
2939791
7826552
5709934
2949522
8471775
4132075
267066
11170978
5432645
8673040
8653186
4845778
7541853
10848538
4661614
8201614
8614289
10401885
3164307
3400410
8779132
5633853
3622974
5202936
10109165
11555313
8314794
5648631
5875779
3056736
8671207
4580352
3282069
8533862
5057011
5383888
3267012
11013790
10829603
11483074
4936059
3023211
11408325
8672809
5690373
5050183
3484971
3095683
5078143
8455801
7221661
4312266
10230343
7329149
8412938
5015255
10856669
3227684
10155147
11374916
1180364
10163963
7065332
8555075
10465526
11724380
6539806
10321023
457

2363592
6278540
5444906
7704885
11140231
4654273
3502723
10707134
6728326
10483130
6756218
11759272
6289089
11022017
11188968
11037786
5558667
10301818
8756186
10127300
4914042
7271254
11748030
10213245
10175563
7923893
5269432
4926617
4109280
8315050
7445534
4301772
7542191
8863774
10559307
11394565
6273030
8365534
8596601
2757091
10815733
11192511
6519754
8667435
11565527
3286580
11565005
11618009
54519
3180116
77604
6649528
10282701
10159265
11572750
11315596
6946223
8584511
10487758
3706432
5969846
3285599
8388316
3756446
10983823
8742958
4168821
8598286
11666276
11160166
569396
7025708
10136348
8318934
4818416
7176805
3074126
3829067
11171402
1509783
5824675
7060514
5425784
11378849
7046502
8935435
8789754
7934420
10147010
8913254
10916607
11210115
4277870
7994233
5292644
3947213
8069972
7520558
4459845
5644510
16226
10851742
6525629
11129252
7285429
11001015
1679853
122952
5712083
6138768
7021909
8050086
8638166
10469544
8544519
3241135
3226506
3439047
7987250
7355341
11110574
77

8744621
1335054
8872646
5413157
371058
8363882
11758194
10510765
3978497
2154567
7366798
8362706
8549787
5034996
4952467
4885189
7559717
3244447
6925084
8607421
5506406
10969772
4822866
6891048
4286920
10239399
8481784
4562008
11192560
3692768
10761289
7909822
10981107
956417
8453941
5320454
10979256
11146379
783410
10831619
11592656
6484300
3344503
8641718
8150491
7777423
3606313
6092408
662010
7567274
10636342
8636029
3149495
487691
3016011
937357
6683436
8019070
6668248
2034618
10026813
8061956
1819629
6906905
8697740
10968123
6703253
5142568
5465228
466352
8921543
4859032
6443452
1059160
3000947
6096258
6914723
8696073
7826038
4782352
5056106
1187208
7032979
8688869
6505433
7645626
6329454
560299
10847022
11603028
8087794
3827810
8188566
4051631
6014412
6831507
151951
3325846
8333537
5758965
3713906
597917
8064752
3630881
4843239
3803931
5429939
7032999
5367236
5638390
10614182
6803197
2933418
10942831
6695704
6947268
7946701
8228479
5653596
5783223
8065100
8976982
7548053
8878019


2766469
5417913
8452050
3116280
4860456
6817143
2262739
7652105
139699
5718107
745171
1460623
11718092
10377341
4184345
956940
11150905
11765405
11702703
8879184
1034941
8587833
9005278
7647354
8837139
7501976
241061
2998602
6571262
3652791
10322757
4120400
2209491
2631287
5907134
10946470
8430420
10384798
130367
1117923
6986194
10831622
153446
5891002
5324059
3879398
3024806
8228631
3874112
8778744
7309105
7776124
3269058
8325110
4457903
10919782
5838102
10323270
491966
7553218
11735520
3987881
7316888
4984721
6250989
3767189
5647012
10204775
8040314
3699174
10787124
11180160
5219763
6732443
4954102
8399807
2125331
3545736
6558365
16022
10917442
6493365
7628242
7950868
7541613
11661975
3460204
6522289
6470192
5325173
10308681
7243751
10154389
8540479
11064018
5948708
5851347
341899
5646908
3458011
6311186
53329
2922195
10808453
4738712
2551515
8750386
6066009
4952500
11591591
11170990
10606661
8803200
4679171
7359411
11270242
5625726
10485762
2776785
2970273
5435411
8681225
72489
3097

In [166]:
#Lets build that matrix. 

interactions = train_relevant.as_matrix()
for row in interactions:
    #Lets get the info
    playlist_id = row[0]
    track_id = row[1]
    
    #Now lets get the proper indexes. 
    playlist_index = playlist_indexes[playlist_id]
    track_index = track_indexes[track_id]
    
    #And now lets add it to the matrix
    item_playlist_matrix[playlist_index][track_index] = 1
    

print(item_playlist_matrix)


[[  0.00000000e+00   3.60000000e+02   1.37600000e+03 ...,   2.73921300e+06
    2.22864600e+06   2.26546300e+06]
 [  7.91200000e+03   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  8.26800000e+03   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 ..., 
 [  1.13695460e+07   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  7.93953500e+06   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  2.97021000e+05   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]]


In [167]:
#Now we have a item_playlist_matrix! Nice. lets save. 

sparse_matrix = sps.csr_matrix(item_playlist_matrix)

#sps.save_npz("sparse_item_playlist", sparse_matrix)

print(sparse_matrix.shape)



(23619, 41757)


In [168]:
#If we multiply the matrix with its transposition, we will get an item similarity matrix. 


playlist_similarities = sparse_matrix.dot(sparse_matrix.transpose())
print(playlist_similarities.shape)


(23619, 23619)


In [131]:
#Here we can get the similarities between two playlists.   
similarity = np.asarray(playlist_similarities.getrow(100).todense())[0][100]

b

41.0

In [171]:
def playlist_similarity(playlist_id1, playlist_id2):
    similarity = np.asarray(playlist_similarities.getrow(playlist_indexes[playlist_id1]).todense())[0][playlist_indexes[playlist_id2]]
    return similarity

In [78]:
#Get all playlists which contain a certain track:
playlists = (item_playlist_matrix[item_playlist_matrix.T[1666][:]==1])

(14, 41756)


In [253]:
def recommend(target_playlist_id):
    playlist_index = playlist_indexes[target_playlist_id]

    tracks = popularity.sort_values(by='occurences', ascending=False)[:20]

    #remove index name
    tracks.reset_index(level = 0, inplace = True)
    #Rename the columns
    tracks.columns = ['relevance','track_id','occurences']
    track_counter = 0
    #For each song not in the playlist (we will start with top 100 popular)
    tracks = tracks.as_matrix()
    for track in tracks:
        sum = 0
        track_id = track[1]
        track_index = track_indexes[track_id]

        #Get all playlists with this track. playlist_ids are in the [0] column. 
        playlists_with_track = (item_playlist_matrix[item_playlist_matrix.T[track_index][:]==1])

        #for each playlist containing the song
        for playlist in playlists_with_track:
            playlist_id = playlist[0]
            sum += playlist_similarity(target_playlist_id, playlist_id)
        track[0] = sum/(100000000000000*track[2]*track[2]*track[2]) #track[2] is the number of playlists containing the song. 
        track_counter += 1


        #relevance = sum/num of playlists containing the song

    tracks.sort(axis=0)
    print(tracks)
    recommendations = tracks[:-5, 1]

    return recommendations
print(recommend(10024884))




[[      0  204966     304]
 [      0  209196     306]
 [      0  276186     306]
 [      0  675104     306]
 [      0 1074579     307]
 [      0 1156143     315]
 [      0 1286763     317]
 [      0 1321053     324]
 [      0 1363985     330]
 [      0 1495432     339]
 [      0 1563309     346]
 [      0 1580480     349]
 [      0 1595978     370]
 [      0 2339150     387]
 [      0 2863395     390]
 [      0 3166665     391]
 [      0 3628787     403]
 [      0 3705881     425]
 [      0 3779477     432]
 [      0 3796108     476]]
[ 204966  209196  276186  675104 1074579 1156143 1286763 1321053 1363985
 1495432 1563309 1580480 1595978 2339150 2863395]


In [244]:
###Callin Recommend function, filling it into a DataFrame. ###
###This part should not be changed ##

zeros = np.zeros((target_playlists.size, 6), dtype = int)

#Create empty dataframe
recommendations = pd.DataFrame(zeros)

#Rename the first col
recommendations.columns = ['playlist_id', 1, 2, 3, 4, 5]

#recommendations.iloc[:, 0] = target_playlists['playlist_id']


print(target_playlists[:2]['playlist_id'])

#Fill the recommendations matrix through calling the recommend-function
counter = 0; 
for playlist_id in target_playlists[:2]['playlist_id']:
    #Add the playlist ids as first col
    recommendations.iloc[counter, 0] = playlist_id
    print(playlist_id)
    #Fill the recommendations to col 1-5 for each playlist
    recommendations.iloc[counter, 1:6] = recommend(playlist_id)
    counter += 1

print(recommendations)


def save_to_file():
    #Saves the recommendations dataframe to the .csv-file. 
    np.savetxt("recommendations.csv",recommendations, fmt = '%s,%s %s %s %s %s', header = "playlist_id,track_ids", newline = "\n")
    
save_to_file()

0    10024884
1    10624787
Name: playlist_id, dtype: int64
10024884
10624787
      playlist_id       1       2       3       4       5
0        10024884  183353  196922  204966  206081  209196
1        10624787  183353  196922  204966  206081  209196
2               0       0       0       0       0       0
3               0       0       0       0       0       0
4               0       0       0       0       0       0
5               0       0       0       0       0       0
6               0       0       0       0       0       0
7               0       0       0       0       0       0
8               0       0       0       0       0       0
9               0       0       0       0       0       0
10              0       0       0       0       0       0
11              0       0       0       0       0       0
12              0       0       0       0       0       0
13              0       0       0       0       0       0
14              0       0       0       0       0   

In [None]:
def save_to_file():
    #Saves the recommendations dataframe to the .csv-file. 
    np.savetxt("recommendations.csv",recommendations, fmt = '%s,%s %s %s %s %s', header = "playlist_id,track_ids", newline = "\n")
    
    
def test():
    #Do something
    print("Result: ")
    pass


save_to_file()