In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as svm
%matplotlib inline
import pandas as pd
import csv
import numpy as np
from sklearn import preprocessing
import statistics
import sys

**Loading datasets for content-based filtering, which is based on the category and quality of each game**

In [31]:
# Read the interaction log from a CSV file in the current working directory.
ds_collab = pd.read_csv(filepath_or_buffer='steam-200k.csv')

In [32]:
# Preview the first ten rows of the raw frame.
ds_collab.iloc[:10]

Unnamed: 0,user_id,name,status,hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0
2,151603712,Fallout 4,purchase,1.0
3,151603712,Fallout 4,play,87.0
4,151603712,Spore,purchase,1.0
5,151603712,Spore,play,14.9
6,151603712,Fallout New Vegas,purchase,1.0
7,151603712,Fallout New Vegas,play,12.1
8,151603712,Left 4 Dead 2,purchase,1.0
9,151603712,Left 4 Dead 2,play,8.9


In [33]:
# Total number of cells (rows x columns) in the frame -- not the row count.
ds_collab.shape[0] * ds_collab.shape[1]

800000

In [34]:
# Keep only the rows whose status is 'play'; .copy() detaches the result
# from the original frame.
is_play_record = ds_collab['status'].eq('play')
ds_collab = ds_collab.loc[is_play_record].copy()

In [35]:
# Preview the first five rows after the 'play' filter.
ds_collab.iloc[:5]

Unnamed: 0,user_id,name,status,hours
1,151603712,The Elder Scrolls V Skyrim,play,273.0
3,151603712,Fallout 4,play,87.0
5,151603712,Spore,play,14.9
7,151603712,Fallout New Vegas,play,12.1
9,151603712,Left 4 Dead 2,play,8.9


**Data Pre-processing**

In [37]:
# Keep only games that have at least 20 rows in ds_collab, so per-game
# statistics are not computed from a handful of records.
# transform('size') broadcasts each game's row count back onto its rows using
# pandas' built-in kernel instead of calling Python's len() once per group.
records_per_game = ds_collab.groupby('name')['user_id'].transform('size')
ds_collab = ds_collab[records_per_game >= 20]

In [38]:
# Preview the first five rows after dropping rarely-played games.
ds_collab.iloc[:5]

Unnamed: 0,user_id,name,status,hours
1,151603712,The Elder Scrolls V Skyrim,play,273.0
3,151603712,Fallout 4,play,87.0
5,151603712,Spore,play,14.9
7,151603712,Fallout New Vegas,play,12.1
9,151603712,Left 4 Dead 2,play,8.9


In [42]:
# Build a per-game table with the mean of `hours`, exposed as `avg_hours`.
playtime_mean_per_game = (
    ds_collab
    .groupby(['name'], as_index=False)['hours']
    .mean()
    .rename(columns={'hours': 'avg_hours'})
)
playtime_mean_per_game

Unnamed: 0,name,avg_hours
0,7 Days to Die,41.586735
1,8BitMMO,0.455556
2,A.V.A - Alliance of Valiant Arms,4.040741
3,APB Reloaded,66.936232
4,ARK Survival Evolved,86.032278
...,...,...
634,Yet Another Zombie Defense,1.675000
635,Zombie Panic Source,28.380233
636,Zombies Monsters Robots,5.917857
637,sZone-Online,0.977586


In [43]:
# Spot-check: print the average playtime computed for one well-known title.
# A boolean mask replaces the row-by-row Python loop and does not rely on the
# frame having a default 0..n-1 index.
skyrim_avg_hours = playtime_mean_per_game.loc[
    playtime_mean_per_game['name'] == 'The Elder Scrolls V Skyrim', 'avg_hours'
]
# Print only the first match (and nothing when the title is absent), exactly
# like the original loop with its `break`.
if not skyrim_avg_hours.empty:
    print('The Elder Scrolls V Skyrim', skyrim_avg_hours.iloc[0])

The Elder Scrolls V Skyrim 104.71093057607091


In [45]:
# Assign each (user, game) play record a rating from 1 to 5 by comparing the
# user's hours with that game's average playtime. Only 'play' rows reach this
# point (purchase rows are filtered out earlier), so the rating depends on
# playtime alone.
preproccesed_ds = pd.merge(
    ds_collab,
    playtime_mean_per_game[['name', 'avg_hours']],
    on='name',
)

played_hours = preproccesed_ds['hours']
mean_hours = preproccesed_ds['avg_hours']

# np.select uses the FIRST condition that is true for a row, so the order of
# this list defines the priority of the ratings below.
# NOTE(review): rating 5 is given only to rows whose hours equal the maximum
# over the WHOLE frame, not the per-game maximum -- confirm this is intended.
conditions = [
    played_hours == played_hours.max(),
    played_hours >= 0.6 * mean_hours,
    (played_hours >= 0.3 * mean_hours) & (played_hours < 0.6 * mean_hours),
    (played_hours >= 0.2 * mean_hours) & (played_hours < 0.3 * mean_hours),
    played_hours >= 0,
]
rating = [5, 4, 3, 2, 1]

# Rows matching none of the conditions (e.g. missing hours) get the -1
# sentinel. The original pre-assigned -1 and then overwrote the whole column
# with np.select's implicit default of 0, so the sentinel never took effect.
preproccesed_ds['game_rating'] = np.select(conditions, rating, default=-1)
preproccesed_ds

Unnamed: 0,user_id,name,status,hours,avg_hours,game_rating
0,151603712,The Elder Scrolls V Skyrim,play,273.0,104.710931,4
1,59945701,The Elder Scrolls V Skyrim,play,58.0,104.710931,3
2,92107940,The Elder Scrolls V Skyrim,play,110.0,104.710931,4
3,250006052,The Elder Scrolls V Skyrim,play,465.0,104.710931,4
4,11373749,The Elder Scrolls V Skyrim,play,220.0,104.710931,4
...,...,...,...,...,...,...
57604,38317154,"Warhammer 40,000 Dawn of War Soulstorm",play,5.5,12.528000,3
57605,129478138,"Warhammer 40,000 Dawn of War Soulstorm",play,1.9,12.528000,1
57606,36404933,"Warhammer 40,000 Dawn of War Soulstorm",play,5.8,12.528000,3
57607,87201181,"Warhammer 40,000 Dawn of War Soulstorm",play,24.0,12.528000,4


In [51]:
# List every user whose rating for Skyrim is 3.
print("Users with same category for game - The Elder Scrolls V Skyrim")
# One vectorised mask replaces a Python loop that performed two label-based
# Series lookups per row across the whole frame.
skyrim_rating3_mask = (
    (preproccesed_ds['game_rating'] == 3)
    & (preproccesed_ds['name'] == 'The Elder Scrolls V Skyrim')
)
# Print one id per line, in the frame's row order.
for matched_user_id in preproccesed_ds.loc[skyrim_rating3_mask, 'user_id']:
    print(matched_user_id)

Users with same category for game - The Elder Scrolls V Skyrim
59945701
54103616
170491009
128790593
71082079
4834220
65229865
142001340
44482198
12610800
215160630
136495026
87907200
107377573
70970228
191747590
100478352
141873792
92322563
158476996
53898495
45974860
11403772
119170251
149790632
122679931
136866564
115958331
124878949
2083767
45120292
151234988
122798021
209626174
278678644
92663477
62633395
108007732
145320311
54568397
155718820
118695833
975449
66650717
15221799
93326304
91859894
132418423
22301321
59205262
94110492
71527252
170172944
302606492
65064340
38465050
155420511
133404545
130312302
160299889
97571329
181304845
127335438
79073449
59382844
68719896
240497102
95369808
99110207
115487966
92010366
132748085
49228258
201023624
98848653
112547463
197652539
149810726
59662830
111163130
92503355
112626210
116876958
61772065
65145468
16645459
110369840
224070533
134322141
172518437
90104676
277239386
81170946
139016142
58905429
43160799
144854157
77214425
85458882


In [53]:
# Print "user_id hours game_rating" for every Skyrim play record.
# Select the matching rows once instead of testing every row of the frame in
# a Python loop with chained label-based lookups.
skyrim_records = preproccesed_ds.loc[
    preproccesed_ds['name'] == 'The Elder Scrolls V Skyrim',
    ['user_id', 'hours', 'game_rating'],
]
# itertuples(name=None) yields plain tuples in the column order selected above.
for uid, played, rated in skyrim_records.itertuples(index=False, name=None):
    print(str(uid) + ' ' + str(played) + ' ' + str(rated))

151603712 273.0 4
59945701 58.0 3
92107940 110.0 4
250006052 465.0 4
11373749 220.0 4
54103616 35.0 3
56038151 14.6 1
94088853 320.0 4
116085629 80.0 4
148510973 73.0 4
9823354 143.0 4
100519466 3.5 1
180789959 5.0 1
168031436 14.1 1
41883322 107.0 4
170491009 48.0 3
190231757 18.0 1
65716118 22.0 2
98152188 643.0 4
162649407 403.0 4
128790593 46.0 3
130201800 313.0 4
71082079 33.0 3
25096601 82.0 4
42061089 13.0 1
227083521 0.5 1
60760816 110.0 4
72842694 142.0 4
20200395 118.0 4
4834220 58.0 3
65229865 57.0 3
65958466 70.0 4
77905942 95.0 4
198249024 341.0 4
68224834 4.8 1
34177747 5.8 1
44866715 12.6 1
142001340 37.0 3
104055167 234.0 4
117531196 6.1 1
64787956 6.3 1
131973876 2.8 1
44482198 33.0 3
167202599 105.0 4
159129006 16.2 1
156799348 22.0 2
68316900 106.0 4
102825821 2.0 1
147602462 104.0 4
52907921 23.0 2
12610800 61.0 3
78341587 21.0 2
298222970 1.7 1
96014467 242.0 4
215160630 45.0 3
131940023 83.0 4
136495026 46.0 3
24366790 569.0 4
89732768 69.0 4
87907200 53.0 3
11048