In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as svm
%matplotlib inline
import pandas as pd
import csv
import numpy as np
from sklearn import preprocessing
import statistics
import sys

**Loading datsets for content based filtering which is based on category and quality of game**

In [2]:
ds_collab = pd.read_csv('steam-200k.csv')

In [3]:
ds_collab.head(10)

Unnamed: 0,user_id,name,status,hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0
2,151603712,Fallout 4,purchase,1.0
3,151603712,Fallout 4,play,87.0
4,151603712,Spore,purchase,1.0
5,151603712,Spore,play,14.9
6,151603712,Fallout New Vegas,purchase,1.0
7,151603712,Fallout New Vegas,play,12.1
8,151603712,Left 4 Dead 2,purchase,1.0
9,151603712,Left 4 Dead 2,play,8.9


In [4]:
ds_collab.size

800000

In [5]:
ds_collab['filter_hours'] = ds_collab['hours']

In [6]:
ds_collab.head()

Unnamed: 0,user_id,name,status,hours,filter_hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,1.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,273.0
2,151603712,Fallout 4,purchase,1.0,1.0
3,151603712,Fallout 4,play,87.0,87.0
4,151603712,Spore,purchase,1.0,1.0


**Data Pre-processing**

In [7]:
#considering actual hours played by user(not purchased)
for i in range(len(ds_collab)):
    if ds_collab['status'][i] == 'purchase':
        ds_collab['filter_hours'][i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds_collab['filter_hours'][i] = 0


In [8]:
ds_collab.head(5)

Unnamed: 0,user_id,name,status,hours,filter_hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,273.0
2,151603712,Fallout 4,purchase,1.0,0.0
3,151603712,Fallout 4,play,87.0,87.0
4,151603712,Spore,purchase,1.0,0.0


In [9]:
ds_collab = ds_collab[ds_collab.groupby('name').user_id.transform(len) >= 20]

In [10]:
ds_collab.head()

Unnamed: 0,user_id,name,status,hours,filter_hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,273.0
2,151603712,Fallout 4,purchase,1.0,0.0
3,151603712,Fallout 4,play,87.0,87.0
4,151603712,Spore,purchase,1.0,0.0


In [11]:
#setting new attribute called avergae_playtime for each game
playtime_mean_per_game = ds_collab.groupby(['name'],as_index=False).filter_hours.mean()
playtime_mean_per_game.rename(columns = {'filter_hours':'avg_hours'}, inplace = True)
playtime_mean_per_game

Unnamed: 0,name,avg_hours
0,12 Labours of Hercules II The Cretan Bull,1.715000
1,3DMark,6.688889
2,404Sight,0.176190
3,7 Days to Die,20.276119
4,8BitBoy,0.800000
...,...,...
1513,iBomber Defense Pacific,0.745833
1514,resident evil 4 / biohazard 4,2.528947
1515,sZone-Online,0.354375
1516,the static speaks my name,0.095238


In [13]:
for i in range(len(playtime_mean_per_game)):
    if playtime_mean_per_game['name'][i] == 'The Elder Scrolls V Skyrim':
        print(playtime_mean_per_game['name'][i],playtime_mean_per_game['avg_hours'][i])
        break

The Elder Scrolls V Skyrim 50.85315638450502


In [14]:
#assigning rate values for each game given by user based on playtime and if purchased.
preproccesed_ds = pd.merge(ds_collab,playtime_mean_per_game[['name','avg_hours']],on='name')
conditions = [
    (preproccesed_ds['filter_hours'] == 1),
    (preproccesed_ds['filter_hours']>=0.6*preproccesed_ds['avg_hours']),
    (preproccesed_ds['filter_hours']>=0.3*preproccesed_ds['avg_hours']) & (preproccesed_ds['filter_hours']<0.6*preproccesed_ds['avg_hours']),
    (preproccesed_ds['filter_hours']>=0.2*preproccesed_ds['avg_hours']) & (preproccesed_ds['filter_hours']<0.3*preproccesed_ds['avg_hours']),
    preproccesed_ds['filter_hours']>=0,
]
rating = [5,4,3,2,1]
preproccesed_ds['game_rating'] = -1
preproccesed_ds['game_rating'] = np.select(conditions,rating)
preproccesed_ds

Unnamed: 0,user_id,name,status,hours,filter_hours,avg_hours,game_rating
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0.0,50.853156,1
1,151603712,The Elder Scrolls V Skyrim,play,273.0,273.0,50.853156,4
2,59945701,The Elder Scrolls V Skyrim,purchase,1.0,0.0,50.853156,1
3,59945701,The Elder Scrolls V Skyrim,play,58.0,58.0,50.853156,4
4,53875128,The Elder Scrolls V Skyrim,purchase,1.0,0.0,50.853156,1
...,...,...,...,...,...,...,...
180301,61070572,Lethal League,play,2.5,2.5,1.030769,4
180302,24632218,Lethal League,purchase,1.0,0.0,1.030769,1
180303,24632218,Lethal League,play,0.2,0.2,1.030769,1
180304,17495098,Lethal League,purchase,1.0,0.0,1.030769,1


In [16]:
print("Users with same category for game - The Elder Scrolls V Skyrim")
for i in range(len(preproccesed_ds)):
    if preproccesed_ds['game_rating'][i] == 1 and preproccesed_ds['name'][i] == 'The Elder Scrolls V Skyrim':
        print(preproccesed_ds['user_id'][i])

Users with same category for game - The Elder Scrolls V Skyrim
151603712
59945701
53875128
92107940
250006052
11373749
54103616
56038151
94088853
116085629
148510973
9823354
100519466
100519466
180789959
180789959
168031436
41883322
170491009
190231757
65716118
98152188
162649407
128790593
130201800
71082079
25096601
42061089
227083521
227083521
60760816
72842694
20200395
4834220
65229865
65958466
77905942
198249024
176033341
68224834
68224834
34177747
34177747
44866715
142001340
93454114
210744685
104055167
117531196
117531196
64787956
64787956
131973876
131973876
44482198
167202599
159129006
156799348
68316900
102825821
102825821
147602462
52907921
12610800
78341587
298222970
298222970
96014467
215160630
68049243
48028873
131940023
136495026
24366790
89732768
87907200
110489310
107377573
115037563
115037563
70970228
306690580
191747590
56333620
92393218
85759872
85759872
154123088
154123088
72488020
72488020
205343727
106147454
217709850
57974873
100478352
233277671
71603645
71603645