Importing libraries

In [61]:
import ast
import json
import pickle
from pathlib import Path

import pandas as pd
import scipy.sparse as sparse
from implicit.als import AlternatingLeastSquares

Loading data

In [62]:
# Load the three raw tables (reviews/recommendations, game catalogue, user
# summary) from the local ./dataset directory, in that order.
rec, games, users = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./dataset/recommendations.csv",
        "./dataset/games.csv",
        "./dataset/users.csv",
    )
)

In [63]:
# Preview the recommendations table (pandas abbreviates the display to the first and last rows).
rec

Unnamed: 0,app_id,helpful,funny,date,is_recommended,hours,user_id,review_id
0,975370,0,0,2022-12-12,True,36.3,51580,0
1,304390,4,0,2017-02-17,False,11.5,2586,1
2,1085660,2,0,2019-11-17,True,336.5,253880,2
3,703080,0,0,2022-09-23,True,27.4,259432,3
4,526870,0,0,2021-01-10,True,7.9,23869,4
...,...,...,...,...,...,...,...,...
41154789,633230,0,0,2021-02-15,True,41.0,1606890,41154789
41154790,758870,8,0,2019-07-18,False,8.0,1786254,41154790
41154791,696170,3,10,2018-03-26,False,2.0,6370324,41154791
41154792,696170,0,0,2018-06-11,True,4.0,1044289,41154792


In [64]:
# Preview the game catalogue (pandas abbreviates the display to the first and last rows).
games

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,11.99,0.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50867,2296380,I Expect You To Die 3: Cog in the Machine,2023-09-28,True,False,False,Very Positive,96,101,22.00,0.00,0.0,True
50868,1272080,PAYDAY 3,2023-09-21,True,False,False,Mostly Negative,38,29458,40.00,0.00,0.0,True
50869,1402110,Eternights,2023-09-11,True,False,False,Very Positive,89,1128,30.00,0.00,0.0,True
50870,2272250,Forgive Me Father 2,2023-10-19,True,False,False,Very Positive,95,82,17.00,0.00,0.0,True


In [65]:
# Preview the per-user summary table (pandas abbreviates the display to the first and last rows).
users

Unnamed: 0,user_id,products,reviews
0,7360263,359,0
1,14020781,156,1
2,8762579,329,4
3,4820647,176,4
4,5167327,98,2
...,...,...,...
14306059,5047430,6,0
14306060,5048153,0,0
14306061,5059205,31,0
14306062,5074363,0,0


In [66]:
# Column dtypes of the recommendations table. `date` is loaded as object
# because read_csv was called without date parsing.
rec.dtypes

app_id              int64
helpful             int64
funny               int64
date               object
is_recommended       bool
hours             float64
user_id             int64
review_id           int64
dtype: object

In [67]:
# Column dtypes of the game catalogue. `date_release` is loaded as object
# because read_csv was called without date parsing.
games.dtypes

app_id              int64
title              object
date_release       object
win                  bool
mac                  bool
linux                bool
rating             object
positive_ratio      int64
user_reviews        int64
price_final       float64
price_original    float64
discount          float64
steam_deck           bool
dtype: object

In [68]:
# Column dtypes of the per-user summary table.
users.dtypes

user_id     int64
products    int64
reviews     int64
dtype: object

In [69]:
# Keep only reviews with strictly positive recorded playtime (rows with
# hours <= 0 or NaN are dropped). The explicit .copy() gives an independent
# frame so later column assignments do not write into a slice of the original.
has_playtime = rec['hours'] > 0
rec = rec.loc[has_playtime].copy()

Encoding

In [70]:
# Give every distinct user_id / app_id a dense 0-based integer code, numbered
# in order of first appearance in `rec`. The codes are used as sparse-matrix
# coordinates when the interaction matrix is built.
unique_users = rec['user_id'].unique()
unique_games = rec['app_id'].unique()

user2id = dict(zip(unique_users, range(len(unique_users))))
game2id = dict(zip(unique_games, range(len(unique_games))))
# Reverse lookup: dense game code -> original app_id.
id2game = dict(zip(game2id.values(), game2id.keys()))

# Attach the dense codes to every interaction row.
rec['uid'] = rec['user_id'].map(user2id)
rec['gid'] = rec['app_id'].map(game2id)

Sparse Matrix

In [71]:
# Build the interaction matrix with games as rows (gid) and users as columns
# (uid); each stored value is the hours played. The shape is inferred from the
# largest row/column code, and converting COO -> CSR sums any repeated
# (gid, uid) pairs.
hours_played = rec['hours'].astype(float)
game_rows = rec['gid']
user_cols = rec['uid']

interactions_coo = sparse.coo_matrix((hours_played, (game_rows, user_cols)))
item_user_matrix = interactions_coo.tocsr()

In [72]:
# Inspect the matrix summary: rows are games (gid), columns are users (uid).
item_user_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 40978444 stored elements and shape (37528, 13760373)>

Training model

In [73]:
# `item_user_matrix` is laid out games x users (rows = gid, columns = uid), but
# implicit >= 0.5 expects `fit()` to receive a users x items CSR matrix.
# Fitting on the item-major matrix swaps the learned factor sets
# (`model.user_factors` would hold one row per game), so indices taken from
# `user2id` / `game2id` would address the wrong factors. Transpose to
# user-major before fitting.
# NOTE(review): implicit < 0.5 expected items x users instead. If the
# environment is pinned to such a version, pass `item_user_matrix` unchanged.
user_item_matrix = item_user_matrix.T.tocsr()

model = AlternatingLeastSquares(
    factors=50,           # dimensionality of the latent user/game vectors
    regularization=0.01,
    iterations=20,
    use_gpu=False,
    random_state=42,      # fixed seed so factor initialisation is reproducible
)
model.fit(user_item_matrix)

  0%|          | 0/20 [00:00<?, ?it/s]

Merge metadata

In [74]:
# The metadata file is JSON Lines: one JSON object per line.
with open("./dataset/games_metadata.json", "r", encoding="utf-8") as f:
    metadata_raw = list(map(json.loads, f))

# Only the free-text fields are taken from the metadata file.
metadata_df = pd.DataFrame(metadata_raw)
metadata_df = metadata_df.loc[:, ['app_id', 'description', 'tags']]

# Title, release date, price, rating and platform flags come from games.csv.
catalogue_columns = [
    'app_id', 'title', 'date_release', 'price_final', 'steam_deck',
    'rating', 'win', 'mac', 'linux',
]
games_df = games[catalogue_columns]

# Left join keeps every game in the catalogue even when it has no metadata row.
merged_meta = games_df.merge(metadata_df, on="app_id", how="left")


def _format_tags(tag_list):
    """Join a list of tags into one comma-separated string.

    Anything that is not a list (e.g. the NaN left by an unmatched join)
    becomes the placeholder "No tags". An empty list yields an empty string.
    """
    if not isinstance(tag_list, list):
        return "No tags"
    return ', '.join(tag_list)


# Replace missing values with readable placeholders.
merged_meta['description'] = merged_meta['description'].fillna("No description available")
merged_meta['tags'] = merged_meta['tags'].apply(_format_tags)

# Lookup table keyed by app_id: {app_id: {column: value, ...}}.
lookup_columns = [
    'title', 'description', 'tags', 'price_final', 'steam_deck',
    'rating', 'date_release', 'win', 'mac', 'linux',
]
metadata_lookup = merged_meta.set_index('app_id')[lookup_columns].to_dict(orient='index')

Saving model

In [75]:
# Bundle the fitted model with the id mappings and per-game metadata into a
# single pickle artifact.
output = {
    'model': model,
    'user2id': user2id,
    'game2id': game2id,
    'id2game': id2game,
    'metadata': metadata_lookup
}

model_path = Path("../model/steam_als_model.pkl")
# Create the target directory first: open(..., "wb") raises FileNotFoundError
# when the parent folder is missing, which would discard the trained model.
model_path.parent.mkdir(parents=True, exist_ok=True)

with model_path.open("wb") as f:
    pickle.dump(output, f)

print("✅ Model trained and saved with full metadata and platform support.")

✅ Model trained and saved with full metadata and platform support.
