In [28]:
import sagemaker
import boto3
import pandas as pd
from io import StringIO
import ast

In [3]:
# SageMaker session, its default S3 bucket, and the notebook's execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Build a single boto3 session and reuse it for the region lookup and both
# service clients instead of constructing a fresh Session three times.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name="sagemaker", region_name=region)
s3 = boto_session.client(service_name="s3", region_name=region)

### Get library for every user

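Build one row per player–game pair: each player's purchased-game `library` list is exploded so that every row holds a `playerid` and a single game id.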

In [14]:
def load_data(file_key, bucket_name='ads-508-final', s3_client=None):
    """Read ``playstation/<file_key>/<file_key>.csv`` from S3 into a DataFrame.

    Parameters
    ----------
    file_key : str
        Dataset name; it is used both as the folder and as the file stem,
        e.g. ``"games"`` resolves to ``playstation/games/games.csv``.
    bucket_name : str, optional
        Bucket to read from. Defaults to ``'ads-508-final'``.
    s3_client : optional
        Object exposing ``get_object(Bucket=..., Key=...)``. When omitted,
        the notebook-level ``s3`` client is used.

    Returns
    -------
    pandas.DataFrame
        The CSV parsed with ``pd.read_csv`` default settings.
    """
    client = s3 if s3_client is None else s3_client

    # Keep the caller's argument intact; derive the object key separately.
    object_key = f"playstation/{file_key}/{file_key}.csv"

    response = client.get_object(Bucket=bucket_name, Key=object_key)

    # The body is read fully and decoded as UTF-8 before parsing.
    csv_content = response['Body'].read().decode('utf-8')

    return pd.read_csv(StringIO(csv_content))

In [20]:
# Purchased-games table, read from playstation/purchased_games/purchased_games.csv via load_data.
df_purch = load_data("purchased_games")

In [38]:
def conv_str_exp(df, col):
    """Parse string cells of ``col`` as Python literals, then explode the column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; the work is done on a copy.
    col : hashable
        Label of the column to parse and explode.

    Returns
    -------
    pandas.DataFrame
        A new frame with one row per element of each list-like cell in
        ``col`` and a fresh 0..n-1 index.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a string cell is not a
        valid Python literal.
    """
    # Only strings are parsed; cells that are already lists (or NaN) pass through.
    parsed = df[col].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )

    # Assign onto a copy so the caller's DataFrame keeps its original column.
    result = df.copy()
    result[col] = parsed
    return result.explode(col, ignore_index=True)

In [46]:
# Parse the 'library' column where it holds strings, then expand it to one row per list element.
df_player = conv_str_exp(df_purch, 'library')

In [40]:
# Preview the exploded player/game table. No `df` is defined in the cells above,
# so the original `df.head()` only worked from leftover kernel state.
df_player.head()

Unnamed: 0,playerid,library
0,268071,14972
1,268071,417905
2,268071,14693
3,268071,7742
4,268071,20162


### Add game metadata
Join per-game metadata (platform, developers, genres, supported languages) onto each player–game row for use in training.

In [41]:
# Games table, read from playstation/games/games.csv via load_data.
df_games = load_data("games")

In [42]:
# Preview the first rows of the games table.
df_games.head()

Unnamed: 0,gameid,title,platform,developers,publishers,genres,supported_languages,release_date
0,749375,Medieval Match Master,PS4,['Erik Games'],['Erik Games'],['Puzzle'],,2024-11-22
1,749839,Sunforge Solar Survival,PS4,['GrizzlyGames'],['GrizzlyGames'],,,2024-11-21
2,749347,Find Love Or Die Trying,PS5,['Sunseeker Games'],['Ratalaika Games'],['Visual Novel'],,2024-11-22
3,749341,Medieval Match Master,PS4,['Erik Games'],['Erik Games'],['Puzzle'],,2024-11-22
4,749097,Bust A Block 2,PS5,['ThiGames'],['ThiGames'],['Puzzle'],,2024-11-21


In [43]:
# Drop columns that are not carried into the merged table. `df_games` is rebound
# to its own trimmed version, so errors='ignore' keeps this cell re-runnable:
# without it a second run raises KeyError because the columns are already gone.
df_games = df_games.drop(columns=['release_date', 'title', 'publishers'], errors='ignore')

In [48]:
# Left join: keep every player/game row and attach the games columns where
# the row's 'library' value equals a 'gameid'.
df_games_meta = df_player.merge(
    df_games,
    how='left',
    left_on='library',
    right_on='gameid',
)

### Derive interaction characteristics from trophy data