In [1]:
import itertools
import warnings
from ast import literal_eval
from pathlib import Path

import gdown
import numpy as np
import pandas as pd
# Silences every warning category for the rest of the session.
# NOTE(review): this also hides pandas warnings raised by the cleaning cells
# below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# Fetch the shared Google Drive folder that holds the raw CSV inputs. The
# recorded output of this cell lists app_list.csv, steam_app_data.csv and
# steamspy_data.csv under "Generated Data".
# NOTE(review): nothing here checks for an existing copy, so this presumably
# re-downloads on every run; the saved output also exposes an absolute local
# path — consider clearing it before sharing the notebook.
url = "https://drive.google.com/drive/folders/1Vwii9GZ_OdBnXshmhMZuWutgp4HWfgli?usp=sharing"
gdown.download_folder(url, quiet=True, use_cookies=False)

['/mnt/e/IIITB/Semester 6/rs/Project/Gamify/Generated Data/app_list.csv',
 '/mnt/e/IIITB/Semester 6/rs/Project/Gamify/Generated Data/steam_app_data.csv',
 '/mnt/e/IIITB/Semester 6/rs/Project/Gamify/Generated Data/steamspy_data.csv']

In [5]:
# Load the three raw tables fetched into "Generated Data": the app list, the
# Steam store API dump, and the SteamSpy dump.
app, steam_data, steam_spy = (
    pd.read_csv("Generated Data/app_list.csv"),
    pd.read_csv("Generated Data/steam_app_data.csv"),
    pd.read_csv("Generated Data/steamspy_data.csv"),
)

In [6]:
# Preview the first five raw Steam store rows before any cleaning.
steam_data.head(n=5)

Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,detailed_description,about_the_game,short_description,...,categories,genres,screenshots,movies,recommendations,achievements,release_date,support_info,background,content_descriptors
0,game,Counter-Strike,10,0.0,False,,,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://steamcdn...",,{'total': 65735},{'total': 0},"{'coming_soon': False, 'date': '1 Nov, 2000'}","{'url': 'http://steamcommunity.com/app/10', 'e...",https://steamcdn-a.akamaihd.net/steam/apps/10/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
1,game,Team Fortress Classic,20,0.0,False,,,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://steamcdn...",,{'total': 2802},{'total': 0},"{'coming_soon': False, 'date': '1 Apr, 1999'}","{'url': '', 'email': ''}",https://steamcdn-a.akamaihd.net/steam/apps/20/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
2,game,Day of Defeat,30,0.0,False,,,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://steamcdn...",,{'total': 1992},{'total': 0},"{'coming_soon': False, 'date': '1 May, 2003'}","{'url': '', 'email': ''}",https://steamcdn-a.akamaihd.net/steam/apps/30/...,"{'ids': [], 'notes': None}"
3,game,Deathmatch Classic,40,0.0,False,,,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://steamcdn...",,{'total': 931},{'total': 0},"{'coming_soon': False, 'date': '1 Jun, 2001'}","{'url': '', 'email': ''}",https://steamcdn-a.akamaihd.net/steam/apps/40/...,"{'ids': [], 'notes': None}"
4,game,Half-Life: Opposing Force,50,0.0,False,,,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,...,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://steamcdn...",,{'total': 4355},{'total': 0},"{'coming_soon': False, 'date': '1 Nov, 1999'}","{'url': 'https://help.steampowered.com', 'emai...",https://steamcdn-a.akamaihd.net/steam/apps/50/...,"{'ids': [], 'notes': None}"


In [7]:
# Drop the Steam store columns that the cleaned dataset does not keep.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: labels that are already gone are skipped rather than
# raising KeyError. The resulting frame never contains these columns either way.
steam_data = steam_data.drop(
    columns=[
        'type', 'controller_support', 'screenshots', 'achievements',
        'support_info', 'dlc', 'fullgame', 'legal_notice', 'drm_notice',
        'ext_user_account_notice', 'demos', 'price_overview', 'packages',
        'package_groups', 'header_image', 'website', 'metacritic', 'reviews',
        'background', 'detailed_description', 'publishers', 'movies',
        'recommendations', 'supported_languages', 'pc_requirements',
        'mac_requirements', 'linux_requirements',
    ],
    errors='ignore',
)


In [8]:
# Keep only rows that have both an `is_free` flag and a `required_age` value.
steam_data = steam_data.dropna(subset=['is_free', 'required_age'])

In [9]:
def process_tags(df, export=False, export_path=None):
    """Reduce the raw ``tags`` column to each app's first three tag names.

    Every ``tags`` cell is expected to hold the string form of either a dict
    mapping tag name to a count, or a list (seen when an app has no tags).
    The input frame is never modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``tags`` column (and an ``appid`` column when the wide
        tag table is written out).
    export : bool, default False
        When True, every parsed cell must be a dict or a list (missing cells
        count as "no tags"); anything else raises ``TypeError``.
    export_path : str, path-like or file-like, optional
        Only used when ``export`` is True. If given, a wide table with one row
        per app and one column per tag (0 where the app lacks the tag) is
        written there with ``DataFrame.to_csv``. If omitted, no table is built.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` whose ``tags`` column holds up to three tag names
        joined by ``';'``. Rows whose tags are not a dict are removed.

    Raises
    ------
    TypeError
        If ``export`` is True and a cell parses to something other than a
        dict or a list.
    """
    def parse_cell(raw):
        # Missing cells (None / NaN) carry no payload; literal_eval would
        # otherwise fail on them with an unrelated ValueError.
        if raw is None or (isinstance(raw, float) and np.isnan(raw)):
            return None
        return literal_eval(raw)

    def tag_column_name(tag):
        return tag.lower().replace(' ', '_').replace('-', '_').replace("'", "")

    def build_tag_table(tag_dicts):
        # Collect whole columns first and build the frame once, rather than
        # inserting hundreds of columns into an existing frame one at a time.
        columns = {'appid': df['appid'].tolist()}
        for tag in sorted(set(itertools.chain.from_iterable(tag_dicts))):
            columns[tag_column_name(tag)] = [votes.get(tag, 0) for votes in tag_dicts]
        return pd.DataFrame(columns, index=df.index)

    # Parse each cell exactly once; both the export and the top-tag steps
    # reuse the parsed values.
    parsed = [parse_cell(raw) for raw in df['tags']]

    if export:
        tag_dicts = []
        for tags in parsed:
            if isinstance(tags, dict):
                tag_dicts.append(tags)
            elif tags is None or isinstance(tags, list):
                tag_dicts.append({})
            else:
                raise TypeError('Something other than dict or list found')

        if export_path is not None:
            build_tag_table(tag_dicts).to_csv(export_path, index=False)

    top_tags = [
        ';'.join(list(tags)[:3]) if isinstance(tags, dict) else np.nan
        for tags in parsed
    ]

    # Work on a copy so the caller's frame keeps its original tag strings.
    result = df.copy()
    result['tags'] = top_tags

    # rows with null tags seem to be superseded by newer release, so remove (e.g. dead island)
    return result[result['tags'].notnull()]


def process(df):
    """Clean the raw SteamSpy table.

    Works on a copy of ``df`` and:

    1. removes rows with a missing name (or the literal name ``'none'``),
       developer, languages, or price;
    2. drops the columns this pipeline does not keep;
    3. reduces ``tags`` to the top three tag names via ``process_tags``;
    4. rewrites ``owners`` from the ``"1,000 .. 2,000"`` form to ``"1000-2000"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw SteamSpy data containing at least the columns referenced below.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame; the input is left unchanged.
    """
    df = df.copy()

    # handle missing values
    has_required_fields = (
        df['name'].notnull()
        & (df['name'] != 'none')
        & df['developer'].notnull()
        & df['languages'].notnull()
        & df['price'].notnull()
    )
    df = df[has_required_fields]

    # remove unwanted columns
    df = df.drop(columns=[
        'genre', 'developer', 'publisher', 'score_rank', 'userscore', 'average_2weeks',
        'median_2weeks', 'price', 'initialprice', 'discount', 'ccu'
    ])

    # keep the top tags per app; export=True additionally makes process_tags
    # reject tag payloads that are neither a dict nor a list
    df = process_tags(df, export=True)

    # reformat owners column; regex=False makes both replacements literal on
    # every pandas version (with regex matching, '.' in ' .. ' is a wildcard)
    df['owners'] = (
        df['owners']
        .str.replace(',', '', regex=False)
        .str.replace(' .. ', '-', regex=False)
    )

    return df

# Replace the raw SteamSpy frame with its cleaned version.
# NOTE(review): this cell is not re-runnable — process() filters on and drops
# columns such as 'developer', so calling it on its own output raises KeyError.
# Reload the raw CSV before running it again.
steam_spy = process(steam_spy)

In [10]:
# Keep only Steam store rows that have a value in every remaining column,
# then renumber the index from zero.
steam_data = steam_data.dropna(axis=0, how='any').reset_index(drop=True)

In [11]:
# Same treatment for the SteamSpy rows: discard any row with a missing value
# and rebuild a clean 0..n-1 index.
steam_spy = steam_spy.dropna(axis=0, how='any').reset_index(drop=True)

In [12]:
# Inner-join the two sources on the app id; columns that exist in both keep
# their Steam store name, while the SteamSpy copy gets a '_steamspy' suffix.
merged = pd.merge(
    steam_data,
    steam_spy,
    how='inner',
    left_on='steam_appid',
    right_on='appid',
    suffixes=('', '_steamspy'),
)

In [13]:
# Remove the duplicated join key and name (steam_appid / name are kept) along
# with the nested categories, genres and content_descriptors columns.
merged = merged.drop(
    columns=['appid', 'name_steamspy', 'categories', 'genres', 'content_descriptors']
)

In [14]:
# Write the merged, cleaned table to disk without the positional index.
clean_data_path = Path('Cleaned Data/clean_data.csv')
# to_csv does not create missing directories, so make sure the target folder
# exists; otherwise the export fails on a fresh checkout.
clean_data_path.parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(clean_data_path, index=False)