### Use the Steam Spy API to get the number of positive and negative reviews and the peak player count for the previous day
https://steamspy.com/api.php

### The all-time peak player count is scraped from https://steamplayercount.com

In [35]:
import pandas as pd
import requests
import json
import time
from bs4 import BeautifulSoup
import numpy as np

In [2]:
# Read the Steam dataset from the Excel workbook into a DataFrame
df = pd.read_excel(io='Data/steam_data_cleaned.xlsx')

In [3]:
# Preview the column that holds the Steam application IDs
df.loc[:, 'Steam_AppID']

0             10
1             20
2             30
3             40
4             50
          ...   
11273    2472270
11274    2472500
11275    2472840
11276    2473150
11277    2473690
Name: Steam_AppID, Length: 11278, dtype: int64

#### Get the total reviews and the peak players of the previous day

In [4]:
# Define function to fetch the SteamSpy details for a single App_ID
def get_app_details(app_id, timeout=30):
    """Fetch the SteamSpy ``appdetails`` record for one Steam app.

    Parameters
    ----------
    app_id : int or str
        Steam application ID to look up.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may wait indefinitely on a stalled connection.

    Returns
    -------
    object
        The decoded JSON body of the response (the caller treats it as a dict).

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts or a non-2xx HTTP status.
    ValueError
        If the response body is not valid JSON.
    """
    response = requests.get(
        "https://steamspy.com/api.php",
        params={"request": "appdetails", "appid": app_id},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to decode an error page
    response.raise_for_status()
    return response.json()

# Define function to get the number of negative and positive reviews and yesterday's peak players
def get_values(row, delay=1):
    """Look up review counts and the previous day's peak players for one row.

    Parameters
    ----------
    row : pandas.Series
        A dataframe row; only its 'Steam_AppID' entry is read.
    delay : float, optional
        Seconds to pause before the request (default 1).

    Returns
    -------
    pandas.Series
        'Positive_Reviews', 'Negative_Reviews' and 'Peak_Players_Yesterday'.
        Each value is None when the API response lacks the field or when the
        request itself failed.
    """
    # Get the App_ID from the dataframe row
    app_id = row['Steam_AppID']

    # Pause between consecutive requests
    time.sleep(delay)

    try:
        # Get the app details from the SteamSpy API for this app ID
        app_data = get_app_details(app_id)
    except (requests.RequestException, ValueError) as error:
        # One bad response must not abort the apply() over every row of the
        # dataframe: report the app and fall back to missing values, exactly
        # as if the fields were absent from the response.
        print(f"Manually Check App_ID {app_id}: {error}")
        app_data = {}

    # Return a new pandas Series with the extracted values
    return pd.Series({
        'Positive_Reviews': app_data.get('positive'),
        'Negative_Reviews': app_data.get('negative'),
        # 'ccu' is used as the peak player count of the day before the request
        'Peak_Players_Yesterday': app_data.get('ccu'),
    })

# Run get_values once per row and store the three returned fields as new columns
df[['Positive_Reviews', 'Negative_Reviews', 'Peak_Players_Yesterday']] = df.apply(func=get_values, axis='columns')

In [6]:
# Total review count = positive reviews + negative reviews
df['Total_reviews'] = df['Positive_Reviews'].add(df['Negative_Reviews'])

In [9]:
# Show the first five rows, including the newly added review columns
df.head(n=5)

Unnamed: 0,Game,Steam_AppID,Minimum_age,Free,About_the_Game,Developers,Publishers,Windows_Support,Mac_Support,Linux_Support,...,DRM_Protection,Discounted_Price,Original_Price,categories_Description,genres_Description,Is_Subscription,Positive_Reviews,Negative_Reviews,Peak_Players_Yesterday,Total_reviews
0,Counter-Strike,10,0,0,Play the world's number 1 online action game. ...,Valve,Valve,1,1,1,...,0,8.19,8.19,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,214780,5476,11739,220256
1,Team Fortress Classic,20,0,0,One of the most popular online action games of...,Valve,Valve,1,1,1,...,0,3.99,3.99,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,6362,980,72,7342
2,Day of Defeat,30,0,0,Enlist in an intense brand of Axis vs. Allied ...,Valve,Valve,1,1,1,...,0,3.99,3.99,"Multi-player, Valve Anti-Cheat enabled",Action,0,5629,621,94,6250
3,Deathmatch Classic,40,0,0,Enjoy fast-paced multiplayer gaming with Death...,Valve,Valve,1,1,1,...,0,3.99,3.99,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,2179,482,6,2661
4,Half-Life: Opposing Force,50,0,0,Return to the Black Mesa Research Facility as ...,Gearbox Software,Valve,1,1,1,...,0,3.99,3.99,"Single-player, Multi-player, Valve Anti-Cheat ...",Action,0,17044,864,128,17908


#### Now get the all-time peak players

In [16]:
def _fetch_all_time_peak(app_id, timeout=30):
    """Return the all-time peak text from steamplayercount.com, or None.

    The value is read from the third <span class="big-text"> inside the first
    <table> of the app page. None is returned when the request fails or the
    page does not contain that structure.
    """
    url = f'https://steamplayercount.com/app/{app_id}'
    try:
        # A timeout keeps one stalled connection from hanging the whole loop
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network failure: handled like a page that could not be parsed
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first table on the page is inspected
    first_table = soup.find('table')
    if first_table is None:
        return None

    # The third "big-text" span contains the peak players
    spans = first_table.find_all('span', class_='big-text')
    if len(spans) < 3:
        return None
    return spans[2].get_text()


# Create a new column in the DataFrame to store the values
df['Peak_Players_all_Time'] = None

for index, app_id in df['Steam_AppID'].items():
    peak_text = _fetch_all_time_peak(app_id)
    if peak_text is None:
        # Nothing usable was found for this app; flag it for a manual look
        print(f"Manually Check App_ID {app_id}.")
    else:
        # Stored as text; later cells strip the commas and convert to integers
        df.at[index, 'Peak_Players_all_Time'] = peak_text

Manually Check App_ID 12220.
Manually Check App_ID 255163.
Manually Check App_ID 256576.
Manually Check App_ID 256611.
Manually Check App_ID 345950.
Manually Check App_ID 360970.
Manually Check App_ID 400070.
Manually Check App_ID 429380.
Manually Check App_ID 544690.
Manually Check App_ID 620620.
Manually Check App_ID 636640.
Manually Check App_ID 643370.
Manually Check App_ID 668020.
Manually Check App_ID 668580.
Manually Check App_ID 687800.
Manually Check App_ID 717870.
Manually Check App_ID 733110.
Manually Check App_ID 743680.
Manually Check App_ID 756300.
Manually Check App_ID 756800.
Manually Check App_ID 772710.
Manually Check App_ID 772980.
Manually Check App_ID 781200.
Manually Check App_ID 789150.
Manually Check App_ID 789450.
Manually Check App_ID 790030.
Manually Check App_ID 790430.
Manually Check App_ID 821840.
Manually Check App_ID 823230.
Manually Check App_ID 843390.
Manually Check App_ID 871160.
Manually Check App_ID 871410.
Manually Check App_ID 871540.
Manually Ch

In [17]:
# Inspect the dataframe after the scraping loop.
# NOTE(review): the saved output lists both 'Peak_Players_All_Time' and
# 'Peak_Players_all_Time'; no visible cell creates the former — presumably a
# leftover from an earlier run. Confirm and drop it before exporting.
df

Unnamed: 0,Game,Steam_AppID,Minimum_age,Free,About_the_Game,Developers,Publishers,Windows_Support,Mac_Support,Linux_Support,...,Original_Price,categories_Description,genres_Description,Is_Subscription,Positive_Reviews,Negative_Reviews,Peak_Players_Yesterday,Total_reviews,Peak_Players_All_Time,Peak_Players_all_Time
0,Counter-Strike,10,0,0,Play the world's number 1 online action game. ...,Valve,Valve,1,1,1,...,8.19,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,214780,5476,11739,220256,,319586
1,Team Fortress Classic,20,0,0,One of the most popular online action games of...,Valve,Valve,1,1,1,...,3.99,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,6362,980,72,7342,,1897
2,Day of Defeat,30,0,0,Enlist in an intense brand of Axis vs. Allied ...,Valve,Valve,1,1,1,...,3.99,"Multi-player, Valve Anti-Cheat enabled",Action,0,5629,621,94,6250,,7758
3,Deathmatch Classic,40,0,0,Enjoy fast-paced multiplayer gaming with Death...,Valve,Valve,1,1,1,...,3.99,"Multi-player, PvP, Online PvP, Shared/Split Sc...",Action,0,2179,482,6,2661,,628
4,Half-Life: Opposing Force,50,0,0,Return to the Black Mesa Research Facility as ...,Gearbox Software,Valve,1,1,1,...,3.99,"Single-player, Multi-player, Valve Anti-Cheat ...",Action,0,17044,864,128,17908,,619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11273,GoldenMiner,2472270,0,0,"This is a puzzle game. In this game, gold coin...",GoldenMiner,GoldenMiner,1,0,0,...,0.00,Single-player,"Casual, Simulation",0,0,0,0,0,,
11274,Dose Response Playtest,2472500,0,0,,,,1,0,0,...,0.00,,,0,0,0,0,0,,
11275,Ducks Can Drive,2472840,0,1,"Ducks like to race, even in their own cars! Ea...",Joseph Cook,Joseph Cook,1,0,1,...,0.00,"Single-player, Multi-player, PvP, Online PvP, ...",Racing,0,0,0,0,0,,
11276,Cards We're Dealt: Prologue,2473150,0,1,"<h2 class=""bb_tag"">Cards We're Dealt: Prologue...",Cole Chittim,Cole Chittim,1,1,1,...,0.00,"Single-player, Full controller support, Steam ...","Action, Indie",0,0,0,0,0,,


In [29]:
# Remove commas from the scraped text; regex=False makes the comma a literal
# match regardless of the pandas version's default for `regex`
df['Peak_Players_all_Time'] = df['Peak_Players_all_Time'].str.replace(',', '', regex=False)

In [38]:
# Convert the scraped text into a nullable integer column
df['Peak_Players_all_Time'] = (
    df['Peak_Players_all_Time']
    # Literal 'None' strings count as missing
    .replace('None', np.nan)
    # Numeric text -> float; missing entries become NaN
    .astype(float)
    # Non-finite values can only be matched once the data is numeric, so this
    # runs after the float cast; otherwise they would break the Int64 cast
    .replace([np.inf, -np.inf], np.nan)
    # Nullable integer dtype keeps the missing entries as <NA>
    .astype('Int64')
)

## All data gathered

In [39]:
# Write the enriched dataframe to the final Excel workbook.
# NOTE(review): to_excel also writes the index as a column by default —
# confirm that downstream readers expect it.
df.to_excel(excel_writer='Data/steam_data_final.xlsx')