In [None]:
# Install Bokeh into the kernel's environment; the plotting cell imports it.
# NOTE(review): the version is unpinned, so a fresh run may pick up a newer
# Bokeh release than the one this notebook was written against — consider pinning.
%pip install bokeh

# Steam Data Analysis

## Introduction
This notebook is dedicated to analyzing data from the Steam platform, a popular digital distribution service for video games. The goal of this analysis is to uncover trends and insights about game popularity, user behavior, pricing, and other relevant metrics. By the end of this notebook, we aim to have a better understanding of the factors that influence game success on Steam.

## Data Loading
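In this section, we load the Steam data used in the analysis. We use the `requests` library to fetch the list of most-played games from the SteamSpy API, then query the Steam Store API for the details of each game and perform initial inspections to understand the structure and contents of the responses.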

## Data Cleaning
Data cleaning is a crucial step to ensure the quality of our analysis. Here, we handle missing values, correct data types, and perform any necessary transformations to prepare the data for analysis. This step ensures that our dataset is accurate and reliable.

## Exploratory Data Analysis (EDA)
Exploratory Data Analysis involves visualizing and summarizing the data to uncover patterns and insights. We use the `bokeh` library to create interactive plots that help us understand the distribution of variables, relationships between features, and other important aspects of the data.


In [None]:
import requests
import json
from itertools import islice

def get_top_played_games_steamspy(limit=10, timeout=10):
    """Return the app IDs of the top-played games reported by SteamSpy.

    Queries SteamSpy's ``top100in2weeks`` endpoint and keeps the first
    ``limit`` entries, in the order the response lists them.

    Args:
        limit: Maximum number of app IDs to return. If the response holds
            fewer entries, all of them are returned.
        timeout: Seconds to wait for the HTTP response before giving up, so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        list: The ``appid`` value of each selected game.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.RequestException: On connection failures or timeouts.
    """
    url = 'https://steamspy.com/api.php?request=top100in2weeks'
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = response.json()
    return [game['appid'] for game in islice(data.values(), limit)]

def get_game_data(appid, timeout=10):
    """Fetch the Steam Store details for a single app.

    Args:
        appid: Steam application ID (int or str); it is looked up in the
            response under its string form.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``data`` object of the store response, or ``None`` when the
        response reports ``success`` as false or holds no usable entry for
        ``appid``.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f'https://store.steampowered.com/api/appdetails?appids={appid}'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # Guard against a body that is not a JSON object or that lacks the
    # requested key: treat it like a failed lookup rather than raising.
    entry = payload.get(str(appid)) if isinstance(payload, dict) else None
    if not isinstance(entry, dict) or not entry.get('success'):
        return None
    return entry.get('data')

def print_game_info(game_data):
    """Print a short, human-readable summary of one game's store data.

    Prints the title, release date, price, Metacritic score, recommendation
    count, developers and publishers, one per line. Any missing field is
    printed as ``N/A``; a game without a ``price_overview`` entry is printed
    as ``Free``.

    Args:
        game_data: Mapping with the game's store details (the notebook passes
            values produced by ``get_game_data``). A falsy value prints
            "Game data not found." instead.
    """
    if not game_data:
        print("Game data not found.")
        return

    def _nested(key, field):
        # Tolerate an absent or null nested object.
        return (game_data.get(key) or {}).get(field, 'N/A')

    def _joined(key):
        # An absent or empty list prints as N/A, like every other missing field.
        return ', '.join(game_data.get(key) or []) or 'N/A'

    if 'price_overview' in game_data:
        price = (game_data['price_overview'] or {}).get('final_formatted', 'N/A')
    else:
        price = 'Free'

    print(f"Title: {game_data.get('name', 'N/A')}")
    print(f"Release Date: {_nested('release_date', 'date')}")
    print(f"Price: {price}")
    print(f"Metacritic Score: {_nested('metacritic', 'score')}")
    print(f"Recommendations: {_nested('recommendations', 'total')}")
    print(f"Developers: {_joined('developers')}")
    print(f"Publishers: {_joined('publishers')}")

# Fetch the list of top played games from SteamSpy.
# The `top100in2weeks` endpoint serves the top 100 games, so request exactly
# that many; a larger limit would not return any additional entries.
top_played_games = get_top_played_games_steamspy(limit=100)
print(top_played_games)


In [3]:
import numpy as np

# Look up the store details of every top-played game, keeping only the
# lookups that returned data.
game_data_list = [
    details
    for details in map(get_game_data, top_played_games)
    if details
]

# Wrap the collected records in a numpy array for the cells that follow
game_data_array = np.array(game_data_list)

In [None]:
import json

# Print a summary of the first fetched game. When nothing was fetched, pass
# None so print_game_info reports "Game data not found." instead of this cell
# failing with an IndexError.
first_game = game_data_array[0] if len(game_data_array) > 0 else None
print_game_info(first_game)

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool

# Convert price to numeric value
def convert_price(price):
    """Convert a formatted price into a number.

    Args:
        price: A formatted price string such as ``'$19.99'``, the literal
            ``'Free'``, or an already-numeric value.

    Returns:
        ``0`` for ``'Free'``, the price as a float when it can be parsed,
        otherwise ``None`` (unparseable text, ``None``, or another
        unsupported type).
    """
    if price == 'Free':
        return 0
    try:
        if isinstance(price, str):
            price = price.replace('$', '').strip()
        return float(price)
    except (ValueError, TypeError):
        # ValueError: text that is not a plain number after removing '$'.
        # TypeError: None or another non-numeric object.
        return None


# Convert score to float, or return None
def convert_score(score):
    """Convert a score to a float.

    Defined at module level so it can actually be called; nested after the
    return statements of ``convert_price`` it would never be reachable.

    Args:
        score: A number or numeric string.

    Returns:
        The score as a float, or ``None`` when it cannot be converted.
    """
    try:
        return float(score)
    except (ValueError, TypeError):
        return None

# Extract game info
def get_game_info(game_data):
    """Pull the fields used for plotting out of one game's store data.

    Args:
        game_data: Mapping with the game's store details.

    Returns:
        tuple: ``(title, price, score, recommendations)`` where ``price`` is
        the result of ``convert_price`` applied to the formatted price
        (``'Free'`` when there is no price entry), ``score`` is the Metacritic
        score or ``None``, and missing title/recommendations are ``'N/A'``.
    """
    pricing = game_data.get('price_overview', {})
    reviews = game_data.get('metacritic', {})
    endorsements = game_data.get('recommendations', {})

    name = game_data.get('name', 'N/A')
    price_text = pricing.get('final_formatted', 'Free')
    rating = reviews.get('score', None)
    total = endorsements.get('total', 'N/A')

    return name, convert_price(price_text), rating, total


# Prepare data for plotting: one (title, price, score, recommendations) row
# per game, keeping only the games that have both a price and a score.
plottable_rows = [
    row
    for row in map(get_game_info, game_data_array)
    if row[1] is not None and row[2] is not None
]

# Turn the rows into the column-oriented dict that the plot consumes.
plot_columns = ('Title', 'Price', 'Metacritic_Score', 'Recommendations')
plot_data = {
    column: [row[position] for row in plottable_rows]
    for position, column in enumerate(plot_columns)
}

# Create ColumnDataSource
source = ColumnDataSource(data=plot_data)

# Create the plot
p = figure(
    title="Price vs Metacritic Score",
    x_axis_label='Price (USD)',
    y_axis_label='Metacritic Score',
    tools="pan,wheel_zoom,box_zoom,reset,hover,save",
)

# Add circular markers. `scatter` draws circles by default and takes a
# screen-unit `size`; `circle` with `size` is deprecated in recent Bokeh releases.
p.scatter('Price', 'Metacritic_Score', size=10, source=source, fill_alpha=0.6)

# Configure the hover tool that the `tools` string above added to the figure
hover = p.select_one(HoverTool)
hover.tooltips = [
    ("Title", "@Title"),
    ("Price", "@Price"),
    ("Metacritic Score", "@Metacritic_Score"),
    ("Recommendations", "@Recommendations")
]

# Show the plot in the notebook
output_notebook()
show(p)