# Libraries

In [1]:
import pandas as pd

# Load Data

In [2]:
# Read both source tables; the first CSV column becomes the row index of each frame.
games_requirement_data, benchmark_score_data = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../tables/games_requirement_data.csv',
        '../tables/benchmark_score_data.csv',
    )
]

# Filtering old games

Games released more than three years ago (in this notebook: before 2023) are excluded from the analysis.

In [3]:
# Earliest release year that is kept; games released in an earlier year are dropped.
MIN_RELEASE_YEAR = 2023

# Parse the release date strings (year, full month name, day, each followed by a
# period, e.g. "2023. November 10.") into datetimes so the year can be compared.
games_requirement_data['release_date'] = pd.to_datetime(
    games_requirement_data['release_date'], format='%Y. %B %d.'
)

# Keep only the recent games. The explicit .copy() makes the filtered frame
# independent of the original, so the column assignment below does not raise
# SettingWithCopyWarning.
is_recent = games_requirement_data['release_date'].dt.year >= MIN_RELEASE_YEAR
games_requirement_data = games_requirement_data.loc[is_recent].copy()

# Keep only the calendar date (datetime.date objects), dropping the time component.
games_requirement_data['release_date'] = games_requirement_data['release_date'].dt.date

# Handling Null Values

In [4]:
# Discard every row that contains at least one missing value, in both tables.
games_requirement_data = games_requirement_data.dropna()
benchmark_score_data = benchmark_score_data.dropna()

# Handling Duplicates

In [5]:
# Keep a single row per GPU: the first occurrence of each gpu_name is retained.
benchmark_score_data = benchmark_score_data.drop_duplicates(
    subset='gpu_name',
    keep='first',
)

# Join the market share column for Video Card and RAM

Market share data can be found in the files gpu_data.csv and ram_data.csv, both extracted from the Steam Hardware Survey.

## Load Tables

In [6]:
# Read the GPU and RAM market share tables; the first CSV column becomes the row index.
gpu_data, ram_data = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../tables/gpu_data.csv', '../tables/ram_data.csv')
]

In [7]:
# Turn percentage strings into fractions: strip the trailing '%', parse as float,
# then divide by 100.
gpu_data['market_share'] = (
    gpu_data['market_share']
    .str.rstrip('%')
    .astype(float)
    .div(100)
)
ram_data['market_share'] = (
    ram_data['market_share']
    .str.rstrip('%')
    .astype(float)
    .div(100)
)

## Handling GPU Market Share

Since only NVIDIA GPUs were kept, the market shares currently sum to less than 100%. They are rescaled so that they sum to 100%, so each value represents the share among NVIDIA GPUs only.

In [8]:
# Total of the GPU market share column before normalization, rounded to 3 decimals
share_total_before = round(gpu_data['market_share'].sum(), 3)
print(f"Before:\n{share_total_before}")

Before:
0.793


In [9]:
# Rescale the GPU market shares by their total so that they sum to 1
gpu_data['market_share'] = gpu_data['market_share'].div(gpu_data['market_share'].sum())

In [10]:
# Total of the GPU market share column after normalization, rounded to 3 decimals
share_total_after = round(gpu_data['market_share'].sum(), 3)
print(f"After:\n{share_total_after}")

After:
1.0


Now it's possible to join the columns

## Join Market Share

### GPU

In [11]:
# Attach the market share of each game's minimum GPU.
# Left join: every game row is kept; GPUs absent from gpu_data get NaN.
games_requirement_data = (
    games_requirement_data
    .merge(gpu_data, how='left', left_on='gpu_minimum', right_on='gpu_name')
    .rename(columns={'market_share': 'gpu_minimum_market_share'})
    # The lookup key from gpu_data duplicates gpu_minimum, so it is removed
    .drop(columns='gpu_name')
)

In [12]:
# Attach the market share of each game's recommended GPU.
# Left join: every game row is kept; GPUs absent from gpu_data get NaN.
games_requirement_data = (
    games_requirement_data
    .merge(gpu_data, how='left', left_on='gpu_recommended', right_on='gpu_name')
    .rename(columns={'market_share': 'gpu_recommended_market_share'})
    # The lookup key from gpu_data duplicates gpu_recommended, so it is removed
    .drop(columns='gpu_name')
)

### RAM

In [13]:
# Attach the market share of each game's minimum RAM size.
# Left join: every game row is kept; RAM values absent from ram_data get NaN.
games_requirement_data = (
    games_requirement_data
    .merge(ram_data, how='left', left_on='ram_minimum', right_on='RAM')
    .rename(columns={'market_share': 'ram_minimum_market_share'})
    # The lookup key from ram_data duplicates ram_minimum, so it is removed
    .drop(columns='RAM')
)

In [14]:
# Attach the market share of each game's recommended RAM size.
# Left join: every game row is kept; RAM values absent from ram_data get NaN.
games_requirement_data = (
    games_requirement_data
    .merge(ram_data, how='left', left_on='ram_recommended', right_on='RAM')
    .rename(columns={'market_share': 'ram_recommended_market_share'})
    # The lookup key from ram_data duplicates ram_recommended, so it is removed
    .drop(columns='RAM')
)

# Handling GPU Benchmark Score

## Join Benchmark Score for GPU

In [15]:
# Attach the benchmark score of each game's minimum GPU.
# NOTE(review): this is an inner join, so games whose minimum GPU has no row in
# benchmark_score_data are dropped (the market share joins use how='left') -
# confirm this is intended.
games_requirement_data = (
    games_requirement_data
    .merge(benchmark_score_data, how='inner', left_on='gpu_minimum', right_on='gpu_name')
    .rename(columns={'benchmark_score': 'gpu_minimum_benchmark_score'})
    # The lookup key from benchmark_score_data duplicates gpu_minimum, so it is removed
    .drop(columns='gpu_name')
)

In [16]:
# Attach the benchmark score of each game's recommended GPU.
# NOTE(review): this is an inner join, so games whose recommended GPU has no row in
# benchmark_score_data are dropped (the market share joins use how='left') -
# confirm this is intended.
games_requirement_data = (
    games_requirement_data
    .merge(benchmark_score_data, how='inner', left_on='gpu_recommended', right_on='gpu_name')
    .rename(columns={'benchmark_score': 'gpu_recommended_benchmark_score'})
    # The lookup key from benchmark_score_data duplicates gpu_recommended, so it is removed
    .drop(columns='gpu_name')
)