In [1]:
import pandas as pd
import rename_columns
from  clean_string_content import clean_strings
import duplicate
import matplotlib.pyplot as plt
import seaborn as sns
from box_plot import box_plot
import functions_Ako
import yaml

In [2]:
# Load the project configuration; later cells read the input CSV path from `config`.
try:
    with open("../config.yaml", "r") as file:
        config = yaml.safe_load(file)
except FileNotFoundError:
    # Only a missing file is reported as "not found"; YAML syntax errors,
    # permission problems, etc. propagate with their own message instead of
    # being mislabelled. Re-raise so the notebook stops here rather than
    # failing later with a NameError on `config`.
    print("Configuration file not found")
    raise

In [3]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Read the input CSV; its path comes from the `input_data.file` entry of the
# loaded configuration. The bare `df` on the last line displays the frame.
df = pd.read_csv(config['input_data']['file'])
df

Unnamed: 0,Game Title,Genre,Platform,Release Year,Developer,Revenue (Millions $),Players (Millions),Peak Concurrent Players,Metacritic Score,Esports Popularity,Trending Status
0,Pixel Odyssey 2,Action,PlayStation,2024,Square Enix,1847.58,44.96,14.07,87,Yes,Declining
1,Battle Warriors 4,Strategy,Cross-Platform,2024,EA,4835.79,115.65,56.27,92,Yes,Stable
2,Legends of Valor 2,Shooter,Xbox,2024,Square Enix,2628.55,169.67,74.74,99,No,Declining
3,Shadow Realms 4,Action,Nintendo Switch,2024,Microsoft,1695.50,184.40,20.15,73,Yes,Stable
4,Cyber Quest 2,Adventure,Xbox,2024,Microsoft,4529.75,17.28,5.83,91,Yes,Stable
...,...,...,...,...,...,...,...,...,...,...,...
995,Warzone Chronicles 1,Horror,Nintendo Switch,2000,Ubisoft,2654.16,174.49,22.38,53,Yes,Stable
996,Zombie Outbreak 3,Fighting,Cross-Platform,2000,Ubisoft,2170.46,155.20,74.51,57,Yes,Declining
997,Battle Warriors 5,Fighting,PC,2000,Rockstar,2040.85,166.01,50.85,82,No,Rising
998,Cyber Quest 2,Adventure,Cross-Platform,2000,Activision,2657.88,112.92,25.93,66,No,Rising


In [5]:
# Check for null values: element-wise boolean mask, True where a value is missing.

df.isnull()

Unnamed: 0,Game Title,Genre,Platform,Release Year,Developer,Revenue (Millions $),Players (Millions),Peak Concurrent Players,Metacritic Score,Esports Popularity,Trending Status
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
995,False,False,False,False,False,False,False,False,False,False,False
996,False,False,False,False,False,False,False,False,False,False,False
997,False,False,False,False,False,False,False,False,False,False,False
998,False,False,False,False,False,False,False,False,False,False,False


In [6]:
# Check whether each column contains at least one null value (one boolean per column).
df.isna().any()

Game Title                 False
Genre                      False
Platform                   False
Release Year               False
Developer                  False
Revenue (Millions $)       False
Players (Millions)         False
Peak Concurrent Players    False
Metacritic Score           False
Esports Popularity         False
Trending Status            False
dtype: bool

In [7]:
# Count the number of null values in each column.
df.isna().sum()

Game Title                 0
Genre                      0
Platform                   0
Release Year               0
Developer                  0
Revenue (Millions $)       0
Players (Millions)         0
Peak Concurrent Players    0
Metacritic Score           0
Esports Popularity         0
Trending Status            0
dtype: int64

In [8]:
# Drop column 'Peak Concurrent Players'.
# Reassign instead of mutating in place, and tolerate the column already being
# gone, so re-running this cell does not raise KeyError.
df = df.drop(columns=['Peak Concurrent Players'], errors='ignore')
df.head()

Unnamed: 0,Game Title,Genre,Platform,Release Year,Developer,Revenue (Millions $),Players (Millions),Metacritic Score,Esports Popularity,Trending Status
0,Pixel Odyssey 2,Action,PlayStation,2024,Square Enix,1847.58,44.96,87,Yes,Declining
1,Battle Warriors 4,Strategy,Cross-Platform,2024,EA,4835.79,115.65,92,Yes,Stable
2,Legends of Valor 2,Shooter,Xbox,2024,Square Enix,2628.55,169.67,99,No,Declining
3,Shadow Realms 4,Action,Nintendo Switch,2024,Microsoft,1695.5,184.4,73,Yes,Stable
4,Cyber Quest 2,Adventure,Xbox,2024,Microsoft,4529.75,17.28,91,Yes,Stable


In [9]:
# Drop column 'Esports Popularity'.
# Reassign instead of mutating in place, and tolerate the column already being
# gone, so re-running this cell does not raise KeyError.
df = df.drop(columns=['Esports Popularity'], errors='ignore')
df.head()

Unnamed: 0,Game Title,Genre,Platform,Release Year,Developer,Revenue (Millions $),Players (Millions),Metacritic Score,Trending Status
0,Pixel Odyssey 2,Action,PlayStation,2024,Square Enix,1847.58,44.96,87,Declining
1,Battle Warriors 4,Strategy,Cross-Platform,2024,EA,4835.79,115.65,92,Stable
2,Legends of Valor 2,Shooter,Xbox,2024,Square Enix,2628.55,169.67,99,Declining
3,Shadow Realms 4,Action,Nintendo Switch,2024,Microsoft,1695.5,184.4,73,Stable
4,Cyber Quest 2,Adventure,Xbox,2024,Microsoft,4529.75,17.28,91,Stable


In [10]:
# Drop column 'Trending Status'.
# Reassign instead of mutating in place, and tolerate the column already being
# gone, so re-running this cell does not raise KeyError.
df = df.drop(columns=['Trending Status'], errors='ignore')
df.head()

Unnamed: 0,Game Title,Genre,Platform,Release Year,Developer,Revenue (Millions $),Players (Millions),Metacritic Score
0,Pixel Odyssey 2,Action,PlayStation,2024,Square Enix,1847.58,44.96,87
1,Battle Warriors 4,Strategy,Cross-Platform,2024,EA,4835.79,115.65,92
2,Legends of Valor 2,Shooter,Xbox,2024,Square Enix,2628.55,169.67,99
3,Shadow Realms 4,Action,Nintendo Switch,2024,Microsoft,1695.5,184.4,73
4,Cyber Quest 2,Adventure,Xbox,2024,Microsoft,4529.75,17.28,91


In [11]:
# Original CSV headers -> snake_case column names referenced by the cells below.
column_map = {
    "Game Title": "game_title",
    "Genre": "genre",
    "Platform": "platform",
    "Release Year": "release_year",
    "Developer": "developer",
    "Revenue (Millions $)": "revenue_millions",
    "Players (Millions)": "players_millions",
    "Metacritic Score": "metacritic_score",
}
# Apply the mapping through the project's rename helper and display the result.
df = rename_columns.rename_multiple_columns(df, column_map)
df

Unnamed: 0,game_title,genre,platform,release_year,developer,revenue_millions,players_millions,metacritic_score
0,Pixel Odyssey 2,Action,PlayStation,2024,Square Enix,1847.58,44.96,87
1,Battle Warriors 4,Strategy,Cross-Platform,2024,EA,4835.79,115.65,92
2,Legends of Valor 2,Shooter,Xbox,2024,Square Enix,2628.55,169.67,99
3,Shadow Realms 4,Action,Nintendo Switch,2024,Microsoft,1695.50,184.40,73
4,Cyber Quest 2,Adventure,Xbox,2024,Microsoft,4529.75,17.28,91
...,...,...,...,...,...,...,...,...
995,Warzone Chronicles 1,Horror,Nintendo Switch,2000,Ubisoft,2654.16,174.49,53
996,Zombie Outbreak 3,Fighting,Cross-Platform,2000,Ubisoft,2170.46,155.20,57
997,Battle Warriors 5,Fighting,PC,2000,Rockstar,2040.85,166.01,82
998,Cyber Quest 2,Adventure,Cross-Platform,2000,Activision,2657.88,112.92,66


In [12]:
# Apply the project's clean_strings helper to every value of the
# object-dtype (text) columns, then display those columns.
text_cols = df.select_dtypes(include="object").columns
# DataFrame.applymap is deprecated since pandas 2.1; DataFrame.map is its
# element-wise replacement and takes the same callable.
df[text_cols] = df[text_cols].map(clean_strings)
df[text_cols]

  df[text_cols]= df[text_cols].applymap(clean_strings)


Unnamed: 0,game_title,genre,platform,release_year,developer
0,pixel odyssey 2,action,playstation,2024,square enix
1,battle warriors 4,strategy,cross platform,2024,ea
2,legends of valor 2,shooter,xbox,2024,square enix
3,shadow realms 4,action,nintendo switch,2024,microsoft
4,cyber quest 2,adventure,xbox,2024,microsoft
...,...,...,...,...,...
995,warzone chronicles 1,horror,nintendo switch,2000,ubisoft
996,zombie outbreak 3,fighting,cross platform,2000,ubisoft
997,battle warriors 5,fighting,pc,2000,rockstar
998,cyber quest 2,adventure,cross platform,2000,activision


In [13]:
# Count rows flagged as duplicates of another row across all columns.
df.duplicated().sum()

np.int64(0)

In [14]:
# True if at least one row is flagged as a duplicate.
df.duplicated().any()

np.False_

In [15]:
# Remove duplicate rows via the project's `duplicate` helper module.
# NOTE(review): keep="first" presumably retains the first occurrence of each
# duplicated row, as in pandas' drop_duplicates — confirm in duplicate.py.
df = duplicate.remove_duplicate_rows(df, keep="first")
display(df)

Unnamed: 0,game_title,genre,platform,release_year,developer,revenue_millions,players_millions,metacritic_score
0,pixel odyssey 2,action,playstation,2024,square enix,1847.58,44.96,87
1,battle warriors 4,strategy,cross platform,2024,ea,4835.79,115.65,92
2,legends of valor 2,shooter,xbox,2024,square enix,2628.55,169.67,99
3,shadow realms 4,action,nintendo switch,2024,microsoft,1695.50,184.40,73
4,cyber quest 2,adventure,xbox,2024,microsoft,4529.75,17.28,91
...,...,...,...,...,...,...,...,...
995,warzone chronicles 1,horror,nintendo switch,2000,ubisoft,2654.16,174.49,53
996,zombie outbreak 3,fighting,cross platform,2000,ubisoft,2170.46,155.20,57
997,battle warriors 5,fighting,pc,2000,rockstar,2040.85,166.01,82
998,cyber quest 2,adventure,cross platform,2000,activision,2657.88,112.92,66


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,revenue_millions,players_millions,metacritic_score
count,1000.0,1000.0,1000.0
mean,2483.0178,103.49982,74.99
std,1412.629451,58.869562,14.618536
min,11.43,0.53,50.0
25%,1276.19,52.005,62.0
50%,2476.13,107.04,76.0
75%,3677.7975,155.6325,87.0
max,4999.79,199.98,100.0


In [None]:
# Box plot of the revenue_millions column, drawn with the project's box_plot helper.
box_plot(
    df,
    column="revenue_millions",
    title="revenue in millions dollars",
    color="skyblue",
)

In [None]:
# Box plot of the players_millions column, drawn with the project's box_plot helper.
box_plot(
    df,
    column="players_millions",
    title="number of players",
    color="skyblue",
)

In [None]:
# Box plot of the metacritic_score column, drawn with the project's box_plot helper.
box_plot(
    df,
    column="metacritic_score",
    title="the metacritic score",
    color="skyblue",
)

In [None]:
# Reset the frame's index through the project helper and display the result.
# NOTE(review): whether the old index is dropped or kept as a column is decided
# inside functions_Ako.index_reset — confirm there.
df = functions_Ako.index_reset(df)
df

In [18]:
# Convert release_year from text to integers.
# Done as one chained assignment so that, if the integer cast fails on an
# unexpected value, the column is left untouched instead of being overwritten
# with half-cleaned strings.
df["release_year"] = (
    df["release_year"]
    .astype(str)
    .str.strip()                        # trim surrounding whitespace
    .str.replace(" ", "", regex=False)  # remove inner spaces (literal match)
    .astype(int)
)