# Video Game Sales Analysis

In [None]:
# Import dependencies
import pandas as pd
import numpy as np
# Widen the per-cell display width so long text values are not truncated
# in rendered tables. The fully qualified option name is used because pandas
# resolves abbreviated names by pattern matching, which can become ambiguous
# if a similarly named option is added in a later release.
pd.set_option('display.max_colwidth', 400)

### Video Game Information | Cleaning

In [3]:
# Load the video game information workbook into a DataFrame and preview
# the first five rows.
video_game_info_df = pd.read_excel(io='resources/video-game-info.xlsx')
video_game_info_df.head(n=5)

Unnamed: 0,UNIQUE ID,Name,Year_of_Release,Genre,Publisher,Developer,Rating
0,1,.hack//Infection Part 1,2002,Role-Playing,Atari,CyberConnect2,T
1,2,.hack//Mutation Part 2,2002,Role-Playing,Atari,CyberConnect2,T
2,3,.hack//Outbreak Part 3,2002,Role-Playing,Atari,CyberConnect2,T
3,4,[Prototype],2009,Action,Activision,Radical Entertainment,M
4,5,[Prototype],2009,Action,Activision,Radical Entertainment,M


In [6]:
# Summarise column dtypes and non-null counts to spot missing values
# before cleaning.
video_game_info_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6894 entries, 0 to 6893
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   UNIQUE ID        6894 non-null   int64 
 1   Name             6894 non-null   object
 2   Year_of_Release  6894 non-null   int64 
 3   Genre            6894 non-null   object
 4   Publisher        6893 non-null   object
 5   Developer        6890 non-null   object
 6   Rating           6826 non-null   object
dtypes: int64(2), object(5)
memory usage: 377.1+ KB


In [15]:
# Rename the spreadsheet headers to camelCase identifiers for SQL
# compatibility, then preview the result.
video_game_info_df = video_game_info_df.rename(
    {
        'UNIQUE ID': 'uniqueId',
        'Name': 'name',
        'Year_of_Release': 'yearReleased',
        'Genre': 'genre',
        'Publisher': 'publisher',
        'Developer': 'developer',
        'Rating': 'rating',
    },
    axis='columns',
)
video_game_info_df.head(n=5)

Unnamed: 0,uniqueId,name,yearReleased,genre,publisher,developer,rating
0,1,.hack//Infection Part 1,2002,Role-Playing,Atari,CyberConnect2,T
1,2,.hack//Mutation Part 2,2002,Role-Playing,Atari,CyberConnect2,T
2,3,.hack//Outbreak Part 3,2002,Role-Playing,Atari,CyberConnect2,T
3,4,[Prototype],2009,Action,Activision,Radical Entertainment,M
4,5,[Prototype],2009,Action,Activision,Radical Entertainment,M


### Video Game Sales | Cleaning

In [4]:
# Load the video game sales workbook into a DataFrame and preview
# the first five rows.
video_game_sales_df = pd.read_excel(io='resources/video-game-sales.xlsx')
video_game_sales_df.head(n=5)

Unnamed: 0,UNIQUE ID,Name,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,.hack//Infection Part 1,0.49,0.38,0.26,0.13,1.27
1,2,.hack//Mutation Part 2,0.23,0.18,0.2,0.06,0.68
2,3,.hack//Outbreak Part 3,0.14,0.11,0.17,0.04,0.46
3,4,[Prototype],0.84,0.35,0.0,0.12,1.31
4,5,[Prototype],0.65,0.4,0.0,0.19,1.24


In [10]:
# Summarise column dtypes and non-null counts for the sales data
# before cleaning.
video_game_sales_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6894 entries, 0 to 6893
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   UNIQUE ID     6894 non-null   int64  
 1   Name          6894 non-null   object 
 2   NA_Sales      6894 non-null   float64
 3   EU_Sales      6894 non-null   float64
 4   JP_Sales      6894 non-null   float64
 5   Other_Sales   6894 non-null   float64
 6   Global_Sales  6894 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 377.1+ KB


In [14]:
# Rename the spreadsheet headers to camelCase identifiers for SQL
# compatibility, then preview the result.
video_game_sales_df = video_game_sales_df.rename(
    {
        'UNIQUE ID': 'uniqueId',
        'Name': 'name',
        'NA_Sales': 'naSales',
        'EU_Sales': 'euSales',
        'JP_Sales': 'jpSales',
        'Other_Sales': 'otherSales',
        'Global_Sales': 'globalSales',
    },
    axis='columns',
)
video_game_sales_df.head(n=5)

Unnamed: 0,uniqueId,name,naSales,euSales,jpSales,otherSales,globalSales
0,1,.hack//Infection Part 1,0.49,0.38,0.26,0.13,1.27
1,2,.hack//Mutation Part 2,0.23,0.18,0.2,0.06,0.68
2,3,.hack//Outbreak Part 3,0.14,0.11,0.17,0.04,0.46
3,4,[Prototype],0.84,0.35,0.0,0.12,1.31
4,5,[Prototype],0.65,0.4,0.0,0.19,1.24


### Video Game User & Critic Scores | Cleaning

In [5]:
# Load the video game user and critic scores workbook into a DataFrame
# and preview the first five rows.
video_game_scores_df = pd.read_excel(io='resources/video-game-scores.xlsx')
video_game_scores_df.head(n=5)

Unnamed: 0,UNIQUE ID,Name,Critic_Score,Critic_Count,User_Score,User_Count
0,1,.hack//Infection Part 1,75,35,8.5,60
1,2,.hack//Mutation Part 2,76,24,8.9,81
2,3,.hack//Outbreak Part 3,70,23,8.7,19
3,4,[Prototype],78,83,7.8,356
4,5,[Prototype],79,53,7.7,308


In [16]:
# Summarise column dtypes and non-null counts for the scores data
# before cleaning.
video_game_scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6894 entries, 0 to 6893
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   UNIQUE ID     6894 non-null   int64  
 1   Name          6894 non-null   object 
 2   Critic_Score  6894 non-null   int64  
 3   Critic_Count  6894 non-null   int64  
 4   User_Score    6894 non-null   float64
 5   User_Count    6894 non-null   int64  
dtypes: float64(1), int64(4), object(1)
memory usage: 323.3+ KB


In [17]:
# Rename the spreadsheet headers to camelCase identifiers for SQL
# compatibility, then preview the result.
video_game_scores_df = video_game_scores_df.rename(
    {
        'UNIQUE ID': 'uniqueId',
        'Name': 'name',
        'Critic_Score': 'criticScore',
        'Critic_Count': 'criticCount',
        'User_Score': 'userScore',
        'User_Count': 'userCount',
    },
    axis='columns',
)
video_game_scores_df.head(n=5)

Unnamed: 0,uniqueId,name,criticScore,criticCount,userScore,userCount
0,1,.hack//Infection Part 1,75,35,8.5,60
1,2,.hack//Mutation Part 2,76,24,8.9,81
2,3,.hack//Outbreak Part 3,70,23,8.7,19
3,4,[Prototype],78,83,7.8,356
4,5,[Prototype],79,53,7.7,308
