<a href="https://colab.research.google.com/github/mertqara/PopularGameGenres/blob/main/data_merge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
# Re-merge using only vgsales.csv and metacritic.csv
import pandas as pd

# Raw GitHub links to the CSV files stored in the repository's data/ folder.
# All three live under the same prefix, so each link is built from one shared base.
_RAW_DATA_BASE = 'https://raw.githubusercontent.com/mertqara/PopularGameGenres/main/data'
vgsales_url = f'{_RAW_DATA_BASE}/vgsales.csv'
metacritic_url = f'{_RAW_DATA_BASE}/metacritic.csv'
popular_url = f'{_RAW_DATA_BASE}/popular_games.csv'

# Load the sales table and normalise the two columns later used as merge keys:
#   * Game         -> lower-cased, surrounding whitespace stripped
#   * Release_Year -> numeric; anything unparsable becomes NaN
sales_df = (
    pd.read_csv(vgsales_url)
    .rename(columns={'Name': 'Game', 'Year': 'Release_Year'})
    .assign(
        Game=lambda frame: frame['Game'].str.lower().str.strip(),
        Release_Year=lambda frame: pd.to_numeric(frame['Release_Year'], errors='coerce'),
    )
)

# Load the metacritic table, align its column names with the sales table and
# derive the merge keys:
#   * Game         -> lower-cased, surrounding whitespace stripped
#   * Release_Year -> year part of release_date; unparsable dates become NaN
meta_df = pd.read_csv(metacritic_url).rename(
    columns={'name': 'Game', 'meta_score': 'MetaScore', 'user_review': 'UserReview'}
)
meta_df = meta_df.assign(
    Game=meta_df['Game'].str.lower().str.strip(),
    Release_Year=pd.to_datetime(meta_df['release_date'], errors='coerce').dt.year,
).loc[:, ['Game', 'Release_Year', 'MetaScore', 'UserReview']]  # keep only the columns needed downstream

# Merge only the vgsales and metacritic datasets.
#
# The join key is (Game, Release_Year) and does not include the platform. If
# metacritic holds several rows for the same title and year, each of them would
# be matched to every sales row of that title, duplicating sales rows and
# inflating any later sum over the sales columns. To keep exactly one output row
# per sales row, the scores are first collapsed to one averaged row per key.
# Non-numeric score values are coerced to NaN beforehand so they are simply left
# out of the mean. dropna=False keeps entries whose release year is missing.
# NOTE(review): if platform-specific scores are wanted, the platform names of
# both tables would have to be mapped onto each other and added to the join key.
meta_scores_df = (
    meta_df.assign(
        MetaScore=pd.to_numeric(meta_df['MetaScore'], errors='coerce'),
        UserReview=pd.to_numeric(meta_df['UserReview'], errors='coerce'),
    )
    .groupby(['Game', 'Release_Year'], as_index=False, dropna=False)[['MetaScore', 'UserReview']]
    .mean()
)

# A left join is sufficient: rows that exist only in metacritic carry no
# Global_Sales value and were always discarded by the sales filter below.
# validate= makes pandas raise if the score table ever contains duplicate keys.
merged_simple_df = pd.merge(
    sales_df,
    meta_scores_df,
    on=['Game', 'Release_Year'],
    how='left',
    validate='many_to_one',
)

# Remove future years and rows without positive global sales. Comparisons with
# NaN evaluate to False, so rows with a missing year or missing sales go too.
MAX_RELEASE_YEAR = 2024
has_valid_year_and_sales = (
    (merged_simple_df['Release_Year'] <= MAX_RELEASE_YEAR)
    & (merged_simple_df['Global_Sales'] > 0)
)
merged_simple_df = merged_simple_df[has_valid_year_and_sales].reset_index(drop=True)

# Order by Rank and renumber the index 0..n-1
merged_simple_df_sorted = merged_simple_df.sort_values(by='Rank').reset_index(drop=True)

# Display the top 10 ranked games
display(merged_simple_df_sorted.head(10))

# Keep the sorted & cleaned frame under a descriptive name for later use
# (this is only an alias of the same object; nothing is written to disk)
merged_vgsales_metacritic_only = merged_simple_df_sorted


Unnamed: 0,Rank,Game,Platform,Release_Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,MetaScore,UserReview
0,1.0,wii sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74,76.0,8.1
1,2.0,super mario bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,
2,3.0,mario kart wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82,82.0,8.4
3,4.0,wii sports resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0,80.0,8.2
4,5.0,pokemon red/pokemon blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,
5,6.0,tetris,GB,1989.0,Puzzle,Nintendo,23.2,2.26,4.22,0.58,30.26,,
6,7.0,new super mario bros.,DS,2006.0,Platform,Nintendo,11.38,9.23,6.5,2.9,30.01,89.0,8.5
7,8.0,wii play,Wii,2006.0,Misc,Nintendo,14.03,9.2,2.93,2.85,29.02,,
8,9.0,new super mario bros. wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62,87.0,8.3
9,10.0,duck hunt,NES,1984.0,Shooter,Nintendo,26.93,0.63,0.28,0.47,28.31,,
