# Merging data

There are 3 parts, each with 3 subparts, totaling 9 parts. To get a complete dataset, we need to merge all the parts together.

In [1]:
import pandas as pd
import numpy as np

In [2]:
main_df = []
for i in range(1, 4):
    for j in range(1, 4):
        filepath = f"./data/outputs/part_{i}_{j}_steam_data.csv"
        main_df.append(pd.read_csv(filepath))

main_df = pd.concat(main_df)
main_df.to_csv("./data/outputs/steam_data.csv", index=False)
main_df


Unnamed: 0,Game Title,Game Genre,Pricing,Publisher,Release Date,Platform,Rating,Number of Ratings
0,Mycelium,"Adventure, Indie, RPG, Strategy",$5.99,Alex Grim,Oct 22 2024,,100.0,12
1,Relic Keepers,"Action, Adventure, Indie",$0.99,Idea Cabin,Sep 12 2017,,13.0,15
2,OUTBRK,"Action, Adventure, Simulation, Strategy, Early...",$34.99,Sublime,Jun 28 2024,,78.0,1132
3,Whipseey and the Lost Atlas,"Action, Adventure, Indie",$5.99,Daniel A. Ramirez,Aug 27 2019,,62.0,24
4,TT Isle Of Man: Ride on the Edge 3,"Racing, Simulation, Sports",$49.99,Raceward Studio,May 11 2023,,75.0,308
...,...,...,...,...,...,...,...,...
6489,Raiden III x MIKADO MANIAX,"Action, Adventure",$29.99,MOSS,Sep 7 2023,,61.0,13
6490,Blush Blush,"Casual, Indie, Simulation, Free To Play",Free To Play,Sad Panda Studios,Apr 4 2019,,87.0,6899
6491,Dungeons & Treasure VR,"Action, Adventure, Indie",$19.99,SDC Ventures,Nov 30 2017,,74.0,54
6492,Public Enemy: Revolution Simulator,Action,Free,Concrete Games,Aug 19 2019,,89.0,73


# Preprocessing

The steps to prepare data for merging with other datasets are:
- Remove duplicates
- Remove missing values
- Convert data types

### Drop duplicates

In [3]:
print(f"Before removeing duplicates:\t{main_df.shape}")
main_df.drop_duplicates(subset=["Game Title"], keep="first", inplace=True)
print(f"After removeing duplicates:\t{main_df.shape}")

Before removeing duplicates:	(58320, 8)
After removeing duplicates:	(49229, 8)


In [8]:
# remove empty pricing
main_df = main_df[main_df["Pricing"].notna()]
main_df

Unnamed: 0,Game Title,Game Genre,Pricing,Publisher,Release Date,Platform,Rating,Number of Ratings
0,Mycelium,"Adventure, Indie, RPG, Strategy",$5.99,Alex Grim,Oct 22 2024,,100.0,12
1,Relic Keepers,"Action, Adventure, Indie",$0.99,Idea Cabin,Sep 12 2017,,13.0,15
2,OUTBRK,"Action, Adventure, Simulation, Strategy, Early...",$34.99,Sublime,Jun 28 2024,,78.0,1132
3,Whipseey and the Lost Atlas,"Action, Adventure, Indie",$5.99,Daniel A. Ramirez,Aug 27 2019,,62.0,24
4,TT Isle Of Man: Ride on the Edge 3,"Racing, Simulation, Sports",$49.99,Raceward Studio,May 11 2023,,75.0,308
...,...,...,...,...,...,...,...,...
6489,Raiden III x MIKADO MANIAX,"Action, Adventure",$29.99,MOSS,Sep 7 2023,,61.0,13
6490,Blush Blush,"Casual, Indie, Simulation, Free To Play",Free To Play,Sad Panda Studios,Apr 4 2019,,87.0,6899
6491,Dungeons & Treasure VR,"Action, Adventure, Indie",$19.99,SDC Ventures,Nov 30 2017,,74.0,54
6492,Public Enemy: Revolution Simulator,Action,Free,Concrete Games,Aug 19 2019,,89.0,73
