# MERGE DATASET - API DATA

### Libraries used

In [25]:
import os
import sys
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
import seaborn as sns
from sqlalchemy import create_engine

## Connection to database

In [26]:
# Make the project-local helper in ../otherconfig/ importable, then read the
# database settings from the ini file.
sys.path.append('../otherconfig/')
from dbconfig import configuration

config = configuration('../otherconfig/database.ini')

# Percent-encode the credentials before placing them in the URL: a raw '@',
# ':', '/', '#' or '%' inside the user name or password would otherwise be
# parsed as URL syntax and produce a wrong host/user or a parse error.
# safe='' makes quote() escape '/' as well; str() keeps non-string config
# values working exactly as the plain f-string interpolation did.
db_url = (
    "postgresql+psycopg2://"
    f"{quote(str(config['user']), safe='')}:{quote(str(config['password']), safe='')}"
    f"@{config['host']}/{config['database']}"
)
engine = create_engine(db_url)

# Open one connection eagerly so a bad configuration is reported here rather
# than in a later cell. `conn` stays open on purpose: the load cell below
# passes it to DataFrame.to_sql.
try:
    conn = engine.connect()
    print("Database connection successful.")
except Exception as e:
    print("Database connection failed:", e)
    sys.exit(1)


Reading configuration from ../otherconfig/database.ini
Database connection successful.


The next section puts the DAGs into practice, following their steps one by one.

### Load the CSV data into a DataFrame

In [27]:

# Path of the Metacritic CSV, relative to this notebook.
metacritic_data_path = '../data/metacritic_data.csv'
try:
    metacritic_data = pd.read_csv(metacritic_data_path)
except Exception as err:
    # Report the failure and stop the cell with a non-zero exit status.
    print("Failed to load CSV data:", err)
    sys.exit(1)
else:
    # Only reached when the read succeeded.
    print(f"CSV data loaded successfully with {len(metacritic_data)} records.")

CSV data loaded successfully with 14055 records.


### Load data into PostgreSQL database

In [28]:

# Write the Metacritic frame to the 'metacritic_data' table over the open
# connection. if_exists='replace' drops and recreates the table when it already
# exists; index=False keeps the DataFrame index out of the table.
try:
    metacritic_data.to_sql(
        name='metacritic_data',
        con=conn,
        if_exists='replace',
        index=False,
    )
except Exception as err:
    print("Failed to load data into the PostgreSQL database:", err)
    sys.exit(1)
else:
    print("Data loaded into the PostgreSQL database successfully.")

Data loaded into the PostgreSQL database successfully.


## API DATA

In [40]:
# Directory that holds the API export files
api_data_path = '../data/'
# The export is split into 13 numbered parts: api_data_1.csv ... api_data_13.csv
csv_files = [
    os.path.join(api_data_path, f'api_data_{part}.csv')
    for part in range(1, 14)
]



In [41]:
# Holds one DataFrame per API export file, in the same order as csv_files
dataframes = []

# Read every file; the first failure is reported and stops the cell
for file in csv_files:
    try:
        df = pd.read_csv(file)
    except Exception as err:
        print(f"Failed to load {file}:", err)
        sys.exit(1)
    else:
        dataframes.append(df)
        print(f"Loaded {file} successfully with {len(df)} records.")


Loaded ../data/api_data_1.csv successfully with 1000 records.
Loaded ../data/api_data_2.csv successfully with 1000 records.
Loaded ../data/api_data_3.csv successfully with 900 records.
Loaded ../data/api_data_4.csv successfully with 240 records.
Loaded ../data/api_data_5.csv successfully with 1000 records.
Loaded ../data/api_data_6.csv successfully with 1000 records.
Loaded ../data/api_data_7.csv successfully with 1000 records.
Loaded ../data/api_data_8.csv successfully with 1000 records.
Loaded ../data/api_data_9.csv successfully with 1000 records.
Loaded ../data/api_data_10.csv successfully with 1000 records.
Loaded ../data/api_data_11.csv successfully with 1000 records.
Loaded ../data/api_data_12.csv successfully with 1000 records.
Loaded ../data/api_data_13.csv successfully with 1000 records.


In [42]:
# Concatenate all DataFrames into a single DataFrame
combined_api_data = pd.concat(dataframes, ignore_index=True)
print(f"Combined API data has {len(combined_api_data)} records.")

Combined API data has 12140 records.


In [44]:
# Preview the first five rows of the combined API data
combined_api_data.head(n=5)

Unnamed: 0,id,slug,name,released,tba,background_image,rating,rating_top,ratings,ratings_count,...,dominant_color,platforms,parent_platforms,genres,stores,clip,tags,esrb_rating,short_screenshots,community_rating
0,9907,pictopix,Pictopix,2017-01-05,False,https://media.rawg.io/media/screenshots/ff0/ff...,3.96,4,"[{'id': 4, 'title': 'recommended', 'count': 20...",24,...,0f0f0f,"[{'platform': {'id': 4, 'name': 'PC', 'slug': ...","[{'platform': {'id': 1, 'name': 'PC', 'slug': ...","[{'id': 40, 'name': 'Casual', 'slug': 'casual'...","[{'id': 10805, 'store': {'id': 1, 'name': 'Ste...",,"[{'id': 31, 'name': 'Singleplayer', 'slug': 's...",,"[{'id': -1, 'image': 'https://media.rawg.io/me...",
1,4119,chronovolt,Chronovolt,2012-11-21,False,https://media.rawg.io/media/games/8dc/8dc50d62...,2.55,3,"[{'id': 3, 'title': 'meh', 'count': 11, 'perce...",20,...,0f0f0f,"[{'platform': {'id': 19, 'name': 'PS Vita', 's...","[{'platform': {'id': 2, 'name': 'PlayStation',...","[{'id': 4, 'name': 'Action', 'slug': 'action',...","[{'id': 4440, 'store': {'id': 3, 'name': 'Play...",,"[{'id': 37796, 'name': 'exclusive', 'slug': 'e...","{'id': 1, 'name': 'Everyone', 'slug': 'everyone'}","[{'id': -1, 'image': 'https://media.rawg.io/me...",
2,3935,breakquest-extra-evolution,BreakQuest: Extra Evolution,2012-10-16,False,https://media.rawg.io/media/screenshots/4e3/4e...,2.6,3,"[{'id': 3, 'title': 'meh', 'count': 10, 'perce...",20,...,0f0f0f,"[{'platform': {'id': 16, 'name': 'PlayStation ...","[{'platform': {'id': 2, 'name': 'PlayStation',...","[{'id': 11, 'name': 'Arcade', 'slug': 'arcade'...","[{'id': 4253, 'store': {'id': 3, 'name': 'Play...",,"[{'id': 114, 'name': 'Physics', 'slug': 'physi...","{'id': 2, 'name': 'Everyone 10+', 'slug': 'eve...","[{'id': -1, 'image': 'https://media.rawg.io/me...",
3,1991,killallzombies-2,#KILLALLZOMBIES,2014-10-28,False,https://media.rawg.io/media/screenshots/675/67...,2.36,1,"[{'id': 1, 'title': 'skip', 'count': 5, 'perce...",11,...,0f0f0f,"[{'platform': {'id': 1, 'name': 'Xbox One', 's...","[{'platform': {'id': 1, 'name': 'PC', 'slug': ...","[{'id': 4, 'name': 'Action', 'slug': 'action',...","[{'id': 2061, 'store': {'id': 2, 'name': 'Xbox...",,"[{'id': 31, 'name': 'Singleplayer', 'slug': 's...",,"[{'id': -1, 'image': 'https://media.rawg.io/me...",
4,979,tethered,Tethered,2016-10-25,False,https://media.rawg.io/media/screenshots/1e6/1e...,2.75,4,"[{'id': 4, 'title': 'recommended', 'count': 5,...",12,...,0f0f0f,"[{'platform': {'id': 18, 'name': 'PlayStation ...","[{'platform': {'id': 1, 'name': 'PC', 'slug': ...","[{'id': 3, 'name': 'Adventure', 'slug': 'adven...","[{'id': 1002, 'store': {'id': 3, 'name': 'Play...",,"[{'id': 31, 'name': 'Singleplayer', 'slug': 's...","{'id': 2, 'name': 'Everyone 10+', 'slug': 'eve...","[{'id': -1, 'image': 'https://media.rawg.io/me...",


## General description

In [22]:
# Summarise the Metacritic frame: column names, non-null counts, dtypes and
# memory usage. Requires `metacritic_data` from the CSV load cell above.
metacritic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14055 entries, 0 to 14054
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Title          14034 non-null  object 
 1   released       13991 non-null  object 
 2   Developer      13917 non-null  object 
 3   Publisher      13917 non-null  object 
 4   Genres         14034 non-null  object 
 5   rating         11005 non-null  object 
 6   User Score     14055 non-null  float64
 7   ratings_count  14055 non-null  int64  
dtypes: float64(1), int64(1), object(6)
memory usage: 878.6+ KB
