In [19]:
import os
import json
import datetime
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

True

In [2]:
# establish a connection to the MongoDB database
from pymongo import MongoClient

# connect to your Atlas cluster
client = MongoClient(os.environ["MONGODB_URI"])

In [17]:
# establish a connection to the PostgreSQL database
import psycopg2 as pg

conn = pg.connect(
    dbname=os.environ["POSTGRES_DB"],
    user=os.environ["POSTGRES_USER"],
    password=os.environ["POSTGRES_PASSWORD"],
    host=os.environ["POSTGRES_HOST"]
)

## Load Fandom pages into postgres and mongo db

In [4]:
# get list of names of files in a fodler
folder = 'fandom_data'
file_names = os.listdir(folder)

In [5]:
# parse json contents of each file
data = []
for file_name in file_names:
    if file_name.endswith('.json'):
        with open(f'{folder}/{file_name}', 'r') as file:
            l = json.load(file)
            if len(l) > 1:
                for item in zip(l[0], l[1]):
                    item = {
                        'game_name': file_name.split('.')[0],
                        'game_url': item[1].split('wiki')[0],
                        'page_name': item[1].split('/')[-1],
                        'page_url': item[1],
                        'content': item[0]
                    }
                    data.append(item)



In [62]:
data[25]

{'game_name': 'lostark',
 'game_url': 'https://lostark.fandom.com/',
 'page_name': 'A_Bell_in_the_Night',
 'page_url': 'https://lostark.fandom.com/wiki/A_Bell_in_the_Night',
 'content': '<aside class="portable-infobox pi-background pi-border-color pi-theme-wikia pi-layout-default" role="region">\n<h2>A Bell in the Night</h2>\n\n<div>\n<h3>Type</h3>\n<div>Normal</div>\n</div>\n<div>\n<h3>Continent</h3>\n<div><a>East Luterra</a></div>\n</div>\n<div>\n<h3>Location</h3>\n<div><a>Blackrose Chapel</a></div>\n</div>\n<div>\n<h3>Start NPC</h3>\n<div><span>Vion</span></div>\n</div>\n</aside>',
 '_id': ObjectId('6713a1e88f64cb721b2f3f05')}

In [18]:
# get the database and collection on which to run the operation
collection = client['Fandom']['Pages']
cur = conn.cursor()

for item in data:
    # insert into MongoDB
    result = collection.insert_one(item)
    # result.inserted_id

    # insert into PostgreSQL
    cur.execute(
        f"""
        INSERT INTO fandom_pages (object_id, game_name, game_url, page_name, page_url, content)
        VALUES (%s, %s, %s, %s, %s, %s);
        """,
        (str(result.inserted_id), item['game_name'], item['game_url'], item['page_name'], item['page_url'], item['content'])
    )
    conn.commit()
    
cur.close()

## Load Metacritic data into postgres

In [None]:
# Title,Score,Platforms,Release Date,Developers,Publisher,Genres
# Elden Ring: Shadow of the Erdtree,94,"['PC', 'Xbox Series X', 'PlayStation 5', 'PlayStation 4', 'Xbox One']","Jun 21, 2024",['From Software'],Bandai Namco Games,['Action RPG']
# Batman: Arkham City,94,"['PC', 'Xbox 360', 'PlayStation 3', 'Nintendo Switch']","Oct 18, 2011",['Rocksteady Studios'],Warner Bros. Interactive Entertainment,['Open-World Action']

In [56]:
import csv

mc_filename = 'metacritic_2010_2024.csv'
mc_data = []

with open(mc_filename, newline='') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    for row in reader:
        item = {
            'title': row['Title'],
            'score': int(row['Score']) if row['Score'] != '' else None,
            # 'platforms': json.loads(row['Platforms'].replace("'", '"')),
            'release_date': datetime.datetime.strptime(row['Release Date'], '%b %d, %Y') if row['Release Date'] != '' else None,
            # Date string: 'May 23, 2010'
            # 'developers': json.loads(row['Developers'].replace("'", '"')),
            'publisher': row['Publisher'],
            # 'genres': json.loads(row['Genres'].replace("'", '"'))
        }
        try:
            item['platforms'] = json.loads(row['Platforms'].replace("'", '"'))
        except:
            item['platforms'] = json.loads(row['Platforms'])

        if row['Developers'] != '':
            try:
                item['developers'] = json.loads(row['Developers'].replace("'", '"'))
            except:
                
                try:
                    item['developers'] = json.loads(row['Developers'])
                except:
                    print(row)

        try:
            item['genres'] = json.loads(row['Genres'].replace("'", '"'))
        except:
            item['genres'] = json.loads(row['Genres'])
            
        mc_data.append(item)



{'Title': "Hero's Hour", 'Score': '82', 'Platforms': "['PC', 'Nintendo Switch']", 'Release Date': 'Mar 1, 2022', 'Developers': '[\'Benjamin "ThingOnItsOwn" Hauer\']', 'Publisher': 'Goblinz Studio', 'Genres': "['Turn-Based Strategy']"}
{'Title': 'Cyber Shadow', 'Score': '82', 'Platforms': "['Nintendo Switch', 'PC', 'PlayStation 4', 'Xbox One', 'PlayStation 5']", 'Release Date': 'Jan 26, 2021', 'Developers': '[\'Aarne "MekaSkull" Hunziker\']', 'Publisher': 'Yacht Club Games', 'Genres': "['2D Platformer']"}


In [57]:
len(mc_data)

8015