In [1]:
import os
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine


class DatabaseConnection:
    """Holds PostgreSQL connection settings and a lazily created SQLAlchemy engine.

    The engine is built by ``connect``, which ``query_to_dataframe`` calls
    automatically the first time it is needed.
    """

    def __init__(self, dbname, user, host, password, port):
        """Store the connection settings; no engine is created here.

        Args:
            dbname: Database name placed in the path part of the URL.
            user: Login name.
            host: Server host.
            password: Password for ``user``.
            port: Server port; interpolated into the URL as given.
        """
        self.dbname = dbname
        self.user = user
        self.host = host
        self.password = password
        self.port = port
        self.engine = None  # set by connect()

    def connect(self):
        """Build an engine from the stored settings, keep it, and return it.

        The user name and password are percent-encoded before being placed
        in the URL, so characters that are URL delimiters (``@``, ``:``,
        ``/``, ``#``, ...) cannot corrupt it. Every call creates a new engine
        and overwrites ``self.engine``.

        Returns:
            The object returned by ``create_engine`` for the assembled URL.
        """
        # safe="" makes quote() escape every reserved character, including "/".
        user = quote(str(self.user), safe="")
        password = quote(str(self.password), safe="")
        url = f"postgresql://{user}:{password}@{self.host}:{self.port}/{self.dbname}"
        self.engine = create_engine(url)
        return self.engine

    def query_to_dataframe(self, query):
        """Run ``query`` and return the result as a pandas DataFrame.

        Creates the engine first if ``connect`` has not been called yet.

        Args:
            query: SQL passed straight through to ``pd.read_sql``.

        Returns:
            The DataFrame produced by ``pd.read_sql``.
        """
        if self.engine is None:
            self.connect()

        return pd.read_sql(query, self.engine)

In [2]:
# Connection settings are read from the standard PostgreSQL environment
# variables when present, so real credentials never need to live in the
# notebook; the fallbacks are the local development values.
# There is deliberately no `__main__` guard: every statement below needs
# `db_conn`, so the setup has to run whenever this cell runs.
db_conn = DatabaseConnection(
    dbname=os.environ.get("PGDATABASE", "devdb"),
    user=os.environ.get("PGUSER", "timeless"),
    host=os.environ.get("PGHOST", "localhost"),
    password=os.environ.get("PGPASSWORD", "password"),
    port=os.environ.get("PGPORT", "5432"),
)

# Create the engine up front; query_to_dataframe reuses it for every load.
engine = db_conn.connect()

# Load Movies Data
query = "SELECT movie_id, title, genres FROM core_movie;"
movies_df = db_conn.query_to_dataframe(query)

# Load Ratings Data
query = "SELECT user_id, movie_id, rating FROM core_ratings;"
ratings_df = db_conn.query_to_dataframe(query)

# Load Tags Data
query = "SELECT user_id, movie_id, tag FROM core_tags;"
tags_df = db_conn.query_to_dataframe(query)

# Load Links Data
query = "SELECT movie_id, imdb_id, tmdb_id FROM core_links;"
links_df = db_conn.query_to_dataframe(query)

In [5]:
# Show the first ten movie rows as plain text.
# Optional export, currently disabled: movies_df.to_csv("movies.csv")
print(movies_df.head(n=10))

   movie_id                               title                 genres
0         5  Father of the Bride Part II (1995)                 Comedy
1         8                 Tom and Huck (1995)     Adventure|Children
2         9                 Sudden Death (1995)                 Action
3        12  Dracula: Dead and Loving It (1995)          Comedy|Horror
4        14                        Nixon (1995)                  Drama
5        16                       Casino (1995)            Crime|Drama
6        18                   Four Rooms (1995)                 Comedy
7        21                   Get Shorty (1995)  Comedy|Crime|Thriller
8        23                    Assassins (1995)  Action|Crime|Thriller
9        24                       Powder (1995)           Drama|Sci-Fi


In [6]:
# Show the first ten rating rows as plain text.
# Optional export, currently disabled: ratings_df.to_csv("ratings.csv")
print(ratings_df.head(n=10))

   user_id  movie_id  rating
0        2        31     5.0
1        2        34     5.0
2        2        39     5.0
3        2        48     5.0
4        2       153     3.0
5        2       185     5.0
6        2       186     5.0
7        2       193     3.0
8        2       207     5.0
9        2       216     4.0


In [7]:
# Show the first ten tag rows as plain text.
# Optional export, currently disabled: tags_df.to_csv("tags.csv")
print(tags_df.head(n=10))

   user_id  movie_id                       tag
0       22     26479               Kevin Kline
1       22     79592                  misogyny
2       22    247150                acrophobia
3       34      2174                     music
4       34      8623              Steve Martin
5       55      5766  the killls and the score
6       58      7451                  bullying
7       58     49272                 '60s feel
8       58     61132             fake trailers
9       58     63113                       007


In [8]:
# Show the first ten link rows as plain text.
# Optional export, currently disabled: links_df.to_csv("links.csv")
print(links_df.head(n=10))

   movie_id  imdb_id  tmdb_id
0         1   114709    862.0
1         2   113497   8844.0
2         3   113228  15602.0
3         4   114885  31357.0
4         5   113041  11862.0
5         6   113277    949.0
6         7   114319  11860.0
7         8   112302  45325.0
8         9   114576   9091.0
9        10   113189    710.0
