In [83]:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Column, String, Integer, Float, Date, Table, ForeignKey
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.orm import Session, relationship

import pandas as pd

In [84]:
# Load the connection settings; only the first row of the CSV is used.
credentials = pd.read_csv('../credentials/credentials.csv')

# Read the first value of every needed column, in the same order as the
# assignments below, keyed by the CSV column name.
_first_row = {
    column: credentials[column].iloc[0]
    for column in ('Host', 'User', 'Password', 'Database', 'Port')
}

database_adress = _first_row['Host']
database_user = _first_row['User']
database_password = _first_row['Password']
database_name = _first_row['Database']
database_port = _first_row['Port']

# Echo only the user name (never the password) as a quick sanity check.
print(database_user)

kubacka


In [85]:
# Declarative base created by SQLAlchemy's automap extension; every ORM class
# in this notebook derives from it and Base.prepare() is called on it later.
Base = automap_base()

##### Create the association table **movies_genres** (Python variable `movie_genres`), which sits between the tables movies and genres and models their many-to-many relationship

In [86]:
# Association table for the many-to-many link between movies and genres.
# Each column is a foreign key to the primary key of one side.
_movie_genre_columns = (
    Column("movieId", ForeignKey("movies.movieId")),
    Column("genreId", ForeignKey("genres.genreId")),
)

movie_genres = Table("movies_genres", Base.metadata, *_movie_genre_columns)

##### Creation of the class **Genre** (table name 'genres') with its 3 properties genre_id, name and movies (all the movies that are assigned to that genre) <br/> Movies is a many-to-many relation because every genre can be assigned to multiple movies and every movie can have multiple genres

In [87]:
class Genre(Base):
   """ORM class mapped to the ``genres`` table.

   Attributes:
      genre_id: primary key, stored in column ``genreId``.
      name: genre name, stored in column ``genrename``.
      movies: read-only (``viewonly``) collection of Movie objects reached
         through the ``movie_genres`` association table.
   """
   __tablename__ = 'genres' 
   genre_id = Column('genreId', Integer, primary_key = True)
   name = Column('genrename', String)
   # NOTE(review): backref='movies' asks SQLAlchemy to add an attribute called
   # `movies` to Movie (which would hold Genre objects). Movie already declares
   # `genres` for that direction, so this backref name looks unintended - confirm.
   movies = relationship('Movie', secondary=movie_genres,
                            backref='movies', viewonly=True)

##### Creation of the class **Genome_Tag** (table name 'genometags') with its two columns tagId and tag

In [88]:
class Genome_Tag(Base):
    """ORM class mapped to the ``genometags`` table.

    Attributes:
        tagId: primary key of the genome tag.
        tag: text of the genome tag.
        genome_scores: read-only (``viewonly``) collection of the
            Genome_Score rows that reference this tag.
    """
    __tablename__ = 'genometags'
    tagId = Column('tagId', Integer, primary_key=True)
    tag = Column('tag', String)
    # back_populates has to name the relationship on Genome_Score that points
    # back at this class. Genome_Score declares it as `tag` (there is no
    # declared `genome_tag` attribute), so that is the name used here.
    genome_scores = relationship("Genome_Score", back_populates='tag', viewonly=True)

##### Creation of the class **Link** (table name 'links') with its three properties movie, imdb_id and tmdb_id. <br/> The column movieId is the primary key of the table and at the same time the foreign key to the table movies

In [89]:
class Link(Base):
    """ORM class mapped to the ``links`` table (external ids of a movie).

    Attributes:
        movieId: primary key and foreign key to ``movies.movieId``.
        movie: read-only (``viewonly``) scalar relationship to the Movie;
            paired with ``Movie.link`` via ``back_populates``.
        imdb_id: value of column ``imdbId``.
        tmdb_id: value of column ``tmdbId``.
    """
    __tablename__ = 'links' 
    # Serves as primary key and as foreign key to the movies table.
    movieId = Column(Integer, ForeignKey("movies.movieId"), primary_key = True)
    movie = relationship("Movie", back_populates="link", viewonly=True, uselist=False)
    # NOTE(review): the example output in this notebook prints IMDb ids with
    # leading zeros (e.g. 0114709), so the stored value may be text rather
    # than an integer - confirm against the database schema.
    imdb_id = Column('imdbId', Integer)
    tmdb_id = Column('tmdbId', Integer)

##### Creation of the class **Rating** (table name 'ratings') with its five properties rating_id, user_id, movie, rating, rating_date <br/>Movie to Rating is a one-to-many relationship, which means that every rating rates one movie and every movie can have multiple ratings

In [90]:
class Rating(Base):
    """ORM class mapped to the ``ratings`` table.

    Attributes:
        rating_id: primary key.
        user_id: value of column ``userId``.
        movieId: foreign key to ``movies.movieId``.
        movie: read-only (``viewonly``) scalar relationship to the rated Movie.
        rating: the rating value (float).
        rating_date: value of column ``rating_date``.
    """
    __tablename__ = 'ratings'
    rating_id = Column('rating_id', Integer, primary_key=True)
    user_id = Column('userId', Integer)
    movieId = Column(Integer, ForeignKey("movies.movieId"))
    # back_populates has to name the complementary attribute on Movie, which
    # is declared as `ratings` (plural); Movie has no `rating` attribute.
    movie = relationship("Movie", back_populates="ratings", viewonly=True, uselist=False)
    rating = Column('rating', Float)
    # NOTE(review): declared as Date, but the example output in this notebook
    # prints a time component (2009-10-27 09:00:21) - the column may really be
    # a DATETIME; confirm against the database schema.
    rating_date = Column('rating_date', Date)

##### Creation of the class **Tag** (table name 'tags') with its five properties tag_id, user_id, movie, tag, tag_date <br/>Movie to Tag is a one-to-many relationship, which means that every tag belongs to one movie and every movie can have multiple tags

In [91]:
class Tag(Base):
    """ORM class mapped to the ``tags`` table.

    Attributes:
        tag_id: primary key.
        user_id: value of column ``userId``.
        movieId: foreign key to ``movies.movieId``.
        movie: read-only (``viewonly``) scalar relationship to the tagged Movie.
        tag: text of the tag.
        tag_date: value of column ``tag_date``.
    """
    __tablename__ = 'tags'
    tag_id = Column('tag_id', Integer, primary_key=True)
    user_id = Column('userId', Integer)
    movieId = Column(Integer, ForeignKey("movies.movieId"))
    # back_populates has to name the complementary attribute on Movie, which
    # is declared as `tags` (plural); Movie has no `tag` attribute.
    movie = relationship("Movie", back_populates="tags", viewonly=True, uselist=False)
    tag = Column('tag', String)
    # NOTE(review): declared as Date, but the example output in this notebook
    # prints a time component (2015-09-24 14:35:38) - confirm the column type.
    tag_date = Column('tag_date', Date)

##### Creation of the class **Genome_Score** (table name 'genomescores') with its three properties tagId, movie, relevance <br/>Genome_Score has a composite primary key made of tagId and movieId, because the combination of both is unique to every entry <br/>Genome_Score connects movies and genome tags: every genome score belongs to exactly one movie and one tag, and every movie has a relevance rating for every tag

In [92]:
class Genome_Score(Base):
    """ORM class mapped to the ``genomescores`` table.

    The primary key is the composite (tagId, movieId).

    Attributes:
        tagId: part of the primary key; foreign key to ``genometags.tagId``.
        tag: read-only (``viewonly``) relationship to the Genome_Tag; paired
            with ``Genome_Tag.genome_scores``.
        movieId: part of the primary key; foreign key to ``movies.movieId``.
        movie: read-only (``viewonly``) scalar relationship to the Movie.
        relevance: relevance value (float) of the tag for the movie.
    """
    __tablename__ = 'genomescores'
    tagId = Column(Integer, ForeignKey("genometags.tagId"), primary_key=True)
    tag = relationship('Genome_Tag', back_populates="genome_scores", viewonly=True)
    movieId = Column(Integer, ForeignKey("movies.movieId"), primary_key=True)
    # back_populates has to name the complementary attribute on Movie, which
    # is declared as `genome_scores` (plural); Movie has no `genome_score`.
    movie = relationship("Movie", back_populates="genome_scores", viewonly=True, uselist=False)
    relevance = Column('relevance', Float)

##### Creation of the class **Movie** (table name 'movies') with its eight properties movie_id, title, release_year, genres, ratings, tags, genome_scores and link <br/>Genres is a many-to-many relation because every movie can have multiple genres and every genre can be assigned to multiple movies<br/>Ratings is a one-to-many relation from Movie, because every movie can have multiple ratings and every rating belongs to one movie<br/>Tags is a one-to-many relation from Movie, because every movie can have multiple tags and every tag belongs to one movie<br/>Genome_Scores is a one-to-many relation from Movie; every movie has a relevance rating for every tag, so the combination of movieId and tagId is unique for every entry and forms the composite primary key of genomescores<br/>Link is a one-to-one relation, because every movie has at most one entry in links


In [93]:
class Movie(Base):
    """ORM class mapped to the ``movies`` table.

    Attributes:
        movie_id: primary key, stored in column ``movieId``.
        title: movie title.
        release_year: value of column ``release_year``.
        genres: read-only collection of Genre objects reached through the
            ``movie_genres`` association table.
        ratings: read-only collection of Rating objects for this movie.
        tags: read-only collection of Tag objects for this movie.
        genome_scores: read-only collection of Genome_Score objects.
        link: read-only scalar relationship to the Link row of this movie.

    All relationships are declared with ``viewonly=True``.
    """
    __tablename__ = 'movies'

    movie_id = Column('movieId', Integer, primary_key = True)
    title = Column('title', String)
    release_year = Column('release_year', Integer)

    # NOTE(review): backref='genres' asks SQLAlchemy to add an attribute called
    # `genres` to Genre (which would hold Movie objects). Genre already declares
    # `movies` for that direction, so this backref name looks unintended - confirm.
    genres = relationship('Genre', secondary=movie_genres,
                            backref='genres', viewonly=True)
    # Each of the following names the `movie` relationship on the child class.
    ratings = relationship("Rating" ,back_populates='movie', viewonly=True)
    tags = relationship("Tag" ,back_populates='movie', viewonly=True)
    genome_scores = relationship("Genome_Score" ,back_populates='movie', viewonly=True)
    # uselist=False makes this a single object rather than a list.
    link = relationship("Link" ,back_populates='movie', viewonly=True, uselist=False)

### Build the connection with the database

In [94]:
# Build the connection with the database.
# URL.create() escapes special characters (e.g. '@', '/', ':') in the user name
# and password; interpolating them into a URL string by hand would produce an
# unparsable URL for such credentials.
connection_url = URL.create(
    drivername='mysql+pymysql',
    username=str(database_user),
    password=str(database_password),
    host=str(database_adress),
    port=int(database_port),  # pandas yields a numpy integer; URL expects an int
    database=str(database_name),
)
engine = create_engine(connection_url)

# Reflect the database schema and finish mapping the classes declared on Base.
Base.prepare(autoload_with=engine)

session = Session(engine)

#### Function which prints out all the information about a movie

In [95]:
def print_Movie_Infos(movie, limit=10):
    """Print a summary of a movie and the first entries of its collections.

    Prints id, title and release year, every genre name, the IMDb/TMDb ids
    and then up to ``limit`` ratings, tags and genome scores (tag text
    followed by relevance).

    Unlike a fixed ``range(10)`` index loop, movies with fewer than ``limit``
    ratings, tags or genome scores are printed without raising IndexError,
    and a movie without a link prints ``None`` for both external ids.

    Args:
        movie: object exposing ``movie_id``, ``title``, ``release_year``,
            ``genres``, ``link``, ``ratings``, ``tags`` and ``genome_scores``.
        limit: maximum number of ratings, tags and genome scores to print.
            Defaults to 10.
    """
    print(f'Movie id: {movie.movie_id}')
    print(f'Title: {movie.title}')
    print(f'Release Year: {movie.release_year}')

    print()
    print("Genres: ")
    for genre in movie.genres:
        print(genre.name)

    # The link relationship is scalar and is None when no links row exists.
    link = movie.link
    imdb_id = link.imdb_id if link is not None else None
    tmdb_id = link.tmdb_id if link is not None else None

    print()
    print(f'IMDb id: {imdb_id}')
    print(f'TMDb id: {tmdb_id}')

    # Slicing never raises when the collection is shorter than `limit`.
    print()
    print(f'First {limit} ratings of movie {movie.title}')
    for rating in list(movie.ratings)[:limit]:
        print(rating.rating)

    print()
    print(f'First {limit} tags of movie {movie.title}')
    for tag in list(movie.tags)[:limit]:
        print(tag.tag)

    print()
    print(f'First {limit} genome_scores of movie {movie.title}')
    for genome_score in list(movie.genome_scores)[:limit]:
        print(genome_score.tag.tag)
        print(str(genome_score.relevance))

#### Function which prints out all the information about a genome_Tag

In [96]:
def print_Genome_Tag_Infos(genome_tag):
    """Print the id and the text of a genome tag, one value per line.

    Args:
        genome_tag: object exposing ``tagId`` and ``tag``.
    """
    def _report_lines():
        # Generated lazily so each attribute is read right before it is printed.
        yield f'Genome tag id: {genome_tag.tagId}'
        yield f'Genome tag: {genome_tag.tag}'

    for line in _report_lines():
        print(line)

#### Function which prints out all the information about a link

In [97]:
def print_Link_Infos(link):
    """Print the movie title and the external ids stored on a link.

    Args:
        link: object exposing ``movie`` (with a ``title``), ``imdb_id`` and
            ``tmdb_id``.
    """
    def _report_lines():
        # Generated lazily so each attribute is read right before it is printed.
        yield f'Movie title: {link.movie.title}'
        yield f'IMDb id: {link.imdb_id}'
        yield f'TMDb id: {link.tmdb_id}'

    for line in _report_lines():
        print(line)

#### Function which prints out all the information about a rating

In [98]:

def print_Rating_Infos(rating):
    """Print id, user id, movie title, value and date of a rating.

    Args:
        rating: object exposing ``rating_id``, ``user_id``, ``movie`` (with a
            ``title``), ``rating`` and ``rating_date``.
    """
    def _report_lines():
        # Generated lazily so each attribute is read right before it is printed.
        yield f'Rating id: {rating.rating_id}'
        yield f'Rating user id: {rating.user_id}'
        yield f'Rating movie title: {rating.movie.title}'
        yield f'Rating: {rating.rating}'
        yield f'Rating date: {rating.rating_date}'

    for line in _report_lines():
        print(line)

#### Function which prints out all the information about a tag

In [99]:

def print_Tag_Infos(tag):
    """Print id, user id, movie title, text and date of a tag.

    Args:
        tag: object exposing ``tag_id``, ``user_id``, ``movie`` (with a
            ``title``), ``tag`` and ``tag_date``.
    """
    def _report_lines():
        # Generated lazily so each attribute is read right before it is printed.
        yield f'Tag id: {tag.tag_id}'
        yield f'Tag user id: {tag.user_id}'
        yield f'Tag movie title: {tag.movie.title}'
        yield f'Tag: {tag.tag}'
        yield f'Tag date: {tag.tag_date}'

    for line in _report_lines():
        print(line)


#### Function which prints out all the information about a genome_Score

In [100]:

def print_Genome_Score_Infos(genome_score):
    """Print tag id, movie title and relevance of a genome score.

    Args:
        genome_score: object exposing ``tagId``, ``movie`` (with a ``title``)
            and ``relevance``.
    """
    def _report_lines():
        # Generated lazily so each attribute is read right before it is printed.
        yield f'Genome score tag id: {genome_score.tagId}'
        yield f'Genome score movie title: {genome_score.movie.title}'
        yield f'Genome score relevance: {genome_score.relevance}'

    for line in _report_lines():
        print(line)

#### Function which prints out all the information about a genre

In [101]:
# Function which prints out all the information about a genre
def print_Genre_Infos(genre, limit=10):
    """Print id and name of a genre and the titles of its first movies.

    Genres with fewer than ``limit`` movies are printed without raising
    IndexError (a fixed index loop up to 10 would fail for them).

    Args:
        genre: object exposing ``genre_id``, ``name`` and ``movies`` (each
            movie exposing ``title``).
        limit: maximum number of movie titles to print. Defaults to 10.
    """
    print(f'Genre id: {genre.genre_id}')
    print(f'Genre name: {genre.name}')
    print(f'First {limit} movies with the genre {genre.name}: {genre.genre_id}')

    # Slicing never raises when the genre has fewer than `limit` movies.
    for movie in list(genre.movies)[:limit]:
        print(movie.title)


In [102]:
# Fetch the first Movie row returned by the query and print its summary.
movie_example = session.query(Movie).first()
print_Movie_Infos(movie_example)

Movie id: 1
Title: Toy Story (1995)
Release Year: 1995

Genres: 
Adventure
Animation
Children
Comedy
Fantasy

IMDb id: 0114709
TMDb id: 862

First 10 ratings of movie Toy Story (1995)
4.0
5.0
4.5
4.0
4.0
4.0
5.0
5.0
4.5
5.0

First 10 tags of movie Toy Story (1995)
animated
buddy movie
Cartoon
cgi
comedy
computer animation
family
friendship
kids
toy

First 10 genome_scores of movie Toy Story (1995)
007
0.029
007 (series)
0.02375
18th century
0.05425
1920s
0.06875
1930s
0.16
1950s
0.19525
1960s
0.076
1970s
0.252
1980s
0.2275
19th century
0.024


In [103]:
# Fetch the first Genome_Tag row returned by the query and print it.
genome_tag_example = session.query(Genome_Tag).first()
print_Genome_Tag_Infos(genome_tag_example)

Genome tag id: 1
Genome tag: 007


In [104]:
# Fetch the first Link row returned by the query and print it.
link_example = session.query(Link).first()
print_Link_Infos(link_example)

Movie title: Toy Story (1995)
IMDb id: 0114709
TMDb id: 862


In [105]:
# Fetch the first Rating row returned by the query and print it.
rating_example = session.query(Rating).first()
print_Rating_Infos(rating_example)

Rating id: 1
Rating user id: 1
Rating movie title: Three Colors: Blue (Trois couleurs: Bleu) (1993)
Rating: 3.5
Rating date: 2009-10-27 09:00:21


In [106]:
# Fetch the first Tag row returned by the query and print it.
tag_example = session.query(Tag).first()
print_Tag_Infos(tag_example)

Tag id: 1
Tag user id: 14
Tag movie title: Braveheart (1995)
Tag: epic
Tag date: 2015-09-24 14:35:38


In [107]:
# Fetch the first Genome_Score row returned by the query and print it.
genome_score_example = session.query(Genome_Score).first()
print_Genome_Score_Infos(genome_score_example)

Genome score tag id: 1
Genome score movie title: Stranger by the Lake (L'inconnu du lac) (2013)
Genome score relevance: 0.005


In [108]:
# Fetch the first Genre row returned by the query and print it.
genre_example = session.query(Genre).first()
print_Genre_Infos(genre_example)

Genre id: 1
Genre name: Action
First 10 movies with the genre Action: 1
Heat (1995)
Sudden Death (1995)
GoldenEye (1995)
Cutthroat Island (1995)
Money Train (1995)
Assassins (1995)
Dead Presidents (1995)
Mortal Kombat (1995)
Guardian Angel (1994)
Lawnmower Man 2: Beyond Cyberspace (1996)


In [109]:
# Take the first movie whose title starts with 'Star Wars: Episode IV'
# (LIKE with a trailing % wildcard); the result is None if nothing matches.
star_wars_epIV = session.query(Movie).filter(Movie.title.like('Star Wars: Episode IV%')).first()

In [110]:
# Print the title of the matched movie.
print(star_wars_epIV.title)

Star Wars: Episode IV - A New Hope (1977)


In [111]:
# Count the ratings of the movie by copying its ratings collection into a list.
print(len(list(star_wars_epIV.ratings)))

81815


In [112]:
# Print the values of the first ten ratings of the movie.
star_wars_ratings = list(star_wars_epIV.ratings)

# Slicing stops early instead of raising IndexError when fewer than ten
# ratings exist.
for star_wars_rating in star_wars_ratings[:10]:
    print(star_wars_rating.rating)

4.5
4.0
5.0
4.0
5.0
1.5
5.0
5.0
5.0
4.5


In [31]:
# Print the full summary of the movie stored in star_wars_epIV.
print_Movie_Infos(star_wars_epIV)

Movie id: 260
Title: Star Wars: Episode IV - A New Hope (1977)
Release Year: 1977

Genres: 
Action
Adventure
Sci-Fi

IMDb id: 0076759
TMDb id: 11

First 10 ratings of movie Star Wars: Episode IV - A New Hope (1977)
4.5
4.0
5.0
4.0
5.0
1.5
5.0
5.0
5.0
4.5

First 10 tags of movie Star Wars: Episode IV - A New Hope (1977)
sci-fi
space action
classic sci-fi
Harrison Ford
must see
sci-fi
Class sci-fi
Mystical space epic
classic sci-fi
space adventure

First 10 genome_scores of movie Star Wars: Episode IV - A New Hope (1977)
007
0.0695
007 (series)
0.04175
18th century
0.15525
1920s
0.1605
1930s
0.342
1950s
0.20575
1960s
0.109
1970s
0.47025
1980s
0.5015
19th century
0.08875


In [32]:
# Primary-key lookup of the movie with id 932. Session.get() is the supported
# API for this; Query.get() is a legacy method as of SQLAlchemy 1.4.
movie_932 = session.get(Movie, 932)

print_Movie_Infos(movie_932)

Movie id: 932
Title: Affair to Remember, An (1957)
Release Year: 1957

Genres: 
Drama
Romance

IMDb id: 0050105
TMDb id: 8356

First 10 ratings of movie Affair to Remember, An (1957)
4.0
3.5
5.0
5.0
3.5
3.0
5.0
5.0
4.0
4.0

First 10 tags of movie Affair to Remember, An (1957)
Carey Grant
classic love story
Bob*ola
Cary Grant
Classic
Infidelity
romcom
Infidelity
Deborah Kerr
painful to watch

First 10 genome_scores of movie Affair to Remember, An (1957)
007
0.03225
007 (series)
0.035
18th century
0.14
1920s
0.10025
1930s
0.42525
1950s
0.49625
1960s
0.129
1970s
0.121
1980s
0.10125
19th century
0.1065


In [81]:
#import pandas as pd

In [82]:
"""movies = session.query(Movie)

test_count = 0

list_movies_with_users_with_one_rating = []

for movie in movies:
    test_count += 1
    users_with_one_rating = 0

    for rating in movie.ratings:
        ratings_of_user = list(session.query(Rating).filter(Rating.user_id == rating.user_id))
        if len(ratings_of_user) == 1:
            users_with_one_rating += 1

    new_user_ratio = users_with_one_rating / len(movie.ratings)
    list_movies_with_users_with_one_rating.append([movie.title, users_with_one_rating, new_user_ratio])

    if test_count >= 2:
        break

df_movies_with_users_with_one_rating = pd.DataFrame(list_movies_with_users_with_one_rating, columns=['Title', 'Users with only one rating', 'Ratio Users with only one rating / all ratings'])
#movies_with_users_with_one_rating.sort(key=lambda row: (row[1], row[2]))"""

KeyboardInterrupt: 

In [39]:
"""test_list = []

test_list.append(['First entry', 2, 3])
test_list.append(['Second entry', 5, 6])

test_df = pd.DataFrame(test_list, columns=['First Column', 'Second Column', 'Third Column'])
print(test_df)"""

   First Column  Second Column  Third Column
0   First entry              2             3
1  Second entry              5             6
