In [1]:
import sqlite3
from sqlite3 import Error
import pandas as pd
import numpy as np

# Loading CSV file

In [2]:
df_anime = pd.read_csv(r'data\myAnimelist-No_Hentai 2.csv').drop(columns='genre')
df_anime.rename(columns={"animeID":'anime_id'},inplace=True)
df_rating = pd.read_csv(r'data\rating.csv')

In [3]:
print(df_rating.shape)
# filtering for computational reasons & file saving size
df_rating_filtered = df_rating[df_rating.groupby("user_id")['user_id'].transform('size') > 500]
df_rating_filtered

(7813737, 3)


Unnamed: 0,user_id,anime_id,rating
1614,17,6,7
1615,17,19,10
1616,17,30,9
1617,17,32,10
1618,17,43,8
...,...,...,...
7812698,73502,18893,9
7812699,73502,19151,5
7812700,73502,19221,6
7812701,73502,19367,6


In [4]:
display(df_anime)
display(df_rating)

Unnamed: 0,anime_id,name,type,source,episodes,duration,rating,score,scored_by,rank,popularity,members,favorites
0,1,Cowboy Bebop,TV,Original,26,0:24:00,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460
1,5,Cowboy Bebop: Tengoku no Tobira,Movie,Original,1,1:55:00,R - 17+ (violence & profanity),8.41,120243,164,449,197791,776
2,6,Trigun,TV,Manga,26,0:24:00,PG-13 - Teens 13 or older,8.30,212537,255,146,408548,10432
3,7,Witch Hunter Robin,TV,Original,26,0:25:00,PG-13 - Teens 13 or older,7.33,32837,2371,1171,79397,537
4,8,Bouken Ou Beet,TV,Manga,52,0:23:00,PG - Children,7.03,4894,3544,3704,11708,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5555,38923,Sinbi Apateu: Ghost Ball Bimil,TV,Original,24,0:24:00,G - All Ages,7.00,4,13056,15281,30,1
5556,38961,Takoyaki Story,Music,Original,1,0:01:00,R+ - Mild Nudity,3.28,25,13254,15083,46,0
5557,38985,Birthday Wonderland,Movie,Book,1,1:55:00,G - All Ages,6.61,327,5504,9195,835,2
5558,39022,Ling Feng Zhe,ONA,Original,7,0:24:00,PG-13 - Teens 13 or older,6.08,38,11923,10455,467,2


Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9


In [5]:
# Database Path object for easier calling
db_path = r"db\anime.db"

# Create a new database

In [6]:
# Create a new database file

def create_database(db_file):
    """ create a database connection to a SQLite database """
    conn = None
    # connect to database file
    try:
        conn = sqlite3.connect(db_file)
        print(f'sqlite3 version: {sqlite3.version}')
        print('Created Database')
    # print error if connection can not be established
    except Error as error:
        print(error)
        print('Could not create Database')
    # close connection
    finally:
        if conn:
            conn.close()
    

# run the above function (save database file to path)
if __name__ == '__main__':
    create_database(db_path)

sqlite3 version: 2.6.0
Created Database


In [7]:
# # Create a new database that resides in the memory (RAM)

# def create_database():
#     """ create a database connection to a database that resides
#         in the memory
#     """
#     conn = None;
#     # connect to memory
#     try:
#         conn = sqlite3.connect(':memory:')
#         print(f'sqlite3 version: {sqlite3.version}')
#         print('Created Database')
#     # print error if connection can not be established
#     except Error as error:
#         print(error)
#         print('Could not create Database')
#     # close connection
#     finally:
#         if conn:
#             conn.close()
    

# # run the above function
# if __name__ == '__main__':
#     create_database()


# Create Tables

In [8]:
# function for establish connection to SQLite3 for editing database

def establish_connection(db_file):
    """ 
    creates a database connection to a SQLite database file
    
    :returns: Connection object or None
    """
    conn = None
    # connect to database file
    try:
        conn = sqlite3.connect(db_file)
        print(f'sqlite3 version: {sqlite3.version}')
        print('Connected to Database path')
    # print error if connection can not be established
    except Error as error:
        print(error)
        print('Could not create Database')
    
    return conn

In [9]:
conn = establish_connection(db_path)

sqlite3 version: 2.6.0
Connected to Database path


In [10]:
# function for creating table in database

def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    print('Creating tables')
    try:
        # make object cursor from connect
        c = conn.cursor()
        # from cursor use execute to create table
        c.execute(create_table_sql)
    except Error as error:
        print(error)

In [11]:
# Create database schema

def main():
    
    sql_create_anime_table = """ CREATE TABLE IF NOT EXISTS Anime (
                                        anime_id integer PRIMARY KEY NOT NULL,
                                        name text NOT NULL,
                                        type text,
                                        source text,
                                        episodes integer,
                                        duration datetime,
                                        rating text,
                                        score real,
                                        scored_by integer,
                                        rank integer,
                                        popularity integer,
                                        members integer,
                                        favorites integer,
                                        FOREIGN KEY (anime_id) REFERENCES rating (anime_id)
                                    ); """

    sql_create_rating_table = """CREATE TABLE IF NOT EXISTS Rating (
                                    anime_id integer PRIMARY KEY NOT NULL,
                                    user_id integer NOT NULL,
                                    rating integer,
                                    FOREIGN KEY (anime_id) REFERENCES anime (anime_id)
                                    
                                );"""

    # create a database connection
    

    # create tables
    if conn is not None:
        # create Anime table
        create_table(conn, sql_create_anime_table)

        # create Rating table
        create_table(conn, sql_create_rating_table)
        
        print("Created tables")
    else:
        print("Error! cannot create the database connection.")
        
    

    
if __name__ == '__main__':
    main()

Creating tables
Creating tables
Created tables


In [12]:
display(df_anime.head(5))
display(df_rating.head(5))

Unnamed: 0,anime_id,name,type,source,episodes,duration,rating,score,scored_by,rank,popularity,members,favorites
0,1,Cowboy Bebop,TV,Original,26,0:24:00,R - 17+ (violence & profanity),8.81,405664,26,39,795733,43460
1,5,Cowboy Bebop: Tengoku no Tobira,Movie,Original,1,1:55:00,R - 17+ (violence & profanity),8.41,120243,164,449,197791,776
2,6,Trigun,TV,Manga,26,0:24:00,PG-13 - Teens 13 or older,8.3,212537,255,146,408548,10432
3,7,Witch Hunter Robin,TV,Original,26,0:25:00,PG-13 - Teens 13 or older,7.33,32837,2371,1171,79397,537
4,8,Bouken Ou Beet,TV,Manga,52,0:23:00,PG - Children,7.03,4894,3544,3704,11708,14


Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


# Populating Tables from csv files

In [13]:
# exporting data from dataframe 'anime' to table anime
df_anime.to_sql('Anime',sqlite3.connect(db_path), if_exists='replace')

# export data from dataframe 'rating' to table rating 
df_rating.to_sql('Rating',sqlite3.connect(db_path), if_exists='replace')

In [14]:
# close connection to database

def close_connection(db_file):
    try:
        conn = sqlite3.connect(db_file)
        conn.close()
        print('Connection successfully closed')
    except Error as error:
        print(error)
        print('Could not close connection')
# run the above function (save database file to path)
if __name__ == '__main__':
    close_connection(db_path)

Connection successfully closed


# Insert data into Tables

In [15]:
df_anime.anime_id.max

<bound method NDFrame._add_numeric_operations.<locals>.max of 0           1
1           5
2           6
3           7
4           8
        ...  
5555    38923
5556    38961
5557    38985
5558    39022
5559    39116
Name: anime_id, Length: 5560, dtype: int64>

In [16]:
column_names = list(df_anime.columns)
print(column_names)

vinland_saga = (39200,"Vinland Saga",5,6,24,'0:24:00',4,8.71,363439,41,134,737881,20182)
demon_slayer = (39201,"Demon Slayer",5,6,26,'0:23:00',4,8.61,966203,63,22,1567326,72436)
dororo = (39202,"Dororo",5,6,24,'0:24:00',4,8.19,341122,322,137,730890,12071)
Attack_on_Titan_season_3 = (39203,'Attack on Titan Season 3',5,6,12,'0:23:00',4,8.62,877466,64,32,1370047,17234)
my_hero_season_3 = (39204,"My Hero Academia Season 3",5,6,25,'0:23:00',3,8.21,916942,294,30,1452482,14255)
jujutsu_kaisen = (39205,"Jujutsu Kaisen",5,6,24,'0:23:00',4,8.79,441907,26,85,955531,39622)
mob_psycho_100_season_2 = (39206,"Mob Psycho 100 Season 2",5,14,13,'0:23:00',3,8.83,542236,22,99,903843,22297)

anime_info_to_insert = [vinland_saga,demon_slayer,dororo,Attack_on_Titan_season_3,my_hero_season_3,jujutsu_kaisen,mob_psycho_100_season_2]

['anime_id', 'name', 'type', 'source', 'episodes', 'duration', 'rating', 'score', 'scored_by', 'rank', 'popularity', 'members', 'favorites']


In [17]:
conn = establish_connection(db_path)

sqlite3 version: 2.6.0
Connected to Database path


In [20]:
# functions for inserting new data into tables in database

def add_new_anime(conn,data_to_insert):

    sql_insert_statement = f""" INSERT into Anime(anime_id, name, type, source, episodes, duration, rating, score, scored_by, rank, popularity, members, favorites) 
                                            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?);"""
    cur = conn.cursor()
    cur.execute(sql_insert_statement, data_to_insert)
    conn.commit()
    return cur.lastrowid
    
    
def main():
    # create a database connection
    

    # insert new anime data
    with conn:
        try:
            for x in anime_info_to_insert:
                add_new_anime(conn, x)


            print("Successfully inserted new anime data into table")
        
        except Error as error:
            print(error)
            print('Could not insert new data into table')

    
    
    
if __name__ == '__main__':
    main()
    close_connection(db_path)

Successfully inserted new anime data into table
Connection successfully closed


# Querying data with conditions and save to Pandas Dataframe

In [21]:
conn = sqlite3.connect(db_path)

In [22]:

df_score_greater_then_8 = pd.read_sql_query("""SELECT * 
                                               from Anime 
                                               where score>8 
                                               order by score desc;""", conn)
df_score_greater_then_8

Unnamed: 0,index,anime_id,name,type,source,episodes,duration,rating,score,scored_by,rank,popularity,members,favorites
0,5551.0,38781,Violence Voyager,Movie,Original,1,1:23:00,R - 17+ (violence & profanity),10.00,1,13544,14765,65,0
1,5318.0,37149,Xing You Ji: Fengbao Famila,ONA,Original,1,1:10:00,PG-13 - Teens 13 or older,9.33,3,13633,12651,171,0
2,5539.0,38570,Tatakae! Dokan-kun: Robolympic-hen,TV,Original,13,0:05:00,G - All Ages,9.25,4,13288,15264,31,0
3,1939.0,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
4,2580.0,9253,Steins;Gate,TV,Visual novel,24,0:24:00,PG-13 - Teens 13 or older,9.14,633590,3,7,1139182,104173
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,2616.0,9513,Beelzebub,TV,Manga,60,0:24:00,PG-13 - Teens 13 or older,8.01,148483,554,215,332769,4097
451,3971.0,24921,Aoki Hagane no Arpeggio: Ars Nova Cadenza,Movie,Manga,1,1:45:00,PG-13 - Teens 13 or older,8.01,8034,552,2831,20938,105
452,4075.0,27775,Plastic Memories,TV,Original,13,0:24:00,PG-13 - Teens 13 or older,8.01,197098,557,155,396716,6153
453,4297.0,30415,High☆Speed!: Free! Starting Days,Movie,Light novel,1,1:50:00,PG-13 - Teens 13 or older,8.01,17893,555,1823,44425,202


In [23]:
df_rank_less_then_100 = pd.read_sql_query("Select * from Anime where rank<100 order by rank asc;", conn)


df_rank_less_then_100


Unnamed: 0,index,anime_id,name,type,source,episodes,duration,rating,score,scored_by,rank,popularity,members,favorites
0,1939.0,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
1,4555.0,32281,Kimi no Na wa.,Movie,Original,1,1:46:00,PG-13 - Teens 13 or older,9.14,588675,2,20,900593,43260
2,2580.0,9253,Steins;Gate,TV,Visual novel,24,0:24:00,PG-13 - Teens 13 or older,9.14,633590,3,7,1139182,104173
3,4158.0,28977,Gintama°,TV,Manga,51,0:24:00,R - 17+ (violence & profanity),9.13,82835,4,362,232437,6375
4,2893.0,11061,Hunter x Hunter (2011),TV,Manga,148,0:23:00,PG-13 - Teens 13 or older,9.12,468889,5,30,840943,76048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,117.0,170,Slam Dunk,TV,Manga,101,0:23:00,PG-13 - Teens 13 or older,8.54,60029,92,751,128264,3750
90,2715.0,9989,Ano Hi Mita Hana no Namae wo Bokutachi wa Mada...,TV,Original,11,0:22:00,PG-13 - Teens 13 or older,8.53,439345,93,49,734942,22969
91,10.0,21,One Piece,TV,Manga,909,0:24:00,PG-13 - Teens 13 or older,8.53,465454,94,36,803871,76869
92,1913.0,5028,Major S5,TV,Manga,25,0:24:00,PG-13 - Teens 13 or older,8.52,25258,97,1891,42107,479


In [24]:
fullmetal_alchemist_bh_user_ratings = pd.read_sql_query(""" select * from Rating r
                                                            inner join Anime a on a.anime_id = r.anime_id
                                                            where a.anime_id = 5114
                                                            group by user_id
                                                            order by a.anime_id asc

                                                            ;""",conn)
                  
                  
fullmetal_alchemist_bh_user_ratings                      

Unnamed: 0,index,user_id,anime_id,rating,index.1,anime_id.1,name,type,source,episodes,duration,rating.1,score,scored_by,rank,popularity,members,favorites
0,183,3,5114,10,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
1,1165,10,5114,10,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
2,1250,11,5114,8,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
3,1295,12,5114,9,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
4,1718,17,5114,10,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24569,7811580,73499,5114,10,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
24570,7812050,73500,5114,9,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
24571,7813047,73504,5114,10,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331
24572,7813249,73507,5114,9,1939,5114,Fullmetal Alchemist: Brotherhood,TV,Manga,64,0:24:00,R - 17+ (violence & profanity),9.24,826899,1,4,1355349,120331


In [25]:
most_frequent_raters = pd.read_sql_query(""" select count(r.user_id) total_occurrence,r.user_id, avg(r.rating) avg_rating from Rating r
                                             inner join Anime a on a.anime_id = r.anime_id
                                             group by r.user_id
                                             order by total_occurrence desc

                                             limit 10

                                            ;""",conn)
                  
                            
                  
most_frequent_raters                  
                      

Unnamed: 0,total_occurrence,user_id,avg_rating
0,4349,48766,-1.0
1,2250,42635,6.414667
2,1792,57620,7.918527
3,1594,59643,7.199498
4,1504,58343,0.630984
5,1473,45659,7.177868
6,1417,66021,-0.723359
7,1334,52371,0.637181
8,1330,65840,7.546617
9,1330,7345,6.313534


In [26]:
least_frequent_raters = pd.read_sql_query("""select count(r.user_id) total_occurrence,r.user_id, avg(r.rating) avg_rating from Rating r
                                             inner join Anime a on a.anime_id = r.anime_id
                                             group by r.user_id
                                             order by total_occurrence asc

                                             limit 10

                                            ;""",conn)
                  
                            
                  
least_frequent_raters                  
                      

Unnamed: 0,total_occurrence,user_id,avg_rating
0,1,9,8.0
1,1,59,9.0
2,1,70,10.0
3,1,83,10.0
4,1,113,9.0
5,1,114,8.0
6,1,131,10.0
7,1,195,7.0
8,1,212,9.0
9,1,238,10.0


In [27]:
total_users_rating_per_anime = pd.read_sql_query(""" select count(r.user_id) total_occurrence, a.* 
                                                     from Anime a
                                                     inner join Rating r on a.anime_id = r.anime_id
                                                     group by a.anime_id
                                                     order by total_occurrence desc



                                                    ;""",conn)

                            
                  
                  
total_users_rating_per_anime                      

KeyboardInterrupt: 

In [None]:
scores_classified = pd.read_sql_query("""select *,
                                         case
                                             when score >= 7.5 then 'highly_scored_game'
                                             when 6.0 >= score <= 7.49 then 'moderately_scored_game'
                                             else 'poorly_scored_game'
                                         end as 'anime_scored_classification'    
                                         from Anime
                                     ;""",conn)
scores_classified

In [None]:
scores_ = pd.read_sql_query("""  select r.user_id, a
                                 row_numbers() over(parition by r.anime_id orderby a.score)
                                 from Rating as r
                                 inner join Anime as a on a.anime_id = r.anime_id
                                         
                                     ;""",conn)

In [None]:
close_connection(db_path)