In [12]:
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
from sqlalchemy import create_engine


# PostgreSQL database connection settings.
# Every value can be overridden through an environment variable so that real
# credentials do not have to be saved inside the notebook; the fallbacks are
# the original local-development values.
db_host = os.environ.get('ANIME_DB_HOST', 'localhost')
db_port = os.environ.get('ANIME_DB_PORT', '5432')
db_name = os.environ.get('ANIME_DB_NAME', 'anime-table2')
db_user = os.environ.get('ANIME_DB_USER', 'postgres')
db_password = os.environ.get('ANIME_DB_PASSWORD', 'password')

# Create the SQLAlchemy engine. User name and password are percent-encoded
# because characters such as '@', ':' or '/' would otherwise be parsed as URL
# delimiters and corrupt the connection string.
engine = create_engine(
    f'postgresql://{quote(db_user, safe="")}:{quote(db_password, safe="")}'
    f'@{db_host}:{db_port}/{db_name}'
)

# Query to fetch the anime_data from the database
query = "SELECT * FROM anime_table2"

# Execute the query and fetch the results into a pandas DataFrame.
# The previous version called `conn.close()` here, but no `conn` is created
# anywhere in this cell, so a fresh kernel failed with NameError. pandas
# checks a connection out of the engine and returns it by itself; disposing
# the engine afterwards releases the pooled connection even if the query fails.
try:
    anime_data = pd.read_sql_query(query, con=engine)
finally:
    engine.dispose()

# Initialize the TfidfVectorizer:
#   - min_df=3 ignores terms that occur in fewer than three rows,
#   - token_pattern=r'\w{1,}' treats every run of word characters as a token,
#   - ngram_range=(1, 3) builds features from one to three consecutive tokens,
#   - accents are stripped and English stop words are removed.
tfv = TfidfVectorizer(min_df=3, max_features=None,
                      strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                      ngram_range=(1, 3),
                      stop_words='english')

# Fill NaN values in the 'Genres' column with an empty string so that every
# row can be vectorized (this updates anime_data in place).
anime_data.loc[:, 'Genres'] = anime_data['Genres'].fillna('')

# Split the 'Genres' column by comma and convert each resulting list to its
# string representation; the brackets, quotes and commas this adds are not
# word characters, so the tokenizer above discards them.
genres_str = anime_data['Genres'].str.split(',').astype(str)

# Use the TfidfVectorizer to transform the genres_str into a sparse matrix
# with one row per row of anime_data.
tfv_matrix = tfv.fit_transform(genres_str)

# Compute the sigmoid kernel between every pair of rows: sig[i][j] is the
# similarity score of row i and row j.
sig = sigmoid_kernel(tfv_matrix, tfv_matrix)

# Create a Pandas Series object where the index is the anime names and the
# values are the row labels of anime_data.
# NOTE(review): give_rec uses these values as row *positions*; that is only
# correct while anime_data keeps a default 0..n-1 index - confirm if the
# frame is ever filtered or re-indexed before this point.
indices = pd.Series(anime_data.index, index=anime_data['anime_title'])

# Remove duplicate anime names, keeping the first row for each title.
# `Series.drop_duplicates()` compares the *values* (the row labels, which are
# already unique), so it never removed a repeated title and a lookup of such a
# title returned several rows instead of one. De-duplicate on the index.
indices = indices[~indices.index.duplicated(keep='first')]

def give_rec(title, sig=sig, top_n=10):
    """Return the anime most similar to ``title`` according to ``sig``.

    Parameters
    ----------
    title : str
        Anime title to look up in the module-level ``indices`` Series.
    sig : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position; defaults to the
        matrix computed when this function was defined.
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Columns ``'Anime name'`` and ``'Rating'``, ordered from most to least
        similar, with at most ``top_n`` rows.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    if title not in indices.index:
        raise KeyError(f"Anime title not found: {title!r}")

    # Get the row position corresponding to the anime title. A title that
    # occurs more than once yields a Series; use its first occurrence.
    idx = indices.loc[title]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every other row with its similarity score. The queried row is
    # excluded explicitly: slicing off the first sorted entry is wrong when
    # another anime ties with it (e.g. identical genres), because the stable
    # sort then places the lower-numbered row first and the queried anime
    # would show up in its own recommendations.
    candidates = [
        (pos, score) for pos, score in enumerate(sig[idx]) if pos != idx
    ]

    # Highest score first; ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    anime_indices = [pos for pos, _ in candidates[:top_n]]

    # Create dataframe of the recommended anime
    top_anime = pd.DataFrame({
        'Anime name': anime_data['anime_title'].iloc[anime_indices].values,
        'Rating': anime_data['Score'].iloc[anime_indices].values
    })

    return top_anime

# Demo: fetch the recommendations for a known title and show them as plain text.
recommendations = give_rec(title='Naruto')
print(recommendations)

                                          Anime name Rating
0                                 Naruto: Shippuuden   8.16
1                    Boruto: Jump Festa 2016 Special   6.22
2                                        Naruto x UT    7.4
3             Naruto: Shippuuden - Sunny Side Battle   7.43
4  Boruto: Naruto the Movie - Naruto ga Hokage ni...    7.4
5  Dragon Ball GT: Gokuu Gaiden! Yuuki no Akashi ...   6.54
6               Dragon Ball Z: Atsumare! Gokuu World   6.48
7             Dragon Ball Z: Summer Vacation Special   6.62
8                             Dragon Ball Kai (2014)   7.69
9            Dragon Ball Z Movie 15: Fukkatsu no "F"   7.11


In [None]:
# import numpy as np
# import pandas as pd
# import re

# anime_list = pd.read_csv("myanimelist_data.csv")

# anime_list.info()

In [None]:
# anime_list.head()