In [1]:
import os
import time
from pprint import pprint

import pandas as pd
import requests
from fuzzywuzzy import fuzz

In [2]:
#accessing my anime synopses
#the location can be overridden with the ANIME_CSV_PATH environment variable so the
#notebook also runs on machines where the default absolute path does not exist
csv_file_path = os.environ.get(
    "ANIME_CSV_PATH",
    "/Users/laranahcivan/Desktop/MLTA/df_top_500_anime.csv",
)

#reading the CSV file into a data-frame
df_top_500_anime = pd.read_csv(csv_file_path)

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#extracting the cleaned synopses, titles and rankings as plain lists
#(missing synopses are replaced by empty strings before vectorizing)
clean_synopses = df_top_500_anime['clean_synopsis_no_stopwords'].fillna('').tolist()
anime_titles = df_top_500_anime['title'].tolist()
anime_rankings = df_top_500_anime['rank'].tolist()

#initializing the TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer()

#fitting and transforming the data
tfidf_matrix = tfidf_vectorizer.fit_transform(clean_synopses)

#computing the cosine similarity matrix (every synopsis compared against every other)
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

#viewing the results
for i in range(len(cosine_sim_matrix)):
    print(f"Top 5 similar anime for {anime_titles[i]} (Rank: {anime_rankings[i]}):")
    #ranking every anime from most to least similar, then dropping the anime itself
    #BEFORE keeping five; slicing first would assume self-similarity is always the single
    #largest value, which is false for empty synopses (all-zero rows) or duplicated ones
    ranked_indices = cosine_sim_matrix[i].argsort()[::-1]
    similar_anime_indices = [idx for idx in ranked_indices if idx != i][:5]
    #numbering happens after filtering so the list always reads 1..5 without gaps
    for j, idx in enumerate(similar_anime_indices, start=1):
        print(f"{j}. {anime_titles[idx]} (Rank: {anime_rankings[idx]}) (Cosine Similarity: {cosine_sim_matrix[i][idx]})")
        #idx is a row position in the matrix, so the synopsis is looked up positionally
        print(f"Cleaned Synopsis:\n{df_top_500_anime['clean_synopsis_no_stopwords'].iloc[idx]}\n")
    print()

Top 5 similar anime for Sousou no Frieren (Rank: 1):
1. Mushoku Tensei: Isekai Ittara Honki Dasu Part 2 (Rank: 65) (Cosine Similarity: 0.0742199715880528)
Cleaned Synopsis:
mysterious mana calamity rudeus greyrat fierce student eris boreas greyrat teleported demon continent team newfound companion ruijerd supardiathe former leader superds warrior groupto form dead end successful adventurer party making name trio journeys across continent make way back home fittoa following advice received faceless god hitogami rudeus saves kishirika kishirisu great emperor demon world rewards granting strange power rudeus masters powerful ability offers number new opportunities might prove bargained unexpected dangers threaten hinder travels

2. Fumetsu no Anata e (Rank: 213) (Cosine Similarity: 0.05698909979815288)
Cleaned Synopsis:
orb known cast earth observed afar capable changing forms beings whose reflections captures first becomes rock due rising temperature moss move one snowy day wolf deaths d

In [4]:
#for viewing the cosine similarities without the synopses
for i in range(len(cosine_sim_matrix)):
    print(f"Top 5 similar anime for {anime_titles[i]} (Rank: {anime_rankings[i]}):")
    #sorting all anime from most to least similar and removing the anime itself before
    #keeping five; a fixed [-6:-1] slice only works when self-similarity is the unique
    #maximum of the row, which breaks for empty or duplicated synopses
    ranked_indices = cosine_sim_matrix[i].argsort()[::-1]
    similar_anime_indices = [idx for idx in ranked_indices if idx != i][:5]
    #enumerate after filtering so the printed positions are always consecutive
    for j, idx in enumerate(similar_anime_indices, start=1):
        print(f"{j}. {anime_titles[idx]} (Rank: {anime_rankings[idx]}) (Cosine Similarity: {cosine_sim_matrix[i][idx]})")
    print()

Top 5 similar anime for Sousou no Frieren (Rank: 1):
1. Mushoku Tensei: Isekai Ittara Honki Dasu Part 2 (Rank: 65) (Cosine Similarity: 0.0742199715880528)
2. Fumetsu no Anata e (Rank: 213) (Cosine Similarity: 0.05698909979815288)
3. Kono Subarashii Sekai ni Shukufuku wo! 2 (Rank: 301) (Cosine Similarity: 0.05191590926510035)
4. Horimiya: Piece (Rank: 389) (Cosine Similarity: 0.05137409056191119)
5. Kimetsu no Yaiba: Katanakaji no Sato-hen (Rank: 314) (Cosine Similarity: 0.0499554789784751)

Top 5 similar anime for Fullmetal Alchemist: Brotherhood (Rank: 2):
1. Fullmetal Alchemist (Rank: 482) (Cosine Similarity: 0.41396004945687254)
2. Hajime no Ippo (Rank: 41) (Cosine Similarity: 0.05262067498012615)
3. Houseki no Kuni (Rank: 187) (Cosine Similarity: 0.051988913636725004)
4. Shinseiki Evangelion (Rank: 216) (Cosine Similarity: 0.05077307425084246)
5. ReLIFE: Kanketsu-hen (Rank: 369) (Cosine Similarity: 0.05032834696892661)

Top 5 similar anime for Steins;Gate (Rank: 3):
1. Steins;Gate 

In [5]:
#collecting every ordered pair of distinct anime whose cosine similarity is above 0.5
#(the diagonal, i.e. an anime compared with itself, is skipped)
similarities_gt_0_5 = [
    (anime_titles[row_idx], anime_titles[col_idx], score)
    for row_idx, sim_row in enumerate(cosine_sim_matrix)
    for col_idx, score in enumerate(sim_row)
    if row_idx != col_idx and score > 0.5
]

#listing the matches in row-by-row order
print("Cosine Similarities Greater Than 0.5 (Excluding Self-Similarity):")
for pair in similarities_gt_0_5:
    print(f"{pair[0]} - {pair[1]}: {pair[2]}")

Cosine Similarities Greater Than 0.5 (Excluding Self-Similarity):
Kingdom 5th Season - Date A Live V: 0.5304717865971204
Natsume Yuujinchou Roku - Natsume Yuujinchou Go: 0.5065116396976019
Natsume Yuujinchou Roku - Natsume Yuujinchou San: 0.5417590559233859
Natsume Yuujinchou Roku - Zoku Natsume Yuujinchou: 0.596385307341428
Natsume Yuujinchou Roku - Natsume Yuujinchou: 0.5093370633352499
Natsume Yuujinchou Go - Natsume Yuujinchou Roku: 0.5065116396976019
Natsume Yuujinchou San - Natsume Yuujinchou Roku: 0.5417590559233859
Natsume Yuujinchou San - Zoku Natsume Yuujinchou: 0.6230815101034769
Zoku Natsume Yuujinchou - Natsume Yuujinchou Roku: 0.596385307341428
Zoku Natsume Yuujinchou - Natsume Yuujinchou San: 0.6230815101034769
Zoku Natsume Yuujinchou - Natsume Yuujinchou: 0.5520967299314335
Tunshi Xingkong 3rd Season - Tunshi Xingkong 4th Season: 0.8030835280562735
Natsume Yuujinchou - Natsume Yuujinchou Roku: 0.5093370633352499
Natsume Yuujinchou - Zoku Natsume Yuujinchou: 0.5520967299

In [6]:
#reporting the dimensions of the TF-IDF matrix
print(f"TF-IDF Matrix Shape: {tfidf_matrix.shape}")

TF-IDF Matrix Shape: (492, 10664)
