In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing

In [2]:
df = pd.read_csv("top250_anime.csv")

In [3]:
df.head()

Unnamed: 0,Rank,Title,Popularity,Genre,Studio,Type,Episodes,Duration,Start_date,End_date,Members,Score
0,1,Shingeki no Kyojin: The Final Season - Kankets...,609,"Action, Drama, Suspense",MAPPA,Special,2.0,61,23-Mar,2023,347875.0,9.17
1,2,Fullmetal Alchemist: Brotherhood,3,"Action, Adventure, Drama, Fantasy",Bones,TV,64.0,24,9-Apr,10-Jul,3109951.0,9.11
2,3,Bleach: Sennen Kessen-hen,508,"Action, Adventure, Fantasy",Pierrot,TV,13.0,24,22-Oct,22-Dec,404036.0,9.09
3,4,Steins;Gate,13,"Drama, Sci-Fi, Suspense",White Fox,TV,24.0,24,11-Apr,11-Sep,2393767.0,9.08
4,5,Gintama°,335,"Action, Comedy, Sci-Fi",Bandai Namco Pictures,TV,51.0,24,15-Apr,16-Mar,581994.0,9.07


In [4]:
df.tail()

Unnamed: 0,Rank,Title,Popularity,Genre,Studio,Type,Episodes,Duration,Start_date,End_date,Members,Score
245,246,Blue Lock,439,Sports,8bit,TV,24.0,24,22-Oct,23-Mar,459426.0,8.3
246,247,Detective Conan Movie 06: The Phantom of Baker...,2289,"Adventure, Mystery",TMS Entertainment,Movie,1.0,107,2-Apr,2-Apr,73487.0,8.3
247,248,Diamond no Ace: Second Season,1541,Sports,"Production I.G, Madhouse",TV,51.0,24,15-Apr,16-Mar,130376.0,8.3
248,249,Hibike! Euphonium 2,1017,Drama,Kyoto Animation,TV,13.0,23,16-Oct,16-Dec,213707.0,8.3
249,250,Ashita no Joe,2112,"Drama, Sports",Mushi Production,TV,79.0,25,Apr-70,Sep-71,83604.0,8.29


In [5]:
df.describe()

Unnamed: 0,Rank,Popularity,Episodes,Duration,Members,Score
count,250.0,250.0,249.0,250.0,248.0,250.0
mean,125.5,855.46,17.791165,41.72,664259.2,8.54668
std,72.312977,1009.330929,23.988668,35.677178,684780.9,0.206665
min,1.0,1.0,1.0,3.0,10566.0,8.29
25%,63.25,178.25,1.0,23.0,189997.5,8.38
50%,125.5,490.0,12.0,24.0,415455.0,8.505
75%,187.75,1139.0,24.0,29.0,861674.5,8.66
max,250.0,5794.0,201.0,161.0,3673536.0,9.17


In [6]:
df.dtypes

Rank            int64
Title          object
Popularity      int64
Genre          object
Studio         object
Type           object
Episodes      float64
Duration        int64
Start_date     object
End_date       object
Members       float64
Score         float64
dtype: object

In [7]:
# Check for NAs per column
na_counts = df.isna().sum()

print(na_counts)

Rank          0
Title         0
Popularity    0
Genre         1
Studio        2
Type          0
Episodes      1
Duration      0
Start_date    0
End_date      2
Members       2
Score         0
dtype: int64


In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

def get_anime_recommendations(title, df):
    # Split the genre column and create new binary rows for each genre
    genres = df['Genre'].str.split(',', expand=True)
    genre_columns = genres.apply(lambda x: x.str.strip()).stack().unique()
    genre_df = pd.DataFrame(columns=genre_columns)
    for i, row in genres.iterrows():
        genre_series = pd.Series({x: 1 for x in row.dropna()})
        genre_df = genre_df.append(genre_series, ignore_index=True)

    # Merge the genre dataframe with the original dataframe
    df = pd.concat([df, genre_df], axis=1)

    # Scale the rating and members columns
    scaler = StandardScaler()
    df[['Score', 'Members']] = scaler.fit_transform(df[['Score', 'Members']])
    # Select the columns to use for similarity
    similarity_cols = ['Score', 'Members'] + list(genre_columns)

    # Compute the cosine similarity matrix
    anime_data = df[similarity_cols]
    anime_data.fillna(0, inplace=True)
    anime_sim_matrix = cosine_similarity(anime_data)

    # Get the index of the anime title
    title_index = df[df['Title'] == title].index[0]

    # Get the similarity values for the given anime title
    sim_values = anime_sim_matrix[title_index].argsort()[::-1][1:]

    # Get the top 10 anime titles with the highest similarity values
    top_anime_titles = df.iloc[sim_values]['Title'].tolist()[:10]

    # Create a numbered list of recommendations
    recommendations = ""
    for i, anime_title in enumerate(top_anime_titles):
        recommendations += f"{i+1}. {anime_title}\n"

    return recommendations

In [10]:
# Get anime recommendations for "Shingeki no Kyojin: The Final Season - Kanketsu-hen"
anime_recommendations = get_anime_recommendations("Shingeki no Kyojin: The Final Season - Kanketsu-hen", df)

# Print the recommendations
print(anime_recommendations)

1. Bleach: Sennen Kessen-hen
2. Gintama'
3. Gintama': Enchousen
4. Gintama°
5. Gintama.
6. Gintama: The Final
7. Gintama Movie 2: Kanketsu-hen - Yorozuya yo Eien Nare
8. Gintama.: Shirogane no Tamashii-hen - Kouhan-sen
9. Kizumonogatari III: Reiketsu-hen
10. Gintama

