In [None]:
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from langdetect import DetectorFactory, detect
from plotly.offline import init_notebook_mode, iplot
from wordcloud import WordCloud

init_notebook_mode(connected=True)

# Reading our Dataset

In [None]:
# Render up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50

In [None]:
# Load the anime table.
# A forward slash is used as the separator: in a plain string literal the
# sequence "\a" is the BEL control character, so 'AniMate_Model\anime_data.csv'
# does not name the intended file. Forward slashes work on Windows as well.
df_anime = pd.read_csv('AniMate_Model/anime_data.csv')
print("Shape of the Dataset:", df_anime.shape)
df_anime.head(3)

Shape of the Dataset: (24905, 24)


Unnamed: 0,anime_id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,Aired,Premiered,Status,Producers,Licensors,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL
0,1,Cowboy Bebop,Cowboy Bebop,カウボーイビバップ,8.75,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,26.0,"Apr 3, 1998 to Apr 24, 1999",spring 1998,Finished Airing,Bandai Visual,"Funimation, Bandai Entertainment",Sunrise,Original,24 min per ep,R - 17+ (violence & profanity),41.0,43,78525,914193.0,1771505,https://cdn.myanimelist.net/images/anime/4/196...
1,5,Cowboy Bebop: Tengoku no Tobira,Cowboy Bebop: The Movie,カウボーイビバップ 天国の扉,8.38,"Action, Sci-Fi","Another day, another bounty—such is the life o...",Movie,1.0,"Sep 1, 2001",UNKNOWN,Finished Airing,"Sunrise, Bandai Visual",Sony Pictures Entertainment,Bones,Original,1 hr 55 min,R - 17+ (violence & profanity),189.0,602,1448,206248.0,360978,https://cdn.myanimelist.net/images/anime/1439/...
2,6,Trigun,Trigun,トライガン,8.22,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,26.0,"Apr 1, 1998 to Sep 30, 1998",spring 1998,Finished Airing,Victor Entertainment,"Funimation, Geneon Entertainment USA",Madhouse,Manga,24 min per ep,PG-13 - Teens 13 or older,328.0,246,15035,356739.0,727252,https://cdn.myanimelist.net/images/anime/7/203...


In [None]:
# Load the user table.
# A forward slash is used as the separator: "\u" inside a plain string literal
# starts a unicode escape, so 'AniMate_Model\user_data.csv' fails to compile
# with "truncated \uXXXX escape".
df_user = pd.read_csv('AniMate_Model/user_data.csv')
print("Shape of the Dataset:", df_user.shape)
df_user.head()

Shape of the Dataset: (731290, 16)


Unnamed: 0,Mal ID,Username,Gender,Birthday,Location,Joined,Days Watched,Mean Score,Watching,Completed,On Hold,Dropped,Plan to Watch,Total Entries,Rewatched,Episodes Watched
0,1,Xinil,Male,1985-03-04T00:00:00+00:00,California,2004-11-05T00:00:00+00:00,142.3,7.37,1.0,233.0,8.0,93.0,64.0,399.0,60.0,8458.0
1,3,Aokaado,Male,,"Oslo, Norway",2004-11-11T00:00:00+00:00,68.6,7.34,23.0,137.0,99.0,44.0,40.0,343.0,15.0,4072.0
2,4,Crystal,Female,,"Melbourne, Australia",2004-11-13T00:00:00+00:00,212.8,6.68,16.0,636.0,303.0,0.0,45.0,1000.0,10.0,12781.0
3,9,Arcane,,,,2004-12-05T00:00:00+00:00,30.0,7.71,5.0,54.0,4.0,3.0,0.0,66.0,0.0,1817.0
4,18,Mad,,,,2005-01-03T00:00:00+00:00,52.0,6.27,1.0,114.0,10.0,5.0,23.0,153.0,42.0,3038.0


In [None]:
# Load the user/anime rating table.
# A forward slash is used as the separator: "\r" inside a plain string literal
# is a carriage return, so 'AniMate_Model\rating_data.csv' does not name the
# intended file.
df_score = pd.read_csv('AniMate_Model/rating_data.csv')
print("Shape of the dataset:", df_score.shape)
df_score.head()

Shape of the dataset: (24325191, 5)


Unnamed: 0,user_id,Username,anime_id,Anime Title,rating
0,1,Xinil,21,One Piece,9
1,1,Xinil,48,.hack//Sign,7
2,1,Xinil,320,A Kite,5
3,1,Xinil,49,Aa! Megami-sama!,8
4,1,Xinil,304,Aa! Megami-sama! Movie,8


# Exploratory Data Analysis

In [None]:
# Print the anime table's column names, non-null counts and dtypes.
df_anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24905 entries, 0 to 24904
Data columns (total 24 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   anime_id      24905 non-null  int64 
 1   Name          24905 non-null  object
 2   English name  24905 non-null  object
 3   Other name    24905 non-null  object
 4   Score         24905 non-null  object
 5   Genres        24905 non-null  object
 6   Synopsis      24905 non-null  object
 7   Type          24905 non-null  object
 8   Episodes      24905 non-null  object
 9   Aired         24905 non-null  object
 10  Premiered     24905 non-null  object
 11  Status        24905 non-null  object
 12  Producers     24905 non-null  object
 13  Licensors     24905 non-null  object
 14  Studios       24905 non-null  object
 15  Source        24905 non-null  object
 16  Duration      24905 non-null  object
 17  Rating        24905 non-null  object
 18  Rank          24905 non-null  object
 19  Popu

In [None]:
# Frequency of each distinct value in 'Score' (used to spot placeholder values).
df_anime['Score'].value_counts()

Unnamed: 0_level_0,count
Score,Unnamed: 1_level_1
UNKNOWN,9213
6.31,80
6.54,80
6.25,79
6.51,79
...,...
3.21,1
3.29,1
1.85,1
3.69,1


In [None]:
# Mean of the scores that are not the 'UNKNOWN' placeholder, rounded to 2 decimals.
known_score_mask = df_anime['Score'] != 'UNKNOWN'
scores = df_anime.loc[known_score_mask, 'Score'].astype('float')
score_mean = round(scores.mean(), 2)

In [None]:
# Replace the 'UNKNOWN' placeholder with the mean score, then store the column as float64.
score_filled = df_anime['Score'].replace('UNKNOWN', score_mean)
df_anime['Score'] = score_filled.astype('float64')

In [None]:
# Frequency of each distinct value in 'Rank' (used to spot placeholder values).
df_anime['Rank'].value_counts()

Unnamed: 0_level_0,count
Rank,Unnamed: 1_level_1
UNKNOWN,4612
0.0,187
6542.0,4
16675.0,4
6577.0,4
...,...
18424.0,1
18423.0,1
11642.0,1
8977.0,1


In [None]:
# Turn the 'UNKNOWN' placeholder into NaN, then store Rank as float64.
rank_with_nan = df_anime['Rank'].replace('UNKNOWN', np.nan)
df_anime['Rank'] = rank_with_nan.astype('float64')

In [None]:
# Print the user table's column names, non-null counts and dtypes.
df_user.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731290 entries, 0 to 731289
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Mal ID            731290 non-null  int64  
 1   Username          731289 non-null  object 
 2   Gender            224383 non-null  object 
 3   Birthday          168068 non-null  object 
 4   Location          152805 non-null  object 
 5   Joined            731290 non-null  object 
 6   Days Watched      731282 non-null  float64
 7   Mean Score        731282 non-null  float64
 8   Watching          731282 non-null  float64
 9   Completed         731282 non-null  float64
 10  On Hold           731282 non-null  float64
 11  Dropped           731282 non-null  float64
 12  Plan to Watch     731282 non-null  float64
 13  Total Entries     731282 non-null  float64
 14  Rewatched         731282 non-null  float64
 15  Episodes Watched  731282 non-null  float64
dtypes: float64(10), int6

In [None]:
# Number of missing values in each column of the rating table.
df_score.isnull().sum()

Unnamed: 0,0
user_id,0
Username,232
anime_id,0
Anime Title,0
rating,0


## Data Visualization

### For Anime Dataset

In [None]:
# Bar chart with one bar per value of 'Type', sized by the number of titles.
type_counts = df_anime['Type'].value_counts()
fig = px.bar(
    type_counts,
    x=type_counts.index,
    y=type_counts.values,
    color=type_counts.index,
    title='Count of Anime Titles by Type',
    labels={'x': 'Anime Type', 'y': 'Count'},
)
fig.show(renderer="colab")

In [None]:
# Rows with Popularity <= 0 are excluded, then the 15 smallest remaining values
# are kept: the ascending sort means a lower Popularity number is treated as
# "more popular".
# NOTE: `top_10_popular` holds 15 rows; the historical name is kept unchanged.
has_popularity = df_anime['Popularity'] > 0
df_valid_popularity = df_anime.loc[has_popularity]
top_10_popular = df_valid_popularity.sort_values(by='Popularity', ascending=True).head(15)
fig = px.bar(
    top_10_popular,
    x='Name',
    y='Popularity',
    color='Name',
    title='Top 15 Most Popular Animes',
    labels={'Name': 'Anime Title', 'Popularity': 'Popularity'},
)
fig.show(renderer="colab")

In [None]:
# Scatter of each title's score against its 'Members' value.
# The y axis plots 'Members', so the label and title name that column instead
# of calling it "Number of Scores" (that would describe 'Scored By').
fig = px.scatter(
    df_anime,
    x='Score',
    y='Members',
    labels={'Score': 'Overall Score', 'Members': 'Number of Members'},
    title='Anime Score vs. Number of Members',
)

fig.show(renderer="colab")

In [None]:
# The fifteen titles with the largest 'Members' value, one coloured bar per title.
top_15_scored = df_anime.sort_values(by='Members', ascending=False).head(15)
fig = px.bar(
    top_15_scored,
    x='Name',
    y='Members',
    color='Name',
    title='Top 15 Animes by Number of Watchers',
    labels={'Members': 'Number of Users', 'Name': 'Anime Title'},
)
fig.show(renderer="colab")

In [None]:
# Split each comma-separated 'Genres' string into single genres (rows marked
# 'UNKNOWN' are skipped) and count how many titles carry each genre.
known_genres = df_anime.loc[df_anime['Genres'] != "UNKNOWN", 'Genres']
genre_counts = known_genres.str.split(', ').explode().value_counts()
fig = px.bar(
    genre_counts,
    x=genre_counts.index,
    y=genre_counts.values,
    color=genre_counts.index,
    title='Count of Anime Titles by Genre',
    labels={'x': 'Genre', 'y': 'Count'},
)
fig.show(renderer="colab")

In [None]:
# Bar chart of the 20 most frequent genres (first 20 entries of genre_counts).
top_20_genres = genre_counts.head(20)
fig = px.bar(top_20_genres, x=top_20_genres.index, y=top_20_genres.values,
             labels={'x':'Genre', 'y':'Count'},
             title='Top 20 Most Popular Genres In The Anime Industry')
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.8)
# The axis-title font is set through `title.font`: the legacy `titlefont`
# attribute is deprecated and is rejected by recent plotly releases.
fig.update_layout(
    xaxis=dict(tickangle=-45, tickfont=dict(size=12)),
    yaxis=dict(title=dict(font=dict(size=14))),
)
fig.show(renderer="colab")

In [None]:
# Donut chart of the 20 most frequent genres.
# `go` is already imported in the notebook's first cell, so the duplicate
# mid-notebook import that used to sit here has been removed.
fig = go.Figure(data=[go.Pie(labels=top_20_genres.index, values=top_20_genres.values,
                             hole=0.6, hoverinfo='label+percent', textinfo='value')])
fig.update_layout(title='Distribution of Anime Genres',
                  legend=dict(font=dict(size=12), title='Genre'),
                  annotations=[dict(text='Genre', x=0.5, y=0.5, font_size=20, showarrow=False)])
fig.show(renderer="colab")

In [None]:
# Word cloud of genres, sized by the number of titles carrying each genre.
# The cloud is built from explicit per-genre frequencies rather than from one
# joined text blob: tokenising free text would split multi-word genres such as
# "Award Winning" and count neighbouring genres as two-word phrases.
genre_frequencies = (
    df_anime.loc[df_anime['Genres'] != "UNKNOWN", 'Genres']
    .dropna()
    .str.split(', ')
    .explode()
    .value_counts()
    .to_dict()
)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(genre_frequencies)
wordcloud_image = wordcloud.to_image()
fig = go.Figure(go.Image(z=wordcloud_image))
# This figure is a word cloud, not an embedding plot, so it is titled as such.
fig.update_layout(title='Word Cloud - Genre')
fig.show(renderer="colab")

In [None]:
# Violin plot of 'Popularity' for every value of 'Type', one colour per type.
violin_labels = {'Type': 'Anime Type', 'Popularity': 'Popularity'}
fig = px.violin(
    df_anime,
    x='Type',
    y='Popularity',
    color='Type',
    labels=violin_labels,
    title='Distribution of Anime Popularity by Type',
)
fig.show(renderer="colab")

In [None]:
# Box plot of 'Score' for every value of 'Type', one colour per type.
box_labels = {'Type': 'Anime Type', 'Score': 'Score'}
fig = px.box(
    df_anime,
    x='Type',
    y='Score',
    color='Type',
    labels=box_labels,
    title='Distribution of Anime Scores by Type',
)
fig.show(renderer="colab")

In [None]:
# Bubble chart: Popularity on x, Members on y, marker size from Score and
# colour from Type.
# The y axis plots 'Members', so the label and title name that column instead
# of calling it "Number of Scores".
fig = px.scatter(
    df_anime,
    x='Popularity',
    y='Members',
    size='Score',
    color='Type',
    labels={'Popularity': 'Popularity', 'Members': 'Number of Members'},
    title='Relationship between Popularity, Number of Members, and Score',
)

fig.show(renderer="colab")

In [None]:
# Pairwise correlation of Score, Popularity and Rank, drawn as an annotated heatmap.
correlation_matrix = df_anime[['Score', 'Popularity', 'Rank']].corr()
fig = ff.create_annotated_heatmap(
    z=correlation_matrix.values,
    x=list(correlation_matrix.columns),
    y=list(correlation_matrix.index),
    # Without explicit annotation text each cell prints the raw float at full
    # precision; two decimals keep the cells readable. Colours still use the
    # unrounded values in `z`.
    annotation_text=correlation_matrix.round(2).values.tolist(),
    colorscale='Viridis',
)
fig.update_layout(title='Correlation Matrix')
fig.show(renderer="colab")

In [None]:
# Frequency of each distinct 'Licensors' string (values may list several licensors).
df_anime['Licensors'].value_counts()

Unnamed: 0_level_0,count
Licensors,Unnamed: 1_level_1
UNKNOWN,20170
Funimation,957
Sentai Filmworks,818
Discotek Media,275
Aniplex of America,222
...,...
"Bandai Entertainment, Maiden Japan",1
"ADV Films, SoftCel Pictures",1
"VIZ Media, Media Blasters, Sentai Filmworks, Geneon Entertainment USA",1
"Bandai Entertainment, Discotek Media, NYAV Post, Bandai Visual USA",1


In [None]:
# Count individual licensors: every non-'UNKNOWN' 'Licensors' string is split on
# commas and each piece is stripped of surrounding whitespace before counting.
known_licensors = df_anime[df_anime['Licensors']!="UNKNOWN"]['Licensors']
licensors_list = []
for licensors in known_licensors.str.split(','):
    licensors_list.extend(licensor.strip() for licensor in licensors)
licensor_counts = pd.Series(licensors_list).value_counts()
filtered_licensor_counts = licensor_counts[licensor_counts.index != 'Unknown']
# NOTE: `top_15_licensors` holds the first 10 entries; the historical name is kept.
top_15_licensors = filtered_licensor_counts.head(10)
fig = px.bar(
    top_15_licensors,
    x=top_15_licensors.index,
    y=top_15_licensors.values,
    color=top_15_licensors.index,
)
fig.update_layout(title='Top 10 Anime Licensors',
                  xaxis_title='Licensors',
                  yaxis_title='Count',
                  xaxis_tickangle=-45)
fig.show(renderer="colab")

In [None]:
# Frequency of each distinct 'Premiered' value (e.g. "spring 2017" or 'UNKNOWN').
df_anime['Premiered'].value_counts()

Unnamed: 0_level_0,count
Premiered,Unnamed: 1_level_1
UNKNOWN,19399
spring 2017,88
fall 2016,83
spring 2018,81
spring 2016,78
...,...
summer 1993,1
summer 1974,1
summer 1991,1
spring 1961,1


In [None]:
def extract_season_year(premiered):
    """Split a 'Premiered' value such as ``"spring 1998"`` into ``(season, year)``.

    Parameters
    ----------
    premiered : str or any
        A "<season> <year>" string, or the ``'UNKNOWN'`` placeholder.

    Returns
    -------
    tuple
        ``(season, year)`` with ``year`` as an ``int``. ``(None, None)`` is
        returned for ``'UNKNOWN'``, for non-string input (e.g. ``NaN``/``None``)
        and for strings that are not exactly two whitespace-separated tokens
        ending in an integer, so one malformed row cannot abort a whole
        ``Series.map`` call.
    """
    if not isinstance(premiered, str) or premiered == 'UNKNOWN':
        return None, None
    parts = premiered.split()
    if len(parts) != 2:
        return None, None
    season, year = parts
    try:
        return season, int(year)
    except ValueError:
        return None, None
# One (season, year) tuple per title, then one Series for each tuple position.
season_year = df_anime['Premiered'].apply(extract_season_year)
premiered_season = season_year.map(lambda pair: pair[0])
premiered_Year = season_year.map(lambda pair: pair[1])

In [None]:
# Donut chart of how many titles premiered in each season (missing seasons dropped).
filtered_premiered_season = premiered_season.dropna()
season_counts = filtered_premiered_season.value_counts()
season_marker = dict(
    colors=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'],
    line=dict(color='#ffffff', width=2),
)
season_pie = go.Pie(
    labels=season_counts.index,
    values=season_counts.values,
    hole=0.4,
    hoverinfo='label+percent',
    textinfo='value',
    textfont=dict(size=14),
    marker=season_marker,
)
fig = go.Figure(data=season_pie)
fig.update_layout(title='Distribution of Premiered Seasons',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Bar chart of the number of titles per premiere year, in chronological order.
filtered_premiered_year = premiered_Year.dropna()
year_counts = filtered_premiered_year.value_counts()
sorted_years = sorted(year_counts.index)
# Re-order the counts by label so bar i belongs to sorted_years[i].
counts_by_year = year_counts.loc[sorted_years]
year_bar = go.Bar(x=sorted_years, y=counts_by_year, marker=dict(color='#1f77b4'))
fig = go.Figure(data=year_bar)
fig.update_layout(title='Number of Animes Premiered by Year',
                  xaxis_title='Year',
                  yaxis_title='Number of Animes',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Ten most frequent 'Studios' values, ignoring the 'UNKNOWN' placeholder.
# The column is counted as-is, so a value that lists several studios is its own
# category.
studio_counts = df_anime['Studios'].value_counts()
studio_counts = studio_counts.drop('UNKNOWN', errors='ignore')
top_studios = studio_counts.head(10)
studio_bar = go.Bar(
    x=top_studios.index,
    y=top_studios.values,
    text=top_studios.values,
    marker=dict(color=top_studios.values, colorscale='Blues'),
    hovertemplate='Studio: %{x}<br>Number of Animes: %{y}<extra></extra>',
)
fig = go.Figure(data=studio_bar)
fig.update_layout(title='Number of Animes by Studio (Top 10)',
                  xaxis_title='Studios',
                  yaxis_title='Number of Animes',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'),
                  plot_bgcolor='rgba(0, 0, 0, 0)')
fig.show(renderer="colab")

In [None]:
# Horizontal bar chart of titles per 'Source' value, ignoring the 'UNKNOWN' placeholder.
source_counts = df_anime['Source'].value_counts()
source_counts = source_counts.drop('UNKNOWN', errors='ignore')
source_bar = go.Bar(
    x=source_counts.values,
    y=source_counts.index,
    orientation='h',
    text=source_counts.values,
    marker=dict(color=source_counts.values, colorscale='Viridis'),
    hovertemplate='Source: %{y}<br>Number of Animes: %{x}<extra></extra>',
)
fig = go.Figure(data=source_bar)
fig.update_layout(title='Number of Animes by Source',
                  xaxis_title='Number of Animes',
                  yaxis_title='Source',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Horizontal bar chart of the ten titles with the largest 'Favorites' value.
sorted_df = df_anime.sort_values('Favorites', ascending=False)
top_favorites = sorted_df.head(10)
favorites_bar = go.Bar(
    x=top_favorites['Favorites'],
    y=top_favorites['Name'],
    orientation='h',
    text=top_favorites['Favorites'],
    marker=dict(color='#1f77b4'),
    hovertemplate='Anime: %{y}<br>Favorites: %{x}<extra></extra>',
)
fig = go.Figure(data=favorites_bar)
fig.update_layout(title='Top 10 Most Favorited Anime',
                  xaxis_title='Number of Favorites',
                  yaxis_title='Anime',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Treemap of the same ten titles held in `top_favorites`; every tile sits
# directly under the root (empty parent) and is sized by 'Favorites'.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
favorites_treemap = go.Treemap(
    labels=top_favorites['Name'],
    parents=[""] * len(top_favorites),
    values=top_favorites['Favorites'],
    hovertemplate='Name: %{label}<br>Favorites: %{value}',
    marker=dict(colors=colors),
)
fig = go.Figure(favorites_treemap)
fig.update_layout(title='Top 10 Most Favorited Anime (Treemap)',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Donut chart of titles per 'Rating' value; rows marked 'UNKNOWN' are excluded
# before counting and an 'Unknown' label, if present, is removed afterwards.
known_ratings = df_anime.loc[df_anime['Rating'] != "UNKNOWN", 'Rating']
rating_counts = known_ratings.value_counts()
rating_counts = rating_counts[rating_counts.index != 'Unknown']
rating_pie = go.Pie(
    labels=rating_counts.index,
    values=rating_counts.values,
    hole=0.6,
    hoverinfo='label+percent',
    textinfo='value',
    textfont=dict(size=12),
    marker=dict(colors=['#1f77b4']),
)
fig = go.Figure(data=rating_pie)
fig.update_layout(title='Distribution of Anime Ratings',
                  title_font=dict(size=20),
                  font=dict(size=12, color='#555555'))
fig.show(renderer="colab")

In [None]:
# Display names for the language codes this notebook reports individually.
_LANGUAGE_NAMES = {
    'ja': 'Japanese',
    'ko': 'Korean',
    'zh-cn': 'Simplified Chinese',
    'de': 'German',
    'vi': 'Vietnamese',
    'en': 'English',
    'zh-tw': 'Traditional Chinese',
}


def map_language_code(code):
    """Return the display name for a language ``code``, or ``'Other'`` if it is not listed."""
    if code in _LANGUAGE_NAMES:
        return _LANGUAGE_NAMES[code]
    return 'Other'
# langdetect's detection is randomised by default; fixing the seed makes
# repeated runs of the notebook report the same language for the same title.
DetectorFactory.seed = 0


def detect_language(name):
    """Return langdetect's language code for ``name``, or ``None`` when detection fails.

    ``Exception`` is caught instead of using a bare ``except`` so that
    ``KeyboardInterrupt`` can still stop a long ``Series.apply`` run; any
    ordinary detection error keeps the original best-effort ``None`` result.
    """
    try:
        return detect(name)
    except Exception:
        return None

In [None]:
# Detect the language of every 'Other name' that is not the 'UNKNOWN'
# placeholder; failed detections come back as None and are dropped.
Detected_Language = df_anime[df_anime['Other name']!="UNKNOWN"]['Other name'].apply(detect_language)
Detected_Language = Detected_Language.dropna()
# Translate codes to display names BEFORE counting. Mapping the index after
# value_counts() left one separate 'Other' entry per unlisted code, so the
# chart showed several overlapping bars with the same label.
language_counts = Detected_Language.map(map_language_code).value_counts()

fig = go.Figure(data=go.Bar(
    x=language_counts.values,
    y=language_counts.index,
    orientation='h',
    marker=dict(color=language_counts.values, colorscale='Viridis'),
    text=language_counts.values,
    hovertemplate='Native Language: %{y}<br>Number of Animes: %{x}<extra></extra>',
))
fig.update_layout(
    title='Count of Animes based on its Native Name',
    xaxis_title='Number of Animes',
    yaxis_title='Native Language',
    title_font=dict(size=20),
    font=dict(size=12, color='#555555')
)
fig.show(renderer="colab")

### For User Dataset

In [None]:
# Donut chart of users per 'Gender' value (missing values are not counted).
gender_counts = df_user['Gender'].value_counts(dropna=True)
colors = ['rgb(0, 123, 255)', 'rgb(255, 65, 54)', 'rgb(255, 187, 0)', 'rgb(125, 125, 125)']
gender_pie = go.Pie(
    labels=gender_counts.index,
    values=gender_counts.values,
    hole=0.3,
    sort=False,
    textinfo='value',
    textposition='inside',
    hoverinfo='label+percent',
    hovertemplate='<b>%{label}</b><br>%{percent}',
    marker=dict(colors=colors, line=dict(color='#FFFFFF', width=2)),
)
fig = go.Figure(data=[gender_pie])
fig.update_layout(title='Gender Distribution',
                  title_x=0.5,
                  uniformtext_minsize=12,
                  uniformtext_mode='hide',
                  showlegend=False,
                  paper_bgcolor='rgba(255,255,255,255)',
                  plot_bgcolor='rgba(0,0,0,0)',
                  margin=dict(l=20, r=20, t=100, b=20))
fig.show(renderer="colab")

In [None]:
# Frequency of each distinct non-missing 'Birthday' timestamp string.
df_user['Birthday'].value_counts(dropna=True)

Unnamed: 0_level_0,count
Birthday,Unnamed: 1_level_1
1990-01-01T00:00:00+00:00,177
1989-03-26T00:00:00+00:00,169
1980-01-01T00:00:00+00:00,166
1930-01-01T00:00:00+00:00,153
1991-01-01T00:00:00+00:00,115
...,...
1966-12-06T00:00:00+00:00,1
2001-11-08T00:00:00+00:00,1
1954-10-16T00:00:00+00:00,1
1958-03-13T00:00:00+00:00,1


In [None]:
def calculate_age(birth_date, min_age=10, max_age=60, reference_year=None):
    """Estimate an age in whole years from a birthday string.

    Only the year is used: the age is ``reference_year - birth_year``, where
    ``birth_year`` is the text before the first ``'-'`` in ``birth_date``.

    Parameters
    ----------
    birth_date : str or any
        Birthday such as ``"1990-01-01T00:00:00+00:00"``. Non-string values
        (e.g. ``NaN``) give ``None``.
    min_age, max_age : int, optional
        Ages outside the half-open range ``[min_age, max_age)`` are treated as
        implausible and give ``None``. Defaults keep the original 10..59 window.
    reference_year : int, optional
        Year to measure the age against. Defaults to the current UTC year;
        pass a fixed value for reproducible results.

    Returns
    -------
    int or None
        The age, or ``None`` when it cannot be derived or is out of range.
    """
    # The old `birth_date != 'NaN'` check never matched a real NaN (a float);
    # an explicit type check handles missing values without relying on a
    # swallowed AttributeError.
    if not isinstance(birth_date, str):
        return None
    try:
        birth_year = int(birth_date.split('-')[0])
    except ValueError:
        return None
    if reference_year is None:
        # datetime.utcnow() is deprecated since Python 3.12.
        reference_year = datetime.now(timezone.utc).year
    age = reference_year - birth_year
    if min_age <= age < max_age:
        return age
    return None
# Histogram of the ages derived from the non-missing birthdays.
known_birthdays = df_user['Birthday'].dropna()
Age = known_birthdays.map(calculate_age)
fig = px.histogram(
    Age,
    nbins=20,
    title='Age Distribution',
    labels={'value': 'Age', 'count': 'Count'},
)
fig.update_layout(xaxis=dict(title='Age'),
                  yaxis=dict(title='Count'),
                  bargap=0.1,
                  showlegend=False,
                  paper_bgcolor='rgba(255,255,255,255)',
                  plot_bgcolor='rgba(0,0,0,0)',
                  margin=dict(l=50, r=20, t=100, b=50))
fig.show(renderer="colab")

In [None]:
# Bar chart of the 20 most frequent 'Location' strings, one colour per location.
location_counts = df_user['Location'].value_counts()
top_locations = location_counts.head(20)
fig = px.bar(
    top_locations,
    x=top_locations.index,
    y=top_locations.values,
    color=top_locations.index,
    title='Top 20 Anime Watchers Locations',
    labels={'x': 'Location', 'y': 'Count'},
)
fig.update_layout(xaxis=dict(title='Location'),
                  yaxis=dict(title='Count'),
                  bargap=0.1,
                  showlegend=False,
                  paper_bgcolor='rgba(255,255,255,255)',
                  plot_bgcolor='rgba(0,0,0,0)',
                  margin=dict(l=50, r=20, t=100, b=50))
fig.show(renderer="colab")

In [None]:
# For every metric in `metrics`, take the 15 users with the largest value and
# stack the results into one frame.
# The frames are collected first and concatenated once: growing a frame by
# repeatedly concatenating onto an empty DataFrame is slower, and concatenating
# empty frames raises a FutureWarning in recent pandas.
metrics = ['Days Watched']
top_users = pd.concat(
    [df_user.nlargest(15, metric) for metric in metrics],
    ignore_index=True,
)
fig = px.bar(top_users, x='Username', y=metrics, barmode='group',
             title='Top 15 Anime Watchers (total days anime watched)',
             labels={'value': 'Count', 'variable': 'Metric'},
             color_discrete_sequence=px.colors.qualitative.Plotly)
fig.update_layout(
    xaxis=dict(title='Users'),
    yaxis=dict(title='Count'),
    legend_title_text='Metric',
    paper_bgcolor='rgba(255,255,255,255)',
    plot_bgcolor='rgba(0,0,0,0)',
    margin=dict(l=50, r=20, t=100, b=50),
)
fig.show(renderer="colab")

In [None]:
def get_watching_behavior(username, users=None):
    """Return one user's list-status counts.

    Parameters
    ----------
    username : str
        Value to look up in the 'Username' column.
    users : pandas.DataFrame, optional
        Frame to search. Defaults to the notebook-level ``df_user``; passing a
        frame explicitly lets the function be used on a subset or in a test.

    Returns
    -------
    tuple or None
        ``(watching, on_hold, completed, dropped, plan_to_watch)`` taken from
        the first matching row, or ``None`` when no row matches.
    """
    if users is None:
        users = df_user
    user_data = users[users['Username'] == username]
    if len(user_data) == 0:
        return None
    # Order matters: callers unpack the tuple positionally.
    status_columns = ('Watching', 'On Hold', 'Completed', 'Dropped', 'Plan to Watch')
    return tuple(user_data[column].values[0] for column in status_columns)
# Donut chart of one user's list-status counts.
username_input = "camilOZ"
behavior = get_watching_behavior(username_input)
# get_watching_behavior returns None for an unknown user; fail with a clear
# message instead of the opaque TypeError raised by unpacking None.
if behavior is None:
    raise ValueError(f"User {username_input!r} was not found in df_user")
watching, on_hold, completed, dropped, plan_to_watch = behavior
fig = go.Figure(data=[go.Pie(labels=['Watching', 'On Hold', 'Completed', 'Dropped', 'Plan to Watch'],
                             values=[watching, on_hold, completed, dropped, plan_to_watch],
                             hole=0.3,
                             hoverinfo='label+percent',
                             textinfo='value',
                             textfont_size=15)])

fig.update_layout(title=f"Watching Behavior of {username_input}",
                  showlegend=True,
                  paper_bgcolor='rgba(255,255,255,255)',
                  plot_bgcolor='rgba(0,0,0,0)')

fig.show(renderer="colab")

In [None]:
# Pairwise correlation of the users' activity metrics, drawn as an annotated heatmap.
correlation_matrix = df_user[['Days Watched', 'Mean Score', 'Total Entries', 'Rewatched', 'Episodes Watched']].corr()
fig = ff.create_annotated_heatmap(
    z=correlation_matrix.values,
    x=list(correlation_matrix.columns),
    y=list(correlation_matrix.index),
    # Without explicit annotation text each cell prints the raw float at full
    # precision; two decimals keep the cells readable. Colours still use the
    # unrounded values in `z`.
    annotation_text=correlation_matrix.round(2).values.tolist(),
    colorscale='Viridis',
)
fig.update_layout(title='Correlation Matrix')
fig.show(renderer="colab")

### For User Score Dataset

In [None]:
# Number of distinct users that rated each title, largest first.
anime_watch_count = (
    df_score.groupby('Anime Title')['user_id']
    .nunique()
    .reset_index()
    .rename(columns={'user_id': 'User Count'})
    .sort_values(by='User Count', ascending=False)
)
top_n = 10
top_anime_watch_count = anime_watch_count.head(top_n)
color_palette = px.colors.qualitative.Plotly
# The label key must match the plotted column ('User Count'); the previous key
# 'Watcher Count' matched no column, so the axis label was never applied.
fig = px.bar(top_anime_watch_count, x='User Count', y='Anime Title', orientation='h',
             title=f'Top {top_n} Most Watched Anime Titles',
             labels={'User Count': 'Number of Watchers', 'Anime Title': 'Anime Title'},
             color='User Count',
             color_discrete_sequence=color_palette)
fig.update_layout(showlegend=False, paper_bgcolor='rgba(255,255,255,255)', plot_bgcolor='rgba(0,0,0,0)',
                  margin=dict(l=50, r=20, t=100, b=50))
fig.show(renderer="colab")