In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

%matplotlib inline

In [2]:
# Let pandas render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

### Loading the Data Files

In [4]:
# load all the data on the animes and users
# NOTE(security): pickle.load can execute arbitrary code while deserializing,
# so only point this at a pickle file that comes from a trusted source.
with open('pickles/full_list_final.pickle', 'rb') as infile:
    final = pickle.load(infile)
# No explicit close() is needed: the with-block closes the file on exit.

# load the animes and their types (first CSV column is used as the index)
anime_types = pd.read_csv('csv_files/anime_types.csv', index_col=0)

In [6]:
# Build the working frame from the loaded data, keeping the four columns the
# analysis uses in a fixed order.
column_order = ['title', 'genres', 'user', 'ratings']
df = pd.DataFrame.from_dict(final)[column_order]

# A blank rating means the user did NOT rate the anime; 0 is used as the
# placeholder for those rows. Once the blanks are gone the column is converted
# from strings to floats.
df['ratings'] = df['ratings'].replace('', 0).astype('float64')

###  RUN THIS ONLY ONCE!!

In [11]:
# change ratings scale from 1-5 to 1-10
#
# The doubling used to be unconditional, so executing this cell a second time
# silently doubled the ratings again. The guard makes the cell safe to re-run:
# once the column has been doubled its maximum exceeds 5 and the
# multiplication is skipped.
# NOTE(review): the guard assumes at least one rating above 2.5 exists on the
# original scale -- confirm that holds for this dataset.
if df['ratings'].max() <= 5:
    df['ratings'] = df['ratings'] * 2

###  Merging Anime Shows with Types

In [12]:
# Store the rescaled ratings as 64-bit integers.
df['ratings'] = df['ratings'].astype(np.int64)

In [13]:
# Attach each anime's type by joining on the shared 'title' column.
# (Default join: titles missing from either frame are dropped.)
df = pd.merge(df, anime_types, on='title')

In [14]:
# Drop the rows whose type is 'Video' (anime music videos).
is_not_video = df['type'].ne('Video')
df = df[is_not_video]

### Users that rated above 0 

In [15]:
# Keep only real ratings: rows with a rating of 0 are "not rated" placeholders.
has_rating = df['ratings'].gt(0)
only_rated_users = df[has_rating]

In [16]:
# Collect every distinct genre label found on the rated rows. Each entry of
# the `genres` column is iterated directly (no index arithmetic) and trimmed;
# str.strip() already removes surrounding newlines along with other whitespace.
all_genres = {
    genre.strip()
    for genre_list in only_rated_users.genres
    for genre in genre_list
}

# One-column frame of the labels, sorted alphabetically with a fresh 0..n-1 index.
all_genres_df = pd.DataFrame(all_genres)
all_genres_df.columns = ['Genre']
all_genres_df = all_genres_df.sort_values(by='Genre').reset_index(drop=True)

In [19]:
# Number of ratings each title received, indexed by title and ordered from the
# least-rated title to the most-rated one.
ratings_per_title = only_rated_users.groupby('title')[['ratings']].count()
num_of_ratings = ratings_per_title.sort_values(by='ratings')

In [33]:
# get the ratings and the count of how many time the rating appears
#
# Result columns:
#   ratings_score -- the rating value
#   ratings       -- how many times that value was given
#
# The column names are set explicitly rather than by renaming the 'index'
# column produced by reset_index(): newer pandas releases name the
# value_counts() result differently, which made that rename a silent no-op and
# left the plots below without their 'ratings_score' column.
ratings_count = (
    only_rated_users['ratings']
    .value_counts()
    .sort_values()
    .rename_axis('ratings_score')
    .reset_index(name='ratings')
)

In [165]:
# Histogram of rating scores built from the pre-aggregated counts: the y value
# of each bar comes from the `ratings` (count) column.
# NOTE(review): the marginal box plot is presumably drawn from the rows of
# `ratings_count` (one row per distinct score), i.e. not weighted by the
# counts -- confirm this is the intended picture.
fig = px.histogram(
    ratings_count.sort_values(by='ratings'),
    x='ratings_score',
    y='ratings',
    marginal='box',  # or violin, rug
    hover_data=ratings_count.columns,
)
fig.update_layout(
    title_text='Distribution of Ratings',
    xaxis=dict(title='Rating Score', tickfont_size=14),
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` axis attribute
        title=dict(text='Ratings Count', font=dict(size=16)),
        tickfont_size=14,
    ),
)
fig.show()

In [45]:
# Bar chart: one bar per rating score, bar height = number of times that score
# was given. Bars are ordered by ascending count.
fig = px.bar(
    ratings_count.sort_values(by='ratings'),
    x='ratings_score',
    y='ratings',
)
fig.update_layout(
    title_text='Total Count of Animes Rating',
    xaxis=dict(title='Rating Score', tickfont_size=14),
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` axis attribute
        title=dict(text='Ratings Count', font=dict(size=16)),
        tickfont_size=14,
    ),
)
fig.show()

### Most watched anime by production type

In [58]:
# Count the rated rows per anime type. The result has two columns: 'type' and
# 'title', where 'title' holds the row count for that type (ascending).
rows_per_type = only_rated_users.groupby('type')['title'].count()
count_types = rows_per_type.sort_values().reset_index()

In [62]:
# Pie chart of the rated rows split by anime type: one slice per type, sized
# by the per-type count stored in the 'title' column of `count_types`.
type_pie = go.Pie(labels=count_types['type'], values=count_types['title'])

fig = go.Figure(data=[type_pie])
fig.show()

In [194]:
# Turn the title index back into a column and keep the last ten rows of
# `num_of_ratings`.
top_animes = num_of_ratings.reset_index().tail(10)

In [196]:
# Bar chart of the ten titles in `top_animes`, ordered by ascending number of
# ratings.
fig = px.bar(
    top_animes.sort_values(by='ratings'),
    x='title',
    y='ratings',
)
fig.update_layout(
    title_text='Top 10 Most Watched Animes',
    xaxis=dict(title='Anime Shows', tickfont_size=14),
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` axis attribute
        title=dict(text='Number of Users Ratings', font=dict(size=16)),
        tickfont_size=14,
    ),
)
fig.show()

In [95]:
# get only users that rated at least 5 animes
#
# Each row is tagged with the number of rows its user has, then filtered with
# a boolean mask. This selects the same rows as
# groupby('user').filter(lambda x: len(x) >= 5) without calling a Python
# function once per user. Rows with a missing user get a NaN count and are
# excluded, matching groupby's default handling of missing keys.
ratings_per_user = only_rated_users['user'].map(only_rated_users['user'].value_counts())
rated_5 = only_rated_users[ratings_per_user >= 5]

# of those, keep only the rows whose type is 'TV' or 'Web'
rated_shows = rated_5[rated_5.type.isin(['TV','Web'])]

In [200]:
# get the average of each anime's ratings
#
# The 'ratings' column is selected before averaging: the frame also carries
# non-numeric columns (genres, user, type), and averaging the whole frame
# raises a TypeError on pandas versions that no longer drop such columns
# silently.
avg_anime_ratings = (
    rated_shows.groupby('title')['ratings']
    .mean()
    .sort_values()
    .reset_index()
    # sorted ascending, so the last ten rows are the ten highest averages
    .tail(10)
)

In [201]:
# Show the first of the ten retained rows.
avg_anime_ratings.iloc[:1]

Unnamed: 0,title,ratings
2199,Fullmetal Alchemist: Brotherhood,9.03125


In [203]:
# Bar chart of the ten titles in `avg_anime_ratings`, ordered by ascending
# average rating.
fig = px.bar(
    avg_anime_ratings.sort_values(by='ratings'),
    x='title',
    y='ratings',
)
fig.update_layout(
    title_text='Top 10 Most Highest Rated Anime Shows',
    xaxis=dict(title='Anime Shows', tickfont_size=14),
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` axis attribute
        title=dict(text='Average User Rating', font=dict(size=16)),
        tickfont_size=14,
    ),
)
fig.show()

In [190]:
# Per-user summary with columns:
#   user       -- the user identifier
#   user_count -- number of rows (ratings) that user has in `rated_5`
#   avg_rating -- mean of that user's ratings
avg_user_ratings_given = (
    rated_5.groupby('user')['ratings']
    .agg(user_count='size', avg_rating='mean')
    .reset_index()
)

# to change number of users activity
num_of_users = 50

# Rank by activity before truncating. Taking the first rows with head() would
# return users in group-key order rather than the most active ones, which is
# what the "Top N Active Users" chart built from this frame claims to show.
top_users_activity = avg_user_ratings_given.nlargest(num_of_users, 'user_count')

In [193]:
# Scatter plot of user activity: one marker per user in `top_users_activity`,
# placed (and coloured) by how many shows that user rated.
activity_markers = go.Scatter(
    x=top_users_activity['user'],
    y=top_users_activity['user_count'],
    mode='markers',
    marker_color=top_users_activity['user_count'],
    text=top_users_activity['user'],  # hover text goes here
)
fig = go.Figure(data=activity_markers)

# A single layout update: the earlier title='Users Activity' call was
# overwritten immediately by the title_text set here, so it has been dropped.
fig.update_layout(
    title_text=f'Top {num_of_users} Active Users',
    xaxis=dict(title='Users', tickfont_size=14),
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` axis attribute
        title=dict(text='Amount Of Shows Rated', font=dict(size=16)),
        tickfont_size=14,
    ),
)
fig.show()