In [1]:
from wordcloud import WordCloud
import os

import pandas as pd
import numpy as np
import re

#plotting libraries (init_notebook_mode below is meant to ensure all Plotly plots render while offline)
import matplotlib.pyplot as plt
import seaborn as sb

from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objects as go
import plotly.express as px

# Initialise Plotly's notebook mode for this session.
# NOTE(review): connected=True presumably makes the notebook fetch plotly.js
# from a CDN, which needs network access — confirm this matches the notebook's
# stated goal of rendering Plotly plots while offline.
init_notebook_mode(connected=True)

%matplotlib inline

In [17]:
#custom function to read data into Pandas DataFrame
def open_set(csv, sep=',', encoding='utf-8', usecols=None):
    """Read a CSV file located under ``data/`` into a pandas DataFrame.

    Parameters
    ----------
    csv : str
        File name, appended to the ``data/`` prefix.
    sep : str, default ','
        Field delimiter, forwarded to ``pd.read_csv``.
    encoding : str, default 'utf-8'
        Text encoding, forwarded to ``pd.read_csv``.
    usecols : list-like or None, default None
        Columns to load (labels or positions), forwarded to ``pd.read_csv``.
        ``None`` loads every column.

    Returns
    -------
    pandas.DataFrame
        The parsed file.
    """
    # `sep` and `usecols` used to be accepted but never forwarded, so callers
    # asking for a column subset silently received the whole file.
    return pd.read_csv(
        'data/' + csv,
        sep=sep,
        encoding=encoding,
        usecols=usecols,
        low_memory=False,
    )

In [21]:
# Load the three cleaned tables through open_set. The reviews file is read
# with the 'latin' encoding and requests column positions 0 through 14.
df_animes, df_users = (
    open_set(file_name)
    for file_name in ('animes_cleaned.csv', 'users_cleaned.csv')
)
df_reviews = open_set(
    'reviews_cleaned.csv',
    encoding='latin',
    usecols=list(range(15)),
)


In [49]:
def describe_pretty(x):
    """Format a float to one decimal place; pass any other value through.

    Parameters
    ----------
    x : object
        A single cell value (e.g. from ``DataFrame.describe()``).

    Returns
    -------
    str or object
        ``f'{x:.1f}'`` when ``x`` is a float, otherwise ``x`` unchanged.
    """
    # isinstance (rather than an exact type comparison) also covers float
    # subclasses such as numpy.float64.
    if isinstance(x, float):
        return f'{x:.1f}'
    return x

In [50]:
# Overview of each dataset: structure, first rows and summary statistics.
# Only the first 11 columns of the reviews table are inspected.
for frame in [df_animes, df_users, df_reviews.iloc[:, 0:11]]:
    # info() prints its report and returns None, so it is called on its own;
    # passing it to display() rendered a stray "None" between the tables.
    frame.info()
    display(frame.head(), frame.describe().applymap(describe_pretty))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7158 entries, 0 to 7157
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   anime_id         7158 non-null   int64  
 1   title            7158 non-null   object 
 2   genre            7158 non-null   object 
 3   studio           7158 non-null   object 
 4   premiered        7158 non-null   object 
 5   type             7158 non-null   object 
 6   source           7158 non-null   object 
 7   episodes         7158 non-null   int64  
 8   status           7158 non-null   object 
 9   rank             7158 non-null   int64  
 10  image_url        7157 non-null   object 
 11  rating           7158 non-null   object 
 12  score            7158 non-null   float64
 13  scored_by        7158 non-null   int64  
 14  score_rank       7158 non-null   int64  
 15  popularity       7158 non-null   int64  
 16  popularity_rank  7158 non-null   int64  
 17  members       

Unnamed: 0,anime_id,title,genre,studio,premiered,type,source,episodes,status,rank,image_url,rating,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
0,5114,Fullmetal Alchemist: Brotherhood,Action,Bones,Spring 2009,TV,Manga,64,Finished Airing,1,https://myanimelist.cdn-dena.com/images/anime/...,R - 17+ (violence & profanity),9.25,733592,2,4,4,1199091,106895,3.0
1,9253,Steins;Gate,Thriller,White Fox,Spring 2011,TV,Visual novel,24,Finished Airing,2,https://myanimelist.cdn-dena.com/images/anime/...,PG-13 - Teens 13 or older,9.14,563857,5,8,8,1010330,92423,6.5
2,1575,Code Geass: Hangyaku no Lelouch,Action,Sunrise,Spring 2006,TV,Original,25,Finished Airing,3,https://myanimelist.cdn-dena.com/images/anime/...,R - 17+ (violence & profanity),8.79,627740,22,9,9,986897,63614,15.5
3,30276,One Punch Man,Action,Madhouse,Spring 2015,TV,Web manga,12,Finished Airing,4,https://myanimelist.cdn-dena.com/images/anime/...,R - 17+ (violence & profanity),8.73,691845,27,5,5,1020754,30747,16.0
4,1535,Death Note,Mystery,Madhouse,Spring 2006,TV,Manga,37,Finished Airing,4,https://myanimelist.cdn-dena.com/images/anime/...,R - 17+ (violence & profanity),8.67,1009477,31,1,1,1456378,88696,16.0


None

Unnamed: 0,anime_id,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
count,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0,7158.0
mean,15655.7,12.2,3579.0,6.7,22267.6,218.3,4649.9,3057.5,44227.3,613.5,1637.9
std,12982.9,31.0,2066.5,1.2,58877.2,94.2,3487.5,1765.0,101546.2,3682.4,914.3
min,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,16.0,0.0,3.0
25%,3078.5,1.0,1790.0,6.3,555.0,154.0,1830.2,1550.2,1998.0,2.0,863.5
50%,11506.0,4.0,3579.5,6.9,3328.0,210.0,3845.5,3031.5,9020.5,16.0,1621.8
75%,30190.0,13.0,5368.0,7.4,17143.5,268.0,6858.8,4553.8,38315.0,115.0,2409.0
max,37916.0,1818.0,7158.0,9.5,1009477.0,510.0,14456.0,6231.0,1456378.0,106895.0,3370.5


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137901 entries, 0 to 137900
Data columns (total 13 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   gender                  137901 non-null  object 
 1   age                     137901 non-null  int64  
 2   age_group               137901 non-null  object 
 3   stats_mean_score        137901 non-null  float64
 4   episodes_watched        137901 non-null  int64  
 5   days_spent_watching     137901 non-null  float64
 6   watching                137901 non-null  int64  
 7   completed               137901 non-null  int64  
 8   dropped                 137901 non-null  int64  
 9   on_hold                 137901 non-null  int64  
 10  plan_to_watch           137901 non-null  int64  
 11  rewatched               137901 non-null  int64  
 12  total_anime_engagement  137901 non-null  int64  
dtypes: float64(2), int64(9), object(2)
memory usage: 13.7+ MB


Unnamed: 0,gender,age,age_group,stats_mean_score,episodes_watched,days_spent_watching,watching,completed,dropped,on_hold,plan_to_watch,rewatched,total_anime_engagement
0,Female,32,Seniors,7.43,3391,55.31,3,49,0,1,0,0,53
1,Female,23,Youth,9.13,2978,49.44,13,21,0,7,12,15,53
2,Male,28,Young Adults,8.49,2774,47.61,0,43,1,1,8,33,53
3,Male,30,Young Adults,7.1,1554,25.58,5,21,7,1,19,4,53
4,Male,27,Young Adults,7.59,923,15.36,0,25,0,5,23,0,53


None

Unnamed: 0,age,stats_mean_score,episodes_watched,days_spent_watching,watching,completed,dropped,on_hold,plan_to_watch,rewatched,total_anime_engagement
count,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0,137901.0
mean,28.6,7.9,4414.5,68.0,14.1,177.0,11.0,10.4,63.5,12.5,275.9
std,4.9,0.9,46922.4,185.7,23.4,168.2,22.2,19.7,94.7,38.9,234.8
min,6.0,1.1,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0
25%,25.0,7.4,1540.0,25.7,3.0,56.0,0.0,1.0,7.0,0.0,97.0
50%,28.0,7.9,3082.0,51.1,8.0,124.0,4.0,4.0,29.0,0.0,208.0
75%,31.0,8.4,5459.0,90.0,16.0,243.0,12.0,12.0,78.0,9.0,389.0
max,75.0,10.0,16791524.0,41753.9,1038.0,1250.0,793.0,678.0,1090.0,2076.0,1305.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126100 entries, 0 to 126099
Data columns (total 11 columns):
 #   Column                 Non-Null Count   Dtype 
---  ------                 --------------   ----- 
 0   title                  126100 non-null  object
 1   text                   126100 non-null  object
 2   Overall                126044 non-null  object
 3   Story                  126044 non-null  object
 4   Animation              126044 non-null  object
 5   Music                  126044 non-null  object
 6   Character Development  126044 non-null  object
 7   Enjoyment              126044 non-null  object
 8   polarity               126044 non-null  object
 9   subjectivity           126040 non-null  object
 10  attitude               126038 non-null  object
dtypes: object(11)
memory usage: 10.6+ MB


Unnamed: 0,title,text,Overall,Story,Animation,Music,Character Development,Enjoyment,polarity,subjectivity,attitude
0,Made in Abyss,...,10.0,10,10,10,10,10,0.157887377,0.571908541,Positive
1,Made in Abyss,...,2.0,0,0,0,0,0,0.148181513,0.615992927,Positive
2,Made in Abyss,...,9.25,9,10,9,9,10,0.204097075,0.521248387,Positive
3,Made in Abyss,...,9.75,10,10,10,8,9,0.184404919,0.610297593,Positive
4,Made in Abyss,...,8.5,7,10,10,5,6,0.15258241,0.520738003,Positive


None

Unnamed: 0,title,text,Overall,Story,Animation,Music,Character Development,Enjoyment,polarity,subjectivity,attitude
count,126100,126100,126044,126044,126044,126044,126044,126044,126044,126040.0,126038
unique,690,76889,70,40,39,39,41,42,68743,66170.0,31
top,Death Note,...,10,10,10,10,10,10,0,0.5,Positive
freq,1708,14,11928,28732,31789,32399,36318,44569,93,105.0,119581


In [52]:
# Show the first two rows of df_animes.
df_animes.head(2)

Unnamed: 0,anime_id,title,genre,studio,premiered,type,source,episodes,status,rank,image_url,rating,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
0,5114,Fullmetal Alchemist: Brotherhood,Action,Bones,Spring 2009,TV,Manga,64,Finished Airing,1,https://myanimelist.cdn-dena.com/images/anime/...,R - 17+ (violence & profanity),9.25,733592,2,4,4,1199091,106895,3.0
1,9253,Steins;Gate,Thriller,White Fox,Spring 2011,TV,Visual novel,24,Finished Airing,2,https://myanimelist.cdn-dena.com/images/anime/...,PG-13 - Teens 13 or older,9.14,563857,5,8,8,1010330,92423,6.5


> ### **Univariate Explorations**
> <hr>

>
>#### **Q1: Which studios produced the most anime in the first 18 years of the 21st century (2000–2018)?**
> Because some anime (if _not all anime_) are produced by a collaboration of major and minor studios, each such collaboration is tabulated as a single entity of its own, while work a studio produced alone is attributed to that studio.

In [101]:
# Three most frequent values of the `studio` column with their counts.
# rename_axis + reset_index(name=...) yields 'studio'/'count' columns on every
# pandas version; the previous rename mapping relied on pre-2.0 column naming
# and would produce two 'count' columns on pandas >= 2.0.
(
    df_animes['studio']
    .value_counts()
    .rename_axis('studio')
    .reset_index(name='count')
    .head(3)
)

Unnamed: 0,studio,count
0,Toei Animation,383
1,Sunrise,335
2,J.C.Staff,287


In [129]:
# Horizontal bar chart of the ten most frequent `studio` values.
# rename_axis + reset_index(name=...) yields 'studio'/'count' columns on every
# pandas version; the previous rename mapping relied on pre-2.0 column naming.
fig = px.bar(
    df_animes['studio']
    .value_counts()
    .rename_axis('studio')
    .reset_index(name='count')
    .head(10),
    y='studio', x='count', text='studio', orientation='h',
    labels={'studio': 'Anime Studios', 'count': 'Number of anime produced'},
    title='Anime studios producing the most anime between 2000-2018',
)
# The studio name is already passed as the bar text (text='studio'), so the
# y axis and its tick labels are hidden.
fig.update_yaxes(visible=False, showticklabels=False)

In [140]:
# Twenty most frequent (studio, genre) combinations with their counts.
(
    df_animes
    .value_counts(subset=['studio', 'genre'])
    .reset_index(name='count')
    .head(20)
)

Unnamed: 0,studio,genre,count
0,Sunrise,Action,170
1,Toei Animation,Action,153
2,J.C.Staff,Comedy,100
3,DLE,Comedy,89
4,Madhouse,Action,85
5,Production I.G,Action,76
6,Studio Pierrot,Action,69
7,J.C.Staff,Action,67
8,Studio Deen,Comedy,66
9,Bones,Action,66


In [142]:
# Bar chart of the 20 most frequent (studio, genre) combinations, coloured by
# genre. "Top 20" counts combinations, not studios.
# NOTE(review): both text='genre' and text_auto are supplied — confirm which
# of the two plotly actually draws on the bars.
px.bar(
    df_animes
    .value_counts(subset=['studio', 'genre'])
    .reset_index(name='count')
    .head(20),
    x='studio',
    y='count',
    color='genre',
    text='genre',
    text_auto='0.2s',
    labels={
        'studio': 'Anime Studios',
        'genre': 'Genre',
        'count': 'Number of anime per genre',
    },
    title='Anime Studios Production by Genre (Top 20)',
)

In [146]:
# Bar chart of the 20 most frequent (studio, type) combinations, coloured by
# release format. "Top 20" counts combinations, not studios.
# NOTE(review): both text='type' and text_auto are supplied — confirm which
# of the two plotly actually draws on the bars.
px.bar(
    df_animes
    .value_counts(subset=['studio', 'type'])
    .reset_index(name='count')
    .head(20),
    x='studio',
    y='count',
    color='type',
    text='type',
    text_auto='0.2s',
    labels={
        'studio': 'Anime Studios',
        'type': 'Release Format',
        'count': 'Number of anime per release format',
    },
    title='Anime Studios Production by Release Format (Top 20)',
)

In [147]:
# Bar chart of the 20 most frequent (studio, source) combinations, coloured by
# source material. "Top 20" counts combinations, not studios.
# The title and labels previously still described the release-format chart
# (and keyed the label on 'type'), so the legend showed the raw column name.
# NOTE(review): both text='source' and text_auto are supplied — confirm which
# of the two plotly actually draws on the bars.
px.bar(
    df_animes
    .value_counts(subset=['studio', 'source'])
    .reset_index(name='count')
    .head(20),
    x='studio', y='count', text='source', color='source', text_auto='0.2s',
    title='Anime Studios Production by Source Material (Top 20)',
    labels={
        'source': 'Source Material',
        'count': 'Number of anime per source material',
        'studio': 'Anime Studios',
    },
)

In [149]:

# List the column labels of df_animes.
df_animes.columns

Index(['anime_id', 'title', 'genre', 'studio', 'premiered', 'type', 'source',
       'episodes', 'status', 'rank', 'image_url', 'rating', 'score',
       'scored_by', 'score_rank', 'popularity', 'popularity_rank', 'members',
       'favorites', 'avg_rank_score'],
      dtype='object')

In [157]:

# Mean of every numeric metric per `studio` value.
# Only numeric columns are selected: 'status', 'image_url' and 'rating' hold
# text, so older pandas silently dropped them from mean() and pandas >= 2.0
# raises a TypeError when they are included.
studios_avgs = df_animes.groupby('studio')[[
    'episodes', 'rank', 'score', 'scored_by', 'score_rank',
    'popularity', 'popularity_rank', 'members', 'favorites',
    'avg_rank_score',
]].mean()

# Show the per-studio means followed by their summary statistics.
display(studios_avgs, studios_avgs.describe())

Unnamed: 0_level_0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
studio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10Gauge,6.500000,6157.250000,5.525000,349.250000,338.250000,9918.500000,5282.500000,1322.750000,1.500000,2810.375000
2:10 Animation,9.333333,6046.666667,4.176667,124.000000,349.666667,8773.000000,5125.333333,1136.333333,2.000000,2737.500000
33 Collective,0.000000,7158.000000,0.000000,0.000000,510.000000,14456.000000,6231.000000,21.000000,0.000000,3370.500000
8bit,7.558824,2160.294118,6.662647,38130.823529,207.029412,2325.088235,1816.588235,75684.117647,345.882353,1011.808824
A-1 Pictures,10.034884,2261.569767,7.077093,55898.081395,181.552326,2694.110465,1934.395349,106915.976744,1626.209302,1057.973837
...,...,...,...,...,...,...,...,...,...,...
"ixtl, LIDENFILMS",12.000000,1200.000000,6.930000,26434.000000,203.000000,1163.000000,1001.000000,70801.000000,254.000000,602.000000
pH Studio,1.000000,6990.000000,6.060000,51.000000,290.000000,13320.000000,6138.000000,84.000000,0.000000,3214.000000
production doA,6.500000,2335.500000,6.990000,4299.000000,197.000000,2430.000000,1974.500000,34213.500000,72.000000,1085.750000
ufotable,4.553571,2454.535714,7.195714,42798.767857,176.035714,2776.142857,2102.678571,80084.232143,994.107143,1139.357143


Unnamed: 0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
count,691.0,691.0,691.0,691.0,691.0,691.0,691.0,691.0,691.0,691.0
mean,12.717311,4000.770012,6.437043,13241.434748,244.91373,5324.922285,3405.64617,28275.172796,338.364375,1825.27995
std,19.526138,1762.484791,1.035811,27504.316713,81.003575,3285.203642,1519.209857,51696.86802,1809.232774,786.113295
min,0.0,42.0,0.0,0.0,35.5,48.0,47.0,16.0,0.0,55.5
25%,2.375,2607.0,6.03,563.666667,190.330357,2828.5,2208.0,2125.0,4.0,1204.166667
50%,8.59375,3927.0,6.6,3672.166667,234.0,4532.0,3324.0,10255.0,28.0,1781.907407
75%,13.0,5479.5,7.0465,14276.741379,289.333333,7221.0,4661.095361,32239.333333,146.55,2463.333333
max,258.0,7158.0,8.615,364012.0,510.0,14456.0,6231.0,641101.0,41098.0,3370.5


In [168]:
# Ten rows of studios_avgs (per-studio means) with the smallest mean score_rank.
# NOTE(review): many of the returned rows are multi-studio entries with
# whole-number means, which suggests very few titles each — consider
# requiring a minimum title count before ranking.
studios_avgs.nsmallest(10,'score_rank')

Unnamed: 0_level_0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
studio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"J.C.Staff, Egg Firm",72.0,811.5,8.615,36726.0,35.5,949.5,822.0,102833.0,1721.0,428.75
"Madhouse, Satelight, Graphinica",10.0,67.0,8.53,221939.0,43.0,105.0,101.0,425255.0,13450.0,72.0
"Xebec, AIC",26.0,1128.0,8.46,21150.0,50.0,1282.0,1092.0,63676.0,1228.0,571.0
"Tezuka Productions, MAPPA",12.0,303.0,8.44,93986.0,52.0,390.0,349.0,192993.0,4600.0,200.5
"Gainax, Tatsunoko Production",26.0,42.0,8.32,364012.0,64.0,48.0,47.0,641101.0,41098.0,55.5
"Studio Gallop, Studio Comet",26.0,649.0,8.28,60560.0,68.0,745.0,654.0,115355.0,3395.0,361.0
Oh! Production,1.0,1238.0,8.19,34896.0,77.0,1366.0,1159.0,58598.0,376.0,618.0
"A-1 Pictures, Trigger, CloverWorks",24.0,200.0,8.17,76973.0,79.0,239.0,219.0,276912.0,4991.0,149.0
"Shirogumi, Shin-Ei Animation",1.0,3264.0,8.13,6520.0,83.0,3636.0,2885.0,10320.0,47.0,1484.0
"Studio Pierrot, Studio Hibari",50.0,1525.0,8.03,21618.0,93.0,1630.0,1382.0,45625.0,826.0,737.5


In [169]:
# Ten rows of studios_avgs (per-studio means) with the smallest mean
# popularity_rank.
studios_avgs.nsmallest(10,'popularity_rank')

Unnamed: 0_level_0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
studio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Gainax, Tatsunoko Production",26.0,42.0,8.32,364012.0,64.0,48.0,47.0,641101.0,41098.0,55.5
"Madhouse, Satelight, Graphinica",10.0,67.0,8.53,221939.0,43.0,105.0,101.0,425255.0,13450.0,72.0
"Silver Link., Nexus",12.0,188.0,7.75,193030.0,121.0,168.0,158.0,336877.0,4087.0,139.5
"A-1 Pictures, Trigger, CloverWorks",24.0,200.0,8.17,76973.0,79.0,239.0,219.0,276912.0,4991.0,149.0
"Kyoto Animation, Animation Do",12.5,275.5,7.72,155257.5,124.0,264.0,239.0,277420.5,5126.0,181.5
"Ordet, SANZIGEN",8.0,370.0,7.0,125060.0,196.0,283.0,256.0,245436.0,1852.0,226.0
"Hal Film Maker, TYO Animations",12.0,357.0,7.1,142827.0,186.0,285.0,257.0,244791.0,1641.0,221.5
"Gainax, Production I.G",2.666667,323.666667,8.016667,165267.0,94.333333,344.0,309.0,274696.666667,8161.333333,201.666667
"Tezuka Productions, MAPPA",12.0,303.0,8.44,93986.0,52.0,390.0,349.0,192993.0,4600.0,200.5
"Xebec, Asread",24.0,434.0,7.24,97057.0,172.0,395.0,354.0,190780.0,1482.0,263.0


In [171]:
# Ten rows of studios_avgs (per-studio means) with the largest mean members.
studios_avgs.nlargest(10,'members')

Unnamed: 0_level_0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
studio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Gainax, Tatsunoko Production",26.0,42.0,8.32,364012.0,64.0,48.0,47.0,641101.0,41098.0,55.5
"Madhouse, Satelight, Graphinica",10.0,67.0,8.53,221939.0,43.0,105.0,101.0,425255.0,13450.0,72.0
"Silver Link., Nexus",12.0,188.0,7.75,193030.0,121.0,168.0,158.0,336877.0,4087.0,139.5
"Kyoto Animation, Animation Do",12.5,275.5,7.72,155257.5,124.0,264.0,239.0,277420.5,5126.0,181.5
"A-1 Pictures, Trigger, CloverWorks",24.0,200.0,8.17,76973.0,79.0,239.0,219.0,276912.0,4991.0,149.0
"Gainax, Production I.G",2.666667,323.666667,8.016667,165267.0,94.333333,344.0,309.0,274696.666667,8161.333333,201.666667
"Kinema Citrus, Orange",7.0,1029.0,6.89,137591.5,207.0,969.5,828.5,251132.0,1502.5,517.75
"Ordet, SANZIGEN",8.0,370.0,7.0,125060.0,196.0,283.0,256.0,245436.0,1852.0,226.0
"Hal Film Maker, TYO Animations",12.0,357.0,7.1,142827.0,186.0,285.0,257.0,244791.0,1641.0,221.5
"Satelight, A-1 Pictures",36.6,856.0,7.68,123002.0,128.0,874.2,752.6,230361.2,7779.4,440.3


In [172]:
# Ten rows of studios_avgs (per-studio means) with the largest mean favorites.
studios_avgs.nlargest(10,'favorites')

Unnamed: 0_level_0,episodes,rank,score,scored_by,score_rank,popularity,popularity_rank,members,favorites,avg_rank_score
studio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Gainax, Tatsunoko Production",26.0,42.0,8.32,364012.0,64.0,48.0,47.0,641101.0,41098.0,55.5
"Madhouse, Satelight, Graphinica",10.0,67.0,8.53,221939.0,43.0,105.0,101.0,425255.0,13450.0,72.0
"Gainax, Production I.G",2.666667,323.666667,8.016667,165267.0,94.333333,344.0,309.0,274696.666667,8161.333333,201.666667
"Satelight, A-1 Pictures",36.6,856.0,7.68,123002.0,128.0,874.2,752.6,230361.2,7779.4,440.3
Imagin,9.0,1900.0,7.57,115083.0,139.0,2062.5,1621.5,225190.0,6630.0,880.25
White Fox,9.066667,1999.7,6.504667,92496.766667,195.6,2260.366667,1676.8,175628.033333,5318.066667,936.2
"Kyoto Animation, Animation Do",12.5,275.5,7.72,155257.5,124.0,264.0,239.0,277420.5,5126.0,181.5
"A-1 Pictures, Trigger, CloverWorks",24.0,200.0,8.17,76973.0,79.0,239.0,219.0,276912.0,4991.0,149.0
"A-1 Pictures, Bridge",52.0,1663.5,7.755,95461.5,120.5,1789.5,1436.5,207060.5,4669.5,778.5
"Tezuka Productions, MAPPA",12.0,303.0,8.44,93986.0,52.0,390.0,349.0,192993.0,4600.0,200.5
