In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval

In [2]:
filepath = "MAL_anime_dataset/MAL_anime.csv"

full_anime_df = pd.read_csv(filepath)
full_anime_df.head()

Unnamed: 0,id,title,num_scoring_users,status,media_type,num_episodes,start_date,end_date,mean,source,...,studios,synopsis,nsfw,created_at,updated_at,main_picture_medium,main_picture_large,alternative_titles_en,alternative_titles_ja,alternative_titles_synonyms
0,5114,Fullmetal Alchemist: Brotherhood,1846307,finished_airing,tv,64,2009-04-05 00:00:00,2010-07-04 00:00:00,9.14,manga,...,['Bones'],After a horrific alchemy experiment goes wrong...,white,2008-08-21 03:35:22,2022-04-18 05:06:13,https://api-cdn.myanimelist.net/images/anime/1...,https://api-cdn.myanimelist.net/images/anime/1...,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,['Hagane no Renkinjutsushi: Fullmetal Alchemis...
1,11061,Hunter x Hunter (2011),1488156,finished_airing,tv,148,2011-10-02 00:00:00,2014-09-24 00:00:00,9.05,manga,...,"['Madhouse', 'Studio Live']",Hunters devote themselves to accomplishing haz...,white,2011-07-27 08:41:39,2022-04-18 05:11:21,https://api-cdn.myanimelist.net/images/anime/1...,https://api-cdn.myanimelist.net/images/anime/1...,Hunter x Hunter,HUNTER×HUNTER（ハンター×ハンター）,['HxH (2011)']
2,9253,Steins;Gate,1240087,finished_airing,tv,24,2011-04-06 00:00:00,2011-09-14 00:00:00,9.08,visual_novel,...,['White Fox'],Eccentric scientist Rintarou Okabe has a never...,white,2010-07-26 09:23:40,2022-05-01 21:24:42,https://api-cdn.myanimelist.net/images/anime/5...,https://api-cdn.myanimelist.net/images/anime/5...,Steins;Gate,STEINS;GATE,[]
3,21,One Piece,1076776,currently_airing,tv,0,1999-10-20 00:00:00,1970-01-01 00:00:00,8.65,manga,...,['Toei Animation'],"Gol D. Roger was known as the ""Pirate King,"" t...",white,2005-11-11 01:20:50,2022-04-18 05:12:08,https://api-cdn.myanimelist.net/images/anime/6...,https://api-cdn.myanimelist.net/images/anime/6...,One Piece,ONE PIECE,['OP']
4,1535,Death Note,2426765,finished_airing,tv,37,2006-10-04 00:00:00,2007-06-27 00:00:00,8.62,manga,...,['Madhouse'],"Brutal murders, petty thefts, and senseless vi...",white,2006-10-04 03:39:19,2022-04-18 04:59:29,https://api-cdn.myanimelist.net/images/anime/9...,https://api-cdn.myanimelist.net/images/anime/9...,Death Note,デスノート,['DN']


In [3]:
len(full_anime_df)

24012

In [4]:
full_anime_df['media_type'].unique()

array(['tv', 'movie', 'ova', 'ona', 'music', 'special', 'unknown'],
      dtype=object)

In [5]:
# leave out rows of data with 'media_type' == 'music'

full_anime_df = full_anime_df.loc[full_anime_df['media_type'] != 'music']

In [6]:
full_anime_df['media_type'].unique()

array(['tv', 'movie', 'ova', 'ona', 'special', 'unknown'], dtype=object)

In [7]:
# make column where title is EN title if available, else use JPN title
full_anime_df['new_title'] = full_anime_df['alternative_titles_en']

full_anime_df['new_title'].fillna(full_anime_df['title'], inplace=True)

In [8]:
anime_df = full_anime_df[['id', 'new_title','mean','num_scoring_users',
                           'popularity','num_list_users','rank','num_favorites',
                           'media_type','rating','studios', 'genres','source','num_episodes',
                           'average_episode_duration','start_date', 'end_date',
                           'start_season_year','start_season_season', 'broadcast_day_of_the_week',
                           'broadcast_start_time',]]

anime_df.head(10)

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,genres,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time
0,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00
1,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,"['Action', 'Adventure', 'Fantasy', 'Shounen']",manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00
2,9253,Steins;Gate,9.08,1240087,13,2248209,3,171579,tv,pg_13,...,"['Drama', 'Psychological', 'Sci-Fi', 'Suspense...",visual_novel,24,0 days 00:24:20,2011-04-06 00:00:00,2011-09-14 00:00:00,2011,spring,wednesday,02:05:00
3,21,One Piece,8.65,1076776,26,1893379,65,167571,tv,pg_13,...,"['Action', 'Adventure', 'Fantasy', 'Shounen']",manga,0,0 days 00:24:00,1999-10-20 00:00:00,1970-01-01 00:00:00,1999,fall,sunday,09:30:00
4,1535,Death Note,8.62,2426765,2,3421349,74,155891,tv,r,...,"['Psychological', 'Shounen', 'Supernatural', '...",manga,37,0 days 00:23:00,2006-10-04 00:00:00,2007-06-27 00:00:00,2006,fall,wednesday,00:56:00
5,16498,Attack on Titan,8.53,2459371,1,3438932,102,152655,tv,r,...,"['Action', 'Drama', 'Gore', 'Military', 'Shoun...",manga,25,0 days 00:24:00,2013-04-07 00:00:00,2013-09-29 00:00:00,2013,spring,sunday,01:58:00
6,1575,Code Geass: Lelouch of the Rebellion,8.7,1255301,17,1995638,49,99388,tv,r,...,"['Action', 'Drama', 'Mecha', 'Military', 'Scho...",original,25,0 days 00:24:00,2006-10-06 00:00:00,2007-07-29 00:00:00,2006,fall,friday,01:25:00
7,1735,Naruto Shippuden,8.24,1427961,16,2126396,285,97728,tv,pg_13,...,"['Action', 'Adventure', 'Fantasy', 'Martial Ar...",manga,500,0 days 00:23:09,2007-02-15 00:00:00,2017-03-23 00:00:00,2007,winter,thursday,19:30:00
8,30,Neon Genesis Evangelion,8.34,929469,48,1559217,206,91211,tv,pg_13,...,"['Action', 'Avant Garde', 'Drama', 'Mecha', 'P...",original,26,0 days 00:24:01,1995-10-04 00:00:00,1996-03-27 00:00:00,1995,fall,wednesday,18:30:00
9,32281,Your Name.,8.87,1657208,11,2367090,24,81809,movie,pg_13,...,"['Drama', 'Romantic Subtext', 'Supernatural']",original,1,0 days 01:46:31,2016-08-26 00:00:00,2016-08-26 00:00:00,2016,summer,,


In [9]:
len(anime_df)

21898

In [10]:
print(type(anime_df['studios'][0]))
print(type(anime_df['genres'][0]))

<class 'str'>
<class 'str'>


In [11]:
anime_df['studios'] = anime_df['studios'].apply(literal_eval)
anime_df['genres'] = anime_df['genres'].apply(literal_eval)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
print(type(anime_df['studios'][0]))
print(type(anime_df['genres'][0]))

<class 'list'>
<class 'list'>


In [13]:
demographics = ['Shounen', 'Seinen', 'Shoujo', 'Josei']

# create a new column for demographics

anime_df['demographics'] = [[value for value in genre_list if value in demographics] for genre_list in anime_df['genres']]
anime_df[['new_title','demographics']].loc[100:120]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,new_title,demographics
100,Maid Sama!,[Shoujo]
101,Nichijou - My Ordinary Life,[Shounen]
102,High School DxD,[]
103,Charlotte,[]
104,Devilman: Crybaby,[]
105,Terror in Resonance,[]
106,Demon Slayer: Kimetsu no Yaiba Entertainment D...,[Shounen]
107,Yu Yu Hakusho: Ghost Files,[Shounen]
108,Attack on Titan Season 2,[Shounen]
109,When They Cry,[]


In [14]:
anime_df = anime_df.explode('demographics', ignore_index=True)

anime_df[['new_title','demographics']].loc[100:120]

Unnamed: 0,new_title,demographics
100,Maid Sama!,Shoujo
101,Nichijou - My Ordinary Life,Shounen
102,High School DxD,
103,Charlotte,
104,Devilman: Crybaby,
105,Terror in Resonance,
106,Demon Slayer: Kimetsu no Yaiba Entertainment D...,Shounen
107,Yu Yu Hakusho: Ghost Files,Shounen
108,Attack on Titan Season 2,Shounen
109,When They Cry,


In [15]:
# fill in 'demographics' == NaN values
anime_df['demographics'].fillna("no demographic", inplace=True)

anime_df[['new_title','demographics']].loc[100:120]

Unnamed: 0,new_title,demographics
100,Maid Sama!,Shoujo
101,Nichijou - My Ordinary Life,Shounen
102,High School DxD,no demographic
103,Charlotte,no demographic
104,Devilman: Crybaby,no demographic
105,Terror in Resonance,no demographic
106,Demon Slayer: Kimetsu no Yaiba Entertainment D...,Shounen
107,Yu Yu Hakusho: Ghost Files,Shounen
108,Attack on Titan Season 2,Shounen
109,When They Cry,no demographic


In [16]:
anime_df['demographics'].value_counts()
# the majority of shows don't have a labeled target demographic
# less emphasis on age+gender targeting vs age(?)+genre_interest

no demographic    18093
Shounen            2069
Seinen              927
Shoujo              727
Josei               111
Name: demographics, dtype: int64

In [17]:
len(anime_df)

21927

In [18]:
# save aside anime that haven't aired yet

unaired_anime_df = anime_df.loc[anime_df['media_type'] == "unknown"]

In [19]:
anime_df['media_type'] == "unknown"

0        False
1        False
2        False
3        False
4        False
         ...  
21922    False
21923    False
21924    False
21925    False
21926    False
Name: media_type, Length: 21927, dtype: bool

In [20]:
# select rows where media_type is not "unknown"

anime_df = anime_df.loc[anime_df['media_type'] != "unknown"]

In [21]:
len(anime_df)

21856

In [22]:
unaired_anime_df.head()

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
2078,49363,SK∞ (Shinsaku Anime Project),0.0,1,3779,25578,0,237,unknown,pg_13,...,original,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
2482,50803,Jaku-Chara Tomozaki-kun (Shinsaku Anime),0.0,0,3743,26045,0,159,unknown,pg_13,...,light_novel,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
2612,51533,Tonikawa: Over the Moon for You - Uniform,0.0,1,3330,33250,0,140,unknown,pg_13,...,manga,1,,2022-07-01 00:00:00,2022-07-01 00:00:00,2022,summer,,,Shounen
2875,51440,Sasaki to Miyano (Shinsaku Anime),0.0,0,4965,13446,0,112,unknown,pg_13,...,manga,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
2979,49981,"Kimi to Boku no Saigo no Senjou, Aruiwa Sekai ...",0.0,1,3838,24834,0,102,unknown,pg_13,...,light_novel,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic


In [23]:
# unpack genres from list
# each record is a genre for a show
# many shows will have more than one record because it has multiple genres
genre_anime_df = anime_df.explode('genres', ignore_index=True)

genre_anime_df.head(10)

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
0,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
1,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
2,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
3,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
4,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
5,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
6,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
7,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
8,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
9,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen


In [24]:
genre_anime_df['genres'].unique()

array(['Action', 'Adventure', 'Drama', 'Fantasy', 'Military', 'Shounen',
       'Psychological', 'Sci-Fi', 'Suspense', 'Time Travel',
       'Supernatural', 'Gore', 'Survival', 'Mecha', 'School',
       'Super Power', 'Martial Arts', 'Avant Garde', 'Romantic Subtext',
       'Historical', 'Music', 'Romance', 'Comedy', 'Adult Cast', 'Space',
       'Sports', 'Team Sports', 'Love Polygon', 'Video Game', 'Isekai',
       'Parody', 'Seinen', 'Gag Humor', 'Samurai', 'Slice of Life',
       'Mahou Shoujo', 'Horror', 'Mystery', 'Vampire', 'Ecchi',
       'Strategy Game', 'Reincarnation', 'Detective', 'Mythology',
       'Crossdressing', 'Reverse Harem', 'Shoujo', 'High Stakes Game',
       'Award Winning', 'Organized Crime', 'Delinquents', 'Visual Arts',
       'Iyashikei', 'Childcare', 'Otaku Culture', 'Workplace', 'Gourmet',
       'CGDCT', 'Harem', 'Combat Sports', 'Showbiz', 'Boys Love',
       'Anthropomorphic', 'Racing', 'Idols (Female)', 'Josei',
       'Performing Arts', 'Girls Love',

In [25]:
genre_anime_df.loc[genre_anime_df['genres'].isnull()]

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
4855,50549,Bubble,7.34,55662,1523,118900,2264,987,ona,pg_13,...,original,1,0 days 01:40:00,2022-04-28 00:00:00,2022-04-28 00:00:00,2022,spring,,,no demographic
16511,51098,Shinobi no Ittoki,0.00,1,5761,9299,0,50,tv,,...,original,0,,2022-10-01 00:00:00,1970-01-01 00:00:00,2022,fall,,,no demographic
17987,50709,Lycoris Recoil,0.00,3,5211,11981,0,38,tv,,...,original,0,,2022-07-02 00:00:00,1970-01-01 00:00:00,2022,summer,,,no demographic
21474,33187,Katsudou Shashin,5.47,4025,6586,6698,11053,21,movie,g,...,original,1,0 days 00:00:03,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
24395,9911,Wish Upon the Pleiades,6.14,8657,4516,17215,8323,13,ona,g,...,original,4,0 days 00:06:00,2011-02-01 00:00:00,2011-02-01 00:00:00,2011,winter,,,no demographic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58713,50918,Lion Nanny's Journey,0.00,0,0,0,0,0,movie,,...,,1,0 days 00:07:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
58853,51944,Dian Dao,0.00,0,0,0,0,0,ona,pg_13,...,original,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
58933,50921,Kimetsu Gakuen Monogatari: Demon's Banquet Spe...,0.00,0,0,0,0,0,ova,g,...,,3,0 days 00:30:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
59002,50922,Three Tales,0.00,0,0,0,0,0,movie,,...,,1,0 days 02:00:00,1960-01-15 00:00:00,1970-01-01 00:00:00,0,,,,no demographic


In [26]:
# need to get rid of NaN values
genre_anime_df.dropna(subset=['genres'], inplace=True)

In [27]:
genre_anime_df['genres'].unique()

array(['Action', 'Adventure', 'Drama', 'Fantasy', 'Military', 'Shounen',
       'Psychological', 'Sci-Fi', 'Suspense', 'Time Travel',
       'Supernatural', 'Gore', 'Survival', 'Mecha', 'School',
       'Super Power', 'Martial Arts', 'Avant Garde', 'Romantic Subtext',
       'Historical', 'Music', 'Romance', 'Comedy', 'Adult Cast', 'Space',
       'Sports', 'Team Sports', 'Love Polygon', 'Video Game', 'Isekai',
       'Parody', 'Seinen', 'Gag Humor', 'Samurai', 'Slice of Life',
       'Mahou Shoujo', 'Horror', 'Mystery', 'Vampire', 'Ecchi',
       'Strategy Game', 'Reincarnation', 'Detective', 'Mythology',
       'Crossdressing', 'Reverse Harem', 'Shoujo', 'High Stakes Game',
       'Award Winning', 'Organized Crime', 'Delinquents', 'Visual Arts',
       'Iyashikei', 'Childcare', 'Otaku Culture', 'Workplace', 'Gourmet',
       'CGDCT', 'Harem', 'Combat Sports', 'Showbiz', 'Boys Love',
       'Anthropomorphic', 'Racing', 'Idols (Female)', 'Josei',
       'Performing Arts', 'Girls Love',

In [28]:
# genre & demographic column matches

genre_anime_df.loc[genre_anime_df['genres'] == genre_anime_df['demographics']].head()

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
5,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
9,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
18,21,One Piece,8.65,1076776,26,1893379,65,167571,tv,pg_13,...,manga,0,0 days 00:24:00,1999-10-20 00:00:00,1970-01-01 00:00:00,1999,fall,sunday,09:30:00,Shounen
20,1535,Death Note,8.62,2426765,2,3421349,74,155891,tv,r,...,manga,37,0 days 00:23:00,2006-10-04 00:00:00,2007-06-27 00:00:00,2006,fall,wednesday,00:56:00,Shounen
27,16498,Attack on Titan,8.53,2459371,1,3438932,102,152655,tv,r,...,manga,25,0 days 00:24:00,2013-04-07 00:00:00,2013-09-29 00:00:00,2013,spring,sunday,01:58:00,Shounen


In [29]:
len(genre_anime_df)

60368

In [30]:
# remove rows where 'genre' value == 'demographic' value
genre_demo_match = genre_anime_df.loc[genre_anime_df['genres'].isin(demographics)].index

genre_anime_df.drop(genre_demo_match, inplace=True)

In [31]:
# OH NO, some series have more than one demographic label
# Fantasy as a genre would be counted twice (no fair for double votes)
# therefore when counting genres, get rows of no duplicate 'new_title' and 'genre' 
genre_anime_df[['new_title', 'genres', 'demographics']].loc[genre_anime_df['new_title'] == 'Amnesia OVA']

Unnamed: 0,new_title,genres,demographics
20197,Amnesia OVA,Fantasy,Josei
20200,Amnesia OVA,Fantasy,Shoujo


In [32]:
# dataframe with no duplicates for 'new_title' and 'genre'
# !!! ONLY USE THIS TO COUNT GENRES (so series don't get a double vote for having 2+ demographics in genre_anime_df)

no_dupe_genre_df = genre_anime_df.drop_duplicates(['new_title', 'genres'])

In [33]:
# just checking

no_dupe_genre_df[['new_title', 'genres', 'demographics']].loc[no_dupe_genre_df['new_title'] == 'Amnesia OVA']

Unnamed: 0,new_title,genres,demographics
20197,Amnesia OVA,Fantasy,Josei


In [34]:
# repack genres into lists

repack_genre_col = no_dupe_genre_df.groupby('id', as_index=False).agg({'genres': lambda x: x.tolist()})

repack_genre_col.head()

Unnamed: 0,id,genres
0,1,"[Action, Adult Cast, Sci-Fi, Space]"
1,5,"[Action, Adult Cast, Sci-Fi, Space]"
2,6,"[Action, Adult Cast, Adventure, Comedy, Drama,..."
3,7,"[Action, Detective, Drama, Mystery, Supernatural]"
4,8,"[Adventure, Fantasy, Supernatural]"


In [35]:
genre_anime_df[['id','new_title', 'genres']].head(10)

Unnamed: 0,id,new_title,genres
0,5114,Fullmetal Alchemist: Brotherhood,Action
1,5114,Fullmetal Alchemist: Brotherhood,Adventure
2,5114,Fullmetal Alchemist: Brotherhood,Drama
3,5114,Fullmetal Alchemist: Brotherhood,Fantasy
4,5114,Fullmetal Alchemist: Brotherhood,Military
6,11061,Hunter x Hunter,Action
7,11061,Hunter x Hunter,Adventure
8,11061,Hunter x Hunter,Fantasy
10,9253,Steins;Gate,Drama
11,9253,Steins;Gate,Psychological


In [36]:
genre_anime_df[['id','new_title', 'genres']].loc[genre_anime_df['id'] == 1]

Unnamed: 0,id,new_title,genres
67,1,Cowboy Bebop,Action
68,1,Cowboy Bebop,Adult Cast
69,1,Cowboy Bebop,Sci-Fi
70,1,Cowboy Bebop,Space


In [37]:
genre_anime_df[['id','new_title', 'genres']].loc[genre_anime_df['id'] == 5]

Unnamed: 0,id,new_title,genres
4145,5,Cowboy Bebop: The Movie,Action
4146,5,Cowboy Bebop: The Movie,Adult Cast
4147,5,Cowboy Bebop: The Movie,Sci-Fi
4148,5,Cowboy Bebop: The Movie,Space


In [38]:
genre_anime_df[['id','new_title', 'genres', 'demographics']].loc[genre_anime_df['new_title'] == 'Amnesia OVA']

Unnamed: 0,id,new_title,genres,demographics
20197,20655,Amnesia OVA,Fantasy,Josei
20200,20655,Amnesia OVA,Fantasy,Shoujo


In [39]:
repack_anime_df = anime_df.copy()

In [40]:
# df1 = df1.merge(df2, on='team', how='left')

repack_anime_df = repack_anime_df.merge(repack_genre_col, on='id', how='left')
repack_anime_df.columns

Index(['id', 'new_title', 'mean', 'num_scoring_users', 'popularity',
       'num_list_users', 'rank', 'num_favorites', 'media_type', 'rating',
       'studios', 'genres_x', 'source', 'num_episodes',
       'average_episode_duration', 'start_date', 'end_date',
       'start_season_year', 'start_season_season', 'broadcast_day_of_the_week',
       'broadcast_start_time', 'demographics', 'genres_y'],
      dtype='object')

In [41]:
repack_demo_col = anime_df.groupby('id', as_index=False).agg({'demographics': lambda x: x.tolist()})

repack_demo_col.head()

Unnamed: 0,id,demographics
0,1,[no demographic]
1,5,[no demographic]
2,6,[Shounen]
3,7,[no demographic]
4,8,[Shounen]


In [42]:
repack_anime_df = repack_anime_df.merge(repack_demo_col, on='id', how='left')
repack_anime_df.columns

Index(['id', 'new_title', 'mean', 'num_scoring_users', 'popularity',
       'num_list_users', 'rank', 'num_favorites', 'media_type', 'rating',
       'studios', 'genres_x', 'source', 'num_episodes',
       'average_episode_duration', 'start_date', 'end_date',
       'start_season_year', 'start_season_season', 'broadcast_day_of_the_week',
       'broadcast_start_time', 'demographics_x', 'genres_y', 'demographics_y'],
      dtype='object')

In [43]:
repack_anime_df[['id', 'new_title', 'genres_x', 'genres_y', 'demographics_x', 'demographics_y']].head()

# drop genres_x and demographics_x, rename genres_y to 'genres' and demographics_y to 'demographics'
# this should give unique anime shows (IDs)

Unnamed: 0,id,new_title,genres_x,genres_y,demographics_x,demographics_y
0,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Military, ...","[Action, Adventure, Drama, Fantasy, Military]",Shounen,[Shounen]
1,11061,Hunter x Hunter,"[Action, Adventure, Fantasy, Shounen]","[Action, Adventure, Fantasy]",Shounen,[Shounen]
2,9253,Steins;Gate,"[Drama, Psychological, Sci-Fi, Suspense, Time ...","[Drama, Psychological, Sci-Fi, Suspense, Time ...",no demographic,[no demographic]
3,21,One Piece,"[Action, Adventure, Fantasy, Shounen]","[Action, Adventure, Fantasy]",Shounen,[Shounen]
4,1535,Death Note,"[Psychological, Shounen, Supernatural, Suspense]","[Psychological, Supernatural, Suspense]",Shounen,[Shounen]


In [44]:
repack_anime_df.drop(['genres_x', 'demographics_x'], inplace=True, axis=1)
repack_anime_df.rename(columns={'demographics_y':'demographics', 'genres_y':'genres'}, inplace=True)

In [45]:
repack_anime_df[['id', 'new_title', 'genres', 'demographics']].head()

Unnamed: 0,id,new_title,genres,demographics
0,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Military]",[Shounen]
1,11061,Hunter x Hunter,"[Action, Adventure, Fantasy]",[Shounen]
2,9253,Steins;Gate,"[Drama, Psychological, Sci-Fi, Suspense, Time ...",[no demographic]
3,21,One Piece,"[Action, Adventure, Fantasy]",[Shounen]
4,1535,Death Note,"[Psychological, Supernatural, Suspense]",[Shounen]


In [46]:
repack_anime_df.loc[repack_anime_df['new_title'] == 'Amnesia OVA']

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,genres,demographics
4968,20655,Amnesia OVA,6.34,15238,3414,31401,7247,26,ova,pg_13,...,1,0 days 00:25:00,2013-09-25 00:00:00,2013-09-25 00:00:00,2013,summer,,,[Fantasy],"[Josei, Shoujo]"
4969,20655,Amnesia OVA,6.34,15238,3414,31401,7247,26,ova,pg_13,...,1,0 days 00:25:00,2013-09-25 00:00:00,2013-09-25 00:00:00,2013,summer,,,[Fantasy],"[Josei, Shoujo]"


In [47]:
repack_anime_df.loc[repack_anime_df['id'].duplicated()].head()

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,genres,demographics
4969,20655,Amnesia OVA,6.34,15238,3414,31401,7247,26,ova,pg_13,...,1,0 days 00:25:00,2013-09-25 00:00:00,2013-09-25 00:00:00,2013,summer,,,[Fantasy],"[Josei, Shoujo]"
11604,49531,A Day Before Us: Merry Christmas!,0.0,0,0,0,0,1,ona,pg_13,...,1,0 days 00:00:30,2018-12-22 00:00:00,2018-12-22 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
11625,48766,A Story About a Yakuza and a Detective With a ...,0.0,0,0,0,0,1,ona,,...,0,0 days 00:00:10,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
11773,49552,sweet first love,0.0,0,0,0,0,1,ona,,...,16,0 days 00:03:00,2017-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
12846,49671,Ojamajo Doremi Dokkaan! Hana-chan ga Ippai,0.0,0,0,0,0,0,ova,,...,1,0 days 00:28:55,2002-01-01 00:00:00,2002-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"


In [48]:
len(repack_anime_df)

21856

In [49]:
repack_anime_df.loc[repack_anime_df.duplicated(subset=['id'])]

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,genres,demographics
4969,20655,Amnesia OVA,6.34,15238,3414,31401,7247,26,ova,pg_13,...,1,0 days 00:25:00,2013-09-25 00:00:00,2013-09-25 00:00:00,2013,summer,,,[Fantasy],"[Josei, Shoujo]"
11604,49531,A Day Before Us: Merry Christmas!,0.0,0,0,0,0,1,ona,pg_13,...,1,0 days 00:00:30,2018-12-22 00:00:00,2018-12-22 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
11625,48766,A Story About a Yakuza and a Detective With a ...,0.0,0,0,0,0,1,ona,,...,0,0 days 00:00:10,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
11773,49552,sweet first love,0.0,0,0,0,0,1,ona,,...,16,0 days 00:03:00,2017-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
12846,49671,Ojamajo Doremi Dokkaan! Hana-chan ga Ippai,0.0,0,0,0,0,0,ova,,...,1,0 days 00:28:55,2002-01-01 00:00:00,2002-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
15783,49244,Tarou no Banpei,0.0,0,0,0,0,0,movie,,...,1,0 days 02:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
15817,49245,Tarou no Banpei Senkoutei no Maki,0.0,0,0,0,0,0,movie,,...,1,0 days 02:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
15842,49246,Koshiore Tsubame,0.0,0,0,0,0,0,movie,,...,1,0 days 02:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
15882,49247,Tokechigai,0.0,0,0,0,0,0,movie,,...,1,0 days 02:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"
15896,41056,Disney Tsum Tsum: O Tannenbaum,0.0,0,0,0,0,0,special,g,...,1,0 days 00:02:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Comedy, Drama, Ecchi, Fantasy, Histor...","[Seinen, Shoujo]"


In [50]:
repack_anime_df.drop_duplicates(subset=['id'], inplace=True)

In [51]:
repack_anime_df.loc[repack_anime_df['new_title'] == 'Amnesia OVA']

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,genres,demographics
4968,20655,Amnesia OVA,6.34,15238,3414,31401,7247,26,ova,pg_13,...,1,0 days 00:25:00,2013-09-25 00:00:00,2013-09-25 00:00:00,2013,summer,,,[Fantasy],"[Josei, Shoujo]"


In [52]:
studio_anime_df = anime_df.explode('studios', ignore_index=True)

studio_anime_df.head(10)

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
0,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
1,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
2,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
3,9253,Steins;Gate,9.08,1240087,13,2248209,3,171579,tv,pg_13,...,visual_novel,24,0 days 00:24:20,2011-04-06 00:00:00,2011-09-14 00:00:00,2011,spring,wednesday,02:05:00,no demographic
4,21,One Piece,8.65,1076776,26,1893379,65,167571,tv,pg_13,...,manga,0,0 days 00:24:00,1999-10-20 00:00:00,1970-01-01 00:00:00,1999,fall,sunday,09:30:00,Shounen
5,1535,Death Note,8.62,2426765,2,3421349,74,155891,tv,r,...,manga,37,0 days 00:23:00,2006-10-04 00:00:00,2007-06-27 00:00:00,2006,fall,wednesday,00:56:00,Shounen
6,16498,Attack on Titan,8.53,2459371,1,3438932,102,152655,tv,r,...,manga,25,0 days 00:24:00,2013-04-07 00:00:00,2013-09-29 00:00:00,2013,spring,sunday,01:58:00,Shounen
7,1575,Code Geass: Lelouch of the Rebellion,8.7,1255301,17,1995638,49,99388,tv,r,...,original,25,0 days 00:24:00,2006-10-06 00:00:00,2007-07-29 00:00:00,2006,fall,friday,01:25:00,no demographic
8,1735,Naruto Shippuden,8.24,1427961,16,2126396,285,97728,tv,pg_13,...,manga,500,0 days 00:23:09,2007-02-15 00:00:00,2017-03-23 00:00:00,2007,winter,thursday,19:30:00,Shounen
9,30,Neon Genesis Evangelion,8.34,929469,48,1559217,206,91211,tv,pg_13,...,original,26,0 days 00:24:01,1995-10-04 00:00:00,1996-03-27 00:00:00,1995,fall,wednesday,18:30:00,no demographic


In [53]:
studio_anime_df.loc[studio_anime_df['studios'].isnull()]

# this needs to be cleaned up
# I think some of these series haven't aired yet
# UPDATE: this is already taken care of

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
607,40357,The Rising of the Shield Hero Season 3,0.0,3,872,226014,0,2436,tv,pg_13,...,light_novel,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
1000,48585,Black Clover Movie,0.0,4,1783,97423,0,1142,movie,pg_13,...,manga,1,,2023-01-01 00:00:00,2023-01-01 00:00:00,0,,,,Shounen
1009,49918,My Hero Academia Season 6,0.0,7,1124,172584,0,1132,tv,pg_13,...,manga,0,,2022-10-01 00:00:00,1970-01-01 00:00:00,2022,fall,,,Shounen
1131,41084,Made in Abyss: The Golden City of the Scorchin...,0.0,10,1224,159059,0,916,tv,r,...,web_manga,0,,2022-07-06 00:00:00,1970-01-01 00:00:00,2022,summer,wednesday,22:30:00,no demographic
1191,50307,Tonikaku Kawaii 2nd Season,0.0,0,1872,90802,0,839,tv,pg_13,...,manga,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,Shounen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22877,46847,Mini Tegong Dui X,0.0,0,0,0,0,0,tv,pg,...,original,52,0 days 00:13:20,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
22878,47103,Xiao Feiji Ka Ka,0.0,0,0,0,0,0,tv,pg,...,original,104,0 days 00:07:20,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
22879,47359,Sai Ya Lixian Ji,0.0,0,0,0,0,0,tv,pg,...,original,20,0 days 00:12:20,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
22881,50687,Pochi Hana,0.0,19,19506,42,13753,0,movie,g,...,original,1,0 days 00:03:53,2016-12-02 00:00:00,2016-12-02 00:00:00,2016,fall,,,no demographic


In [54]:
unpack_anime_df = anime_df.explode('genres', ignore_index=True)
unpack_anime_df = unpack_anime_df.explode('studios', ignore_index=True)

unpack_anime_df.head(10)

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
0,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
1,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
2,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
3,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
4,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
5,5114,Fullmetal Alchemist: Brotherhood,9.14,1846307,3,2897486,1,202552,tv,r,...,manga,64,0 days 00:24:20,2009-04-05 00:00:00,2010-07-04 00:00:00,2009,spring,sunday,17:00:00,Shounen
6,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
7,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
8,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen
9,11061,Hunter x Hunter,9.05,1488156,10,2387337,6,182740,tv,pg_13,...,manga,148,0 days 00:23:37,2011-10-02 00:00:00,2014-09-24 00:00:00,2011,fall,sunday,10:55:00,Shounen


In [55]:
unpack_anime_df['genres'].unique()

array(['Action', 'Adventure', 'Drama', 'Fantasy', 'Military', 'Shounen',
       'Psychological', 'Sci-Fi', 'Suspense', 'Time Travel',
       'Supernatural', 'Gore', 'Survival', 'Mecha', 'School',
       'Super Power', 'Martial Arts', 'Avant Garde', 'Romantic Subtext',
       'Historical', 'Music', 'Romance', 'Comedy', 'Adult Cast', 'Space',
       'Sports', 'Team Sports', 'Love Polygon', 'Video Game', 'Isekai',
       'Parody', 'Seinen', 'Gag Humor', 'Samurai', 'Slice of Life',
       'Mahou Shoujo', 'Horror', 'Mystery', 'Vampire', 'Ecchi',
       'Strategy Game', 'Reincarnation', 'Detective', 'Mythology',
       'Crossdressing', 'Reverse Harem', 'Shoujo', 'High Stakes Game',
       'Award Winning', 'Organized Crime', 'Delinquents', 'Visual Arts',
       'Iyashikei', 'Childcare', 'Otaku Culture', 'Workplace', 'Gourmet',
       'CGDCT', 'Harem', 'Combat Sports', 'Showbiz', 'Boys Love',
       'Anthropomorphic', 'Racing', 'Idols (Female)', 'Josei',
       'Performing Arts', 'Girls Love',

In [56]:
len(unpack_anime_df)

64384

In [57]:
# do a 'genre' == NaN cleanup for unpack_anime_df

unpack_anime_df.loc[unpack_anime_df['genres'].isnull()]

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,source,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,demographics
5182,50549,Bubble,7.34,55662,1523,118900,2264,987,ona,pg_13,...,original,1,0 days 01:40:00,2022-04-28 00:00:00,2022-04-28 00:00:00,2022,spring,,,no demographic
18083,51098,Shinobi no Ittoki,0.00,1,5761,9299,0,50,tv,,...,original,0,,2022-10-01 00:00:00,1970-01-01 00:00:00,2022,fall,,,no demographic
19711,50709,Lycoris Recoil,0.00,3,5211,11981,0,38,tv,,...,original,0,,2022-07-02 00:00:00,1970-01-01 00:00:00,2022,summer,,,no demographic
23552,33187,Katsudou Shashin,5.47,4025,6586,6698,11053,21,movie,g,...,original,1,0 days 00:00:03,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
26755,9911,Wish Upon the Pleiades,6.14,8657,4516,17215,8323,13,ona,g,...,original,4,0 days 00:06:00,2011-02-01 00:00:00,2011-02-01 00:00:00,2011,winter,,,no demographic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62575,50918,Lion Nanny's Journey,0.00,0,0,0,0,0,movie,,...,,1,0 days 00:07:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
62725,51944,Dian Dao,0.00,0,0,0,0,0,ona,pg_13,...,original,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
62805,50921,Kimetsu Gakuen Monogatari: Demon's Banquet Spe...,0.00,0,0,0,0,0,ova,g,...,,3,0 days 00:30:00,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,no demographic
62874,50922,Three Tales,0.00,0,0,0,0,0,movie,,...,,1,0 days 02:00:00,1960-01-15 00:00:00,1970-01-01 00:00:00,0,,,,no demographic


In [58]:
unpack_anime_df.dropna(subset=['genres'], inplace=True)

In [59]:
unpack_anime_df['genres'].unique()

array(['Action', 'Adventure', 'Drama', 'Fantasy', 'Military', 'Shounen',
       'Psychological', 'Sci-Fi', 'Suspense', 'Time Travel',
       'Supernatural', 'Gore', 'Survival', 'Mecha', 'School',
       'Super Power', 'Martial Arts', 'Avant Garde', 'Romantic Subtext',
       'Historical', 'Music', 'Romance', 'Comedy', 'Adult Cast', 'Space',
       'Sports', 'Team Sports', 'Love Polygon', 'Video Game', 'Isekai',
       'Parody', 'Seinen', 'Gag Humor', 'Samurai', 'Slice of Life',
       'Mahou Shoujo', 'Horror', 'Mystery', 'Vampire', 'Ecchi',
       'Strategy Game', 'Reincarnation', 'Detective', 'Mythology',
       'Crossdressing', 'Reverse Harem', 'Shoujo', 'High Stakes Game',
       'Award Winning', 'Organized Crime', 'Delinquents', 'Visual Arts',
       'Iyashikei', 'Childcare', 'Otaku Culture', 'Workplace', 'Gourmet',
       'CGDCT', 'Harem', 'Combat Sports', 'Showbiz', 'Boys Love',
       'Anthropomorphic', 'Racing', 'Idols (Female)', 'Josei',
       'Performing Arts', 'Girls Love',

In [60]:
len(unpack_anime_df)

64250

In [63]:
repack_anime_df.loc[repack_anime_df['mean'] == 0]

Unnamed: 0,id,new_title,mean,num_scoring_users,popularity,num_list_users,rank,num_favorites,media_type,rating,...,num_episodes,average_episode_duration,start_date,end_date,start_season_year,start_season_season,broadcast_day_of_the_week,broadcast_start_time,genres,demographics
191,44511,Chainsaw Man,0.0,12,437,419666,0,10762,tv,,...,0,,2022-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Adventure, Gore, Mythology]",[Shounen]
443,42963,Rent-a-Girlfriend 2nd Season,0.0,35,911,216014,0,3476,tv,pg_13,...,0,,2022-07-02 00:00:00,1970-01-01 00:00:00,2022,summer,saturday,01:25:00,"[Comedy, Romance, School]",[Shounen]
461,48413,The Devil is a Part-Timer! Season 2,0.0,29,822,238913,0,3237,tv,pg_13,...,0,,2022-07-14 00:00:00,1970-01-01 00:00:00,2022,summer,thursday,23:30:00,"[Comedy, Fantasy, Mythology, Romance, Supernat...",[no demographic]
471,48895,Overlord IV,0.0,35,973,202137,0,3134,tv,r,...,0,,2022-07-05 00:00:00,1970-01-01 00:00:00,2022,summer,,,"[Action, Fantasy, Isekai, Supernatural, Video ...",[no demographic]
568,40357,The Rising of the Shield Hero Season 3,0.0,3,872,226014,0,2436,tv,pg_13,...,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Adventure, Drama, Fantasy, Isekai]",[no demographic]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21851,47103,Xiao Feiji Ka Ka,0.0,0,0,0,0,0,tv,pg,...,104,0 days 00:07:20,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,[Kids],[no demographic]
21852,47359,Sai Ya Lixian Ji,0.0,0,0,0,0,0,tv,pg,...,20,0 days 00:12:20,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,[Kids],[no demographic]
21853,50431,Xyrin Empire,0.0,0,15507,301,0,0,ona,pg_13,...,0,,1970-01-01 00:00:00,1970-01-01 00:00:00,0,,,,"[Action, Fantasy]",[no demographic]
21854,50687,Pochi Hana,0.0,19,19506,42,13753,0,movie,g,...,1,0 days 00:03:53,2016-12-02 00:00:00,2016-12-02 00:00:00,2016,fall,,,[Slice of Life],[no demographic]


In [62]:
# export dataframes to Excel file

save_filepath = "MAL_anime_dataset/MAL_data_organize.xlsx"
writer = pd.ExcelWriter(save_filepath, engine='xlsxwriter')

anime_df.to_excel(writer, sheet_name='anime', index=False)
unaired_anime_df.to_excel(writer, sheet_name='unaired_anime', index=False)
genre_anime_df.to_excel(writer, sheet_name='genre_anime', index=False)
no_dupe_genre_df.to_excel(writer, sheet_name='no_dupe_genre', index=False)
studio_anime_df.to_excel(writer, sheet_name='studio_anime', index=False)
unpack_anime_df.to_excel(writer, sheet_name='unpack_anime', index=False)
repack_anime_df.to_excel(writer, sheet_name='repack_anime', index=False)

writer.close()