In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
anime_data = pd.read_csv("anime_cleaned.csv")
print("Number of Columns in Original Data: " + str(len(anime_data.columns)))
print("Number of Observations in Original Data: " + str(len(anime_data)))
anime_data.head()

Number of Columns in Original Data: 33
Number of Observations in Original Data: 6668


Unnamed: 0,anime_id,title,title_english,title_japanese,title_synonyms,image_url,type,source,episodes,status,...,broadcast,related,producer,licensor,studio,genre,opening_theme,ending_theme,duration_min,aired_from_year
0,11013,Inu x Boku SS,Inu X Boku Secret Service,妖狐×僕SS,Youko x Boku SS,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,12,Finished Airing,...,Fridays at Unknown,"{'Adaptation': [{'mal_id': 17207, 'type': 'man...","Aniplex, Square Enix, Mainichi Broadcasting Sy...",Sentai Filmworks,David Production,"Comedy, Supernatural, Romance, Shounen","['""Nirvana"" by MUCC']","['#1: ""Nirvana"" by MUCC (eps 1, 11-12)', '#2: ...",24.0,2012.0
1,2104,Seto no Hanayome,My Bride is a Mermaid,瀬戸の花嫁,The Inland Sea Bride,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,26,Finished Airing,...,Unknown,"{'Adaptation': [{'mal_id': 759, 'type': 'manga...","TV Tokyo, AIC, Square Enix, Sotsu",Funimation,Gonzo,"Comedy, Parody, Romance, School, Shounen","['""Romantic summer"" by SUN&LUNAR']","['#1: ""Ashita e no Hikari (明日への光)"" by Asuka Hi...",24.0,2007.0
2,5262,Shugo Chara!! Doki,Shugo Chara!! Doki,しゅごキャラ！！どきっ,"Shugo Chara Ninenme, Shugo Chara! Second Year",https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,51,Finished Airing,...,Unknown,"{'Adaptation': [{'mal_id': 101, 'type': 'manga...","TV Tokyo, Sotsu",,Satelight,"Comedy, Magic, School, Shoujo","['#1: ""Minna no Tamago (みんなのたまご)"" by Shugo Cha...","['#1: ""Rottara Rottara (ロッタラ ロッタラ)"" by Buono! ...",24.0,2008.0
3,721,Princess Tutu,Princess Tutu,プリンセスチュチュ,,https://myanimelist.cdn-dena.com/images/anime/...,TV,Original,38,Finished Airing,...,Fridays at Unknown,"{'Adaptation': [{'mal_id': 1581, 'type': 'mang...","Memory-Tech, GANSIS, Marvelous AQL",ADV Films,Hal Film Maker,"Comedy, Drama, Magic, Romance, Fantasy","['""Morning Grace"" by Ritsuko Okazaki']","['""Watashi No Ai Wa Chiisaikeredo"" by Ritsuko ...",16.0,2002.0
4,12365,Bakuman. 3rd Season,Bakuman.,バクマン。,Bakuman Season 3,https://myanimelist.cdn-dena.com/images/anime/...,TV,Manga,25,Finished Airing,...,Unknown,"{'Adaptation': [{'mal_id': 9711, 'type': 'mang...","NHK, Shueisha",,J.C.Staff,"Comedy, Drama, Romance, Shounen","['#1: ""Moshimo no Hanashi (もしもの話)"" by nano.RIP...","['#1: ""Pride on Everyday"" by Sphere (eps 1-13)...",24.0,2012.0


In [3]:
#Dropping unnecessary columns: 
anime_data = anime_data.drop(columns = ['rating', 'title_japanese','image_url', 'related', 'opening_theme', 'ending_theme'])
anime_data = anime_data.rename(columns = {'score':'rating'})
print("Number of Columns After Cleaning Data: " + str(len(anime_data.columns)))
print("Number of Observations After Cleaning Data: " + str(len(anime_data)))
anime_data.head()
#get rid of rating, rename score to rating

Number of Columns After Cleaning Data: 27
Number of Observations After Cleaning Data: 6668


0       7.63
1       7.89
2       7.55
3       8.21
4       8.67
        ... 
6663    9.52
6664    0.00
6665    4.20
6666    4.57
6667    7.00
Name: rating, Length: 6668, dtype: float64

## Analysis of Anime Source 

In [4]:
anime_data["source"].unique()

array(['Manga', 'Original', 'Light novel', '4-koma manga', 'Novel',
       'Visual novel', 'Other', 'Game', 'Picture book', 'Card game',
       'Web manga', 'Book', 'Music', 'Radio', 'Digital manga'],
      dtype=object)

In [5]:
anime_grouped_by_source = anime_data.groupby(by = "source")
list_of_sources = anime_grouped_by_source['source'].unique()

#Mean of Anime with Manga Source 
manga = anime_grouped_by_source.get_group('Manga')
manga_mean = manga['rating'].mean()
print("Average Rating of Anime that Was Adapted from Manga = {:.2f}".format(manga_mean))

#Mean Score of Original Anime 
original = anime_grouped_by_source.get_group("Original")
original_mean = original['rating'].mean()
print("Average Rating of Original Anime = {:.2f}".format(original_mean))

#Mean Score of Anime with Light Novel Source 
light_novel = anime_grouped_by_source.get_group("Light novel")
light_novel_mean = light_novel['rating'].mean()
print("Average Rating of Anime Adapted from Light Novel = {:.2f}".format(light_novel_mean))

Average Rating of Anime that Was Adapted from Manga = 7.12
Average Rating of Original Anime = 6.54
Average Rating of Anime Adapted from Light Novel = 7.25


## Analysis of The Number of Anime That Each Genre Has 

### Removing Whitespace in Genre Name in Original Data

In [6]:
anime_copy = anime_data['genre'].copy()
for anime in range(len(anime_copy)):
    anime_copy[anime] = str(anime_copy[anime]).split(",")
    for index in range(len(anime_copy[anime])):
        genre = anime_copy[anime][index]
        genre = genre.strip()
        anime_copy[anime][index] = genre

### Getting all the Unique Genre Values

In [7]:
genre_list = []
for observation in anime_data['genre']:
    tags = str(observation).split(",")
    for i in range(len(tags)):
        tags[i] = tags[i].strip()
        if tags[i] not in genre_list: 
            genre_list.append(tags[i])
print(genre_list)

['Comedy', 'Supernatural', 'Romance', 'Shounen', 'Parody', 'School', 'Magic', 'Shoujo', 'Drama', 'Fantasy', 'Kids', 'Action', 'Music', 'Slice of Life', 'Josei', 'Harem', 'Shounen Ai', 'Adventure', 'Super Power', 'Sci-Fi', 'Ecchi', 'Seinen', 'Martial Arts', 'Game', 'Sports', 'Demons', 'Historical', 'Horror', 'Mystery', 'Psychological', 'Vampire', 'Mecha', 'Military', 'Space', 'Samurai', 'Thriller', 'Hentai', 'Yaoi', 'Shoujo Ai', 'Police', 'Cars', 'Dementia', 'Yuri', 'nan']


### Creating Genre Count DataFrame

In [8]:
genre_count = pd.DataFrame(columns = ['genre', 'count'])
genre_count['genre'] = genre_list
genre_count['count'] = [0] * len(genre_list)
genre_count.head()

Unnamed: 0,genre,count
0,Comedy,0
1,Supernatural,0
2,Romance,0
3,Shounen,0
4,Parody,0


In [9]:
for observation in anime_copy:
    for genres in observation: 
        i = genre_list.index(genres)
        genre_count['count'][i] = genre_count['count'][i] + 1 
genre_count.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,genre,count
0,Comedy,3021
1,Supernatural,860
2,Romance,1177
3,Shounen,1182
4,Parody,267
