#### Project Idea:
The primary objective is to build a recommendation system that suggests the next anime to watch.
<br>
For now, I'm using my own MyAnimeList account, with both the "completed" and "plan to watch" data.
<br>
For now, the only features available are the genres from the MyAnimeList site.

In [1]:
# Load the dataset through the project's MyAnimeList (MAL) extractor.
from MAL_extractor import MAL_extractor
mal_user = MAL_extractor(user_name='Aiush') # user_name is the user's username on MAL

In [2]:
# recursive_request() returns a generator object (see its repr in the next cell's output).
result = mal_user.recursive_request()

In [3]:
result # this is a generator, so we need to iterate over it to get the data.
# There's already a function that does this; I use generators because I'm used to
# fetching data from the internet with them.

<generator object MAL_extractor.recursive_request at 0x7ff0e83eb7d0>

In [4]:
# get_data() gathers the data from the requests and returns it as a list of dicts
# (see the type check and the sample entry in the following cells).
# It's not necessary to run recursive_request() first.
# Reminder: this is NOT the refined data.
result = mal_user.get_data()

In [5]:
len(result) # same count as the completed anime shown at https://myanimelist.net/animelist/Aiush

265

In [6]:
type(result) # it's a list of dicts.

list

In [7]:
result[0] # example of one entry (a dict) from the raw, unrefined data.

{'status': 2,
 'score': 5,
 'tags': '',
 'is_rewatching': 0,
 'num_watched_episodes': 12,
 'created_at': 1608610844,
 'updated_at': 1608610847,
 'anime_title': '100-man no Inochi no Ue ni Ore wa Tatteiru',
 'anime_title_eng': "I'm Standing on a Million Lives",
 'anime_num_episodes': 12,
 'anime_airing_status': 2,
 'anime_id': 41380,
 'anime_studios': None,
 'anime_licensors': None,
 'anime_season': None,
 'anime_total_members': 260122,
 'anime_total_scores': 132036,
 'anime_score_val': 6.5,
 'has_episode_video': True,
 'has_promotion_video': True,
 'has_video': True,
 'video_url': '/anime/41380/100-man_no_Inochi_no_Ue_ni_Ore_wa_Tatteiru/video',
 'genres': [{'id': 1, 'name': 'Action'},
  {'id': 8, 'name': 'Drama'},
  {'id': 10, 'name': 'Fantasy'}],
 'demographics': [{'id': 27, 'name': 'Shounen'}],
 'title_localized': None,
 'anime_url': '/anime/41380/100-man_no_Inochi_no_Ue_ni_Ore_wa_Tatteiru',
 'anime_image_path': 'https://cdn.myanimelist.net/r/192x272/images/anime/1506/117717.jpg?s=5f

In [8]:
# Refined data: each entry keeps only a handful of keys, and the genres and
# demographics become plain lists of names (compare the raw entry above with
# the refined entry in the next cell).
result = mal_user.refined_data()

In [9]:
result[0] # the first entry again, this time in its refined form.

{'score': 5,
 'anime_title': '100-man no Inochi no Ue ni Ore wa Tatteiru',
 'anime_title_eng': "I'm Standing on a Million Lives",
 'anime_season': None,
 'anime_score_val': 6.5,
 'genres': ['Action', 'Drama', 'Fantasy'],
 'demographics': ['Shounen'],
 'anime_url': '/anime/41380/100-man_no_Inochi_no_Ue_ni_Ore_wa_Tatteiru',
 'anime_media_type_string': 'TV',
 'anime_mpaa_rating_string': 'PG-13'}

In [10]:
# To work with the refined data, I'm using the pandas library.
import pandas as pd

In [11]:
# Build a DataFrame from the refined entries: one row per anime, one column per key.
df = pd.DataFrame(result)

In [12]:
df.head(5) # preview the first five rows.

Unnamed: 0,score,anime_title,anime_title_eng,anime_season,anime_score_val,genres,demographics,anime_url,anime_media_type_string,anime_mpaa_rating_string
0,5,100-man no Inochi no Ue ni Ore wa Tatteiru,I'm Standing on a Million Lives,,6.5,"[Action, Drama, Fantasy]",[Shounen],/anime/41380/100-man_no_Inochi_no_Ue_ni_Ore_wa...,TV,PG-13
1,8,86,86 Eighty Six,,8.27,"[Action, Drama, Sci-Fi]",[],/anime/41457/86,TV,R
2,8,86 Part 2,86 Eighty-Six Part 2,,8.71,"[Action, Drama, Sci-Fi]",[],/anime/48569/86_Part_2,TV,R
3,6,Accel World,Accel World,,7.23,"[Action, Romance, Sci-Fi]",[],/anime/11759/Accel_World,TV,PG-13
4,8,Akame ga Kill!,Akame ga Kill!,,7.47,"[Action, Fantasy]",[Shounen],/anime/22199/Akame_ga_Kill,TV,R


In [13]:
# the first step here is to "expand" the genres column, so we can have one-hot encoded columns for each genre.

array(['Action', 'Drama', 'Fantasy', 'Sci-Fi', 'Romance', 'Adventure',
       'Supernatural', 'Horror', 'Mystery', 'Comedy', 'Suspense',
       'Sports', 'Award Winning', 'Ecchi', 'Slice of Life', 'Girls Love',
       'Avant Garde'], dtype=object)

In [20]:
# One-hot encode the genres: explode the list column so each (row, genre) pair
# gets its own line, then count the pairs per original row index with crosstab.
genre_per_row = df['genres'].explode()
genre_flags = (
    pd.crosstab(genre_per_row.index, genre_per_row)
    # Rows whose genre list is empty explode to NaN and are dropped by crosstab;
    # reindex brings them back as all-zero integer rows instead of NaN after the join.
    .reindex(df.index, fill_value=0)
)
# Drop genre columns left over from a previous run so this cell can be re-executed
# without join() raising on overlapping column names.
# (Assumes no genre shares its name with one of the non-genre columns.)
df = df.drop(columns=genre_flags.columns, errors='ignore').join(genre_flags)

In [26]:
# showing the created columns

In [22]:
df.columns # the genre columns come after the original ones.

Index(['score', 'anime_title', 'anime_title_eng', 'anime_season',
       'anime_score_val', 'genres', 'demographics', 'anime_url',
       'anime_media_type_string', 'anime_mpaa_rating_string', 'Action',
       'Adventure', 'Avant Garde', 'Award Winning', 'Comedy', 'Drama', 'Ecchi',
       'Fantasy', 'Girls Love', 'Horror', 'Mystery', 'Romance', 'Sci-Fi',
       'Slice of Life', 'Sports', 'Supernatural', 'Suspense'],
      dtype='object')

In [25]:
# Show only the one-hot genre columns, ordered by each genre's first appearance.
# dropna() guards against rows with an empty genre list: explode() turns those into
# NaN, which would otherwise be looked up as a (non-existent) column label.
df[df['genres'].explode().dropna().unique()]

Unnamed: 0,Action,Drama,Fantasy,Sci-Fi,Romance,Adventure,Supernatural,Horror,Mystery,Comedy,Suspense,Sports,Award Winning,Ecchi,Slice of Life,Girls Love,Avant Garde
0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0
261,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0
262,1,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0
263,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [None]:
# from what I've seen while exploring and training on this data, those columns are really not useful for classifying which anime to watch next.
# this is where I started to think about using the tags from AniList.

In [27]:
from AniList_extractor import AniList

In [30]:
# Build an AniList query for a single title, passed under the 'search' key.
AniL = AniList({'search':'Ijiranaide, Nagatoro-san'})

In [31]:
# get_json() returns the response as a nested dict (displayed two cells below).
AniListResult = AniL.get_json()

In [None]:
# just an example of getting one anime from AniList.
# there's way more information in the genres and tags, which might be far more useful for a model.

In [32]:
AniListResult # display the raw response, including the per-title genres and ranked tags.

{'data': {'Page': {'pageInfo': {'total': 2,
    'currentPage': 1,
    'lastPage': 1,
    'hasNextPage': False,
    'perPage': 10},
   'media': [{'genres': ['Comedy', 'Romance', 'Slice of Life'],
     'id': 120697,
     'idMal': 42361,
     'status': 'FINISHED',
     'seasonInt': 212,
     'averageScore': 71,
     'tags': [{'name': 'Slapstick', 'rank': 92, 'category': 'Theme-Comedy'},
      {'name': 'Bullying', 'rank': 88, 'category': 'Theme-Drama'},
      {'name': 'School', 'rank': 86, 'category': 'Setting-Scene'},
      {'name': 'Tsundere', 'rank': 85, 'category': 'Cast-Traits'},
      {'name': 'Episodic', 'rank': 84, 'category': 'Technical'},
      {'name': 'Heterosexual', 'rank': 84, 'category': 'Theme-Romance'},
      {'name': 'Tanned Skin', 'rank': 80, 'category': 'Cast-Traits'},
      {'name': 'Male Protagonist', 'rank': 79, 'category': 'Cast-Main Cast'},
      {'name': 'Primarily Teen Cast',
       'rank': 78,
       'category': 'Cast-Main Cast'},
      {'name': 'School Club', '

In [None]:
# from here it's time to do some exploration and define the target. I'll create another notebook later to explore models.