In [190]:
# Import libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [191]:
# Lookup table from a genre's name to the id string that precedes the name in that genre's URL

genre_dict = dict(
    Action='1',
    Adventure='2',
    Cars='3',
    Comedy='4',
    Dementia='5',
    Demons='6',
    Drama='8',
    Ecchi='9',
    Fantasy='10',
    Game='11',
    Historical='13',
    Horror='14',
    Kids='15',
    Harem='35',
    Josei='43',
)

In [192]:
# Base address shared by every genre listing page

site_url = 'https://myanimelist.net/anime/genre/'

# A listing URL is the base followed by "<genre id>/<genre name>",
# for example: https://myanimelist.net/anime/genre/1/Action

genre = 'Action'
genre_url = site_url + '/'.join((genre_dict[genre], genre))
genre_url

'https://myanimelist.net/anime/genre/1/Action'

In [193]:
# Creating a function that builds the URL for a genre, checks whether the request succeeds, and then
# returns a Beautiful Soup object containing the parsed HTML of that genre's listing page

def get_topic_page(genre, timeout=10):
    """Download and parse the listing page of one genre.

    Parameters
    ----------
    genre : str
        A key of ``genre_dict`` (for example ``'Action'``). It is looked up to
        get the genre id and is also used as the last segment of the URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    BeautifulSoup
        The parsed HTML of the response body.

    Raises
    ------
    KeyError
        If ``genre`` is not a key of ``genre_dict``.
    Exception
        If the server answers with a status code other than 200.
    requests.exceptions.RequestException
        If the request itself fails, e.g. the timeout expires.
    """
    final_url = site_url + genre_dict[genre] + '/' + genre
    # Without an explicit timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(final_url, timeout=timeout)
    if response.status_code != 200:
        print('Status code:', response.status_code)
        raise Exception('Failed to fetch web page ' + final_url)
    # Name the parser explicitly: otherwise Beautiful Soup picks whichever parser
    # happens to be installed, so the same page can parse differently per machine.
    return BeautifulSoup(response.text, 'html.parser')

In [194]:
# Parsed listing page for the 'Adventure' genre, as returned by get_topic_page.
# NOTE(review): the URL built by get_topic_page carries no page parameter, so this is
# presumably only the first page of results rather than every anime in the genre - confirm.

doc = get_topic_page('Adventure')

In [195]:
# Collecting the title of every anime found on the page

titles = doc.find_all('h2', class_='h2_anime_title')

# Each title heading wraps a link; the link text is the title itself
titles_list = [heading.find('a').text for heading in titles]
    


In [204]:
# Collecting the information-page URL of every anime found on the page

infos = doc.find_all('h2', class_='h2_anime_title')

# The URL is the href of the link inside each title heading
infos_list = [heading.find('a')['href'] for heading in infos]
    

['https://myanimelist.net/anime/5114/Fullmetal_Alchemist__Brotherhood',
 'https://myanimelist.net/anime/11757/Sword_Art_Online',
 'https://myanimelist.net/anime/20/Naruto',
 'https://myanimelist.net/anime/11061/Hunter_x_Hunter_2011',
 'https://myanimelist.net/anime/19815/No_Game_No_Life',
 'https://myanimelist.net/anime/1735/Naruto__Shippuuden',
 'https://myanimelist.net/anime/20507/Noragami',
 'https://myanimelist.net/anime/21/One_Piece',
 'https://myanimelist.net/anime/22199/Akame_ga_Kill',
 'https://myanimelist.net/anime/23755/Nanatsu_no_Taizai',
 'https://myanimelist.net/anime/21881/Sword_Art_Online_II',
 'https://myanimelist.net/anime/30831/Kono_Subarashii_Sekai_ni_Shukufuku_wo',
 'https://myanimelist.net/anime/269/Bleach',
 'https://myanimelist.net/anime/1/Cowboy_Bebop',
 'https://myanimelist.net/anime/199/Sen_to_Chihiro_no_Kamikakushi',
 'https://myanimelist.net/anime/6702/Fairy_Tail',
 'https://myanimelist.net/anime/2001/Tengen_Toppa_Gurren_Lagann',
 'https://myanimelist.net/an

In [218]:
# Finding the anime scores and then creating a list of the anime scores

ratings = doc.find_all('div', class_= 'scormem')

ratings_list = []
for rating in ratings:
    # The score is the first line of the block's text; later lines hold other values.
    score_text = rating.get_text().strip().split('\n')[0]
    try:
        score = float(score_text)
    except ValueError:
        # Non-numeric score text (presumably an unscored entry) used to abort the
        # whole cell. Store NaN instead so this list keeps one element per anime
        # and stays aligned with the other scraped lists.
        score = float('nan')
    ratings_list.append(score)

type(ratings_list[0])

float

In [215]:
# Finding the number of community members and then creating a list of the number of community members.
# These are the same 'scormem' blocks that the scores are read from.

members = doc.find_all('div', class_= 'scormem')

def convert_to_number(num):
    """Convert a count such as '424K', '2.8M' or '1,234,567' to an int.

    Parameters
    ----------
    num : str
        The text to convert. Surrounding whitespace and thousands separators
        (',') are ignored. A trailing K, M or B (either case) multiplies the
        value by a thousand, a million or a billion. Any other trailing
        non-digit character is dropped without scaling.

    Returns
    -------
    int
        The value, truncated toward zero. Empty input or a single non-digit
        character gives 0.

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed.
    """
    from decimal import Decimal, InvalidOperation

    num_conversion = {'K': 1000, 'M': 1000000, 'B': 1000000000}

    text = num.strip().replace(',', '')
    if text.isdigit():
        return int(text)
    if len(text) <= 1:
        # Nothing numeric to read (empty string or a lone suffix/placeholder).
        return 0

    if text[-1].isdigit():
        # No suffix at all, e.g. a plain decimal or a signed number.
        digits, multiplier = text, 1
    else:
        digits, multiplier = text[:-1], num_conversion.get(text[-1].upper(), 1)

    try:
        # Decimal keeps the scaling exact; with binary floats a product such as
        # 1.005 * 1000 lands just below 1005 and int() would truncate it to 1004.
        value = Decimal(digits)
    except InvalidOperation:
        raise ValueError('could not convert to a number: ' + repr(num)) from None
    return int(value * multiplier)

# The member count is read from the fourth line of each block's text
members_list = [
    convert_to_number(member.get_text().strip().split('\n')[3])
    for member in members
]

int

In [199]:
# Collecting, for every anime, the list of names linked in the first 'property' section of its
# 'properties' block

producers = doc.find_all('div', class_='properties')

# Outer list: one entry per anime. Inner list: the link text of each 'item' span.
producers_list = [
    [
        item.find('a').text
        for item in card.find('div', class_='property').find_all('span', class_='item')
    ]
    for card in producers
]
    


In [200]:
# Collecting the episode-count text of every anime found on the page

episodes = doc.find_all('div', class_='prodsrc')

episodes_list = []
for block in episodes:
    # Only the first three 'item' spans are fetched; the third one is the one that is read
    third_item = block.find_all('span', class_='item', limit=3)[2]
    # The text of its first nested span is stored as-is (a string, not a number)
    episodes_list.append(third_item.find_all('span')[0].text)
    

In [205]:
# Assembling the scraped lists into one DataFrame with one row per anime

column_names = ['Title', 'Producer', 'Episodes', 'Rating', 'Members','Animation Information Url']
scraped_columns = [titles_list, producers_list, episodes_list, ratings_list, members_list, infos_list]

# zip() silently stops at the shortest input, so a list that missed an entry would
# drop rows and pair values from different anime without any error. Fail loudly instead.
column_lengths = dict(zip(column_names, map(len, scraped_columns)))
if len(set(column_lengths.values())) > 1:
    raise ValueError('Scraped lists have different lengths: ' + str(column_lengths))

current_list = list(zip(*scraped_columns))
df = pd.DataFrame(current_list, columns = column_names)

In [208]:
# Displaying the assembled DataFrame.
# NOTE(review): the saved output shows Members as strings such as '2.8M', whereas
# convert_to_number returns ints, and the execution counts are out of order - this
# output looks stale; restart the kernel and run all cells to confirm.
df

Unnamed: 0,Title,Producer,Episodes,Rating,Members,Animation Information Url
0,Fullmetal Alchemist: Brotherhood,[Bones],64 eps,9.15,2.8M,https://myanimelist.net/anime/5114/Fullmetal_A...
1,Sword Art Online,[A-1 Pictures],25 eps,7.20,2.7M,https://myanimelist.net/anime/11757/Sword_Art_...
2,Naruto,[Studio Pierrot],220 eps,7.96,2.4M,https://myanimelist.net/anime/20/Naruto
3,Hunter x Hunter (2011),[Madhouse],148 eps,9.05,2.3M,https://myanimelist.net/anime/11061/Hunter_x_H...
4,No Game No Life,[Madhouse],12 eps,8.12,2.1M,https://myanimelist.net/anime/19815/No_Game_No...
...,...,...,...,...,...,...
95,Sora yori mo Tooi Basho,[Madhouse],13 eps,8.54,424K,https://myanimelist.net/anime/35839/Sora_yori_...
96,Maoyuu Maou Yuusha,[Arms],12 eps,7.26,423K,https://myanimelist.net/anime/14833/Maoyuu_Mao...
97,Magi: Sinbad no Bouken (TV),[Lay-duce],13 eps,7.86,422K,https://myanimelist.net/anime/31741/Magi__Sinb...
98,Fairy Tail: Final Series,"[A-1 Pictures, Bridge, CloverWorks]",51 eps,7.55,419K,https://myanimelist.net/anime/35972/Fairy_Tail...
