In [4]:
# importing libraries

import csv

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [5]:
# Setting up website references and checking if the request succeeded

# Base URL of the site; the other page addresses in this notebook are built from it
site_url = 'https://myanimelist.net'
# requests never times out by default, so an unresponsive server would hang
# this cell forever; give up after 30 seconds instead
response = requests.get(site_url, timeout=30)
# Display the HTTP status code of the response (200 means success)
response.status_code

200

In [6]:
# Creating a beautiful soup object and checking if it worked correctly.
# The parser is named explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed (and warns), so results could differ
# between machines. 'html.parser' ships with Python itself.

doc = BeautifulSoup(response.text, 'html.parser')
type(doc)

bs4.BeautifulSoup

In [7]:
# Accessing the top anime list webpage

top_anime_url = site_url + '/topanime.php'
# Time out rather than hang if the server never answers
response = requests.get(top_anime_url, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page and
# hitting confusing attribute errors in the cells below
response.raise_for_status()
# Explicit parser keeps the parse tree independent of optional parser installs
doc = BeautifulSoup(response.text, 'html.parser')

In [8]:
# Page title: display the text of the <title> tag (whitespace-stripped) as a
# quick check that the Top Anime page is the one that was loaded

doc.title.text.strip()

'Top Anime - MyAnimeList.net'

In [9]:
# Finding all the column headers to use for the future csv:
# locate the table's header row, then list its <td> cells, whose text holds
# the column names shown on the page

headers = doc.find('tr', class_ = 'table-header')
headers.find_all('td') 

[<td class="rank">Rank</td>,
 <td class="title">Title</td>,
 <td class="score">Score</td>,
 <td class="your-score">Your Score</td>,
 <td class="status">Status</td>]

In [10]:
# Collect every <tr class="ranking-list"> element (the table's data rows) and
# display the first one to inspect which tags and classes hold each field

row_content = doc.find_all('tr', {'class' : "ranking-list"})
row_content[0]

<tr class="ranking-list">
<td class="rank ac" valign="top">
<span class="lightLink top-anime-rank-text rank1">1</span>
</td>
<td class="title al va-t word-break">
<a class="hoverinfo_trigger fl-l ml12 mr8" href="https://myanimelist.net/anime/5114/Fullmetal_Alchemist__Brotherhood" id="#area5114" rel="#info5114">
<img alt="Anime: Fullmetal Alchemist: Brotherhood" border="0" class="lazyload" data-src="https://cdn.myanimelist.net/r/50x70/images/anime/1223/96541.jpg?s=faffcb677a5eacd17bf761edd78bfb3f" data-srcset="https://cdn.myanimelist.net/r/50x70/images/anime/1223/96541.jpg?s=faffcb677a5eacd17bf761edd78bfb3f 1x, https://cdn.myanimelist.net/r/100x140/images/anime/1223/96541.jpg?s=0c3b98cf4905422c00981025cd20d271 2x" height="70" width="50"/>
</a>
<div class="detail"><div id="area5114">
<div class="hoverinfo" id="info5114" rel="a5114"></div>
</div>
<div class="di-ib clearfix"><h3 class="hoverinfo_trigger fl-l fs14 fw-b anime_ranking_h3"><a href="https://myanimelist.net/anime/5114/Fullmetal_

In [11]:
# Finding the rank of a specific row: the text of the <span> inside the
# row's "rank ac" cell (returned as a string, not a number)

row_content[0].find('td', class_ = "rank ac").find('span').text

'1'

In [12]:
# Finding the show title of a specific row: the text of the first link
# inside the row's "di-ib clearfix" block

row_content[0].find('div', class_="di-ib clearfix").find('a').text

'Fullmetal Alchemist: Brotherhood'

In [13]:
# Finding the score of a specific row: the text of the <span> inside the
# row's "score ac fs14" cell (returned as a string)

row_content[0].find('td', class_="score ac fs14").find('span').text

'9.15'

In [14]:
# Finding the anime page URL of a specific row: the href of the same link
# whose text is the show title

row_content[0].find('div', class_ ='di-ib clearfix').find('a')['href']

'https://myanimelist.net/anime/5114/Fullmetal_Alchemist__Brotherhood'

In [15]:
# Finding the image URL of a specific row: the thumbnail <img> is lazy-loaded
# (class "lazyload"), so its address is read from 'data-src'

row_content[0].find('td', class_ ='title al va-t word-break').find('img')['data-src']

'https://cdn.myanimelist.net/r/50x70/images/anime/1223/96541.jpg?s=faffcb677a5eacd17bf761edd78bfb3f'

In [16]:
# Finding the episodes and release date: the row's "information" block is
# newline-separated text, so splitting it yields the type/episode count,
# the air dates and the member count (the later entries keep leading spaces)

var = row_content[0].find('div', class_ = "information di-ib mt4").text.strip().split("\n")
var

['TV (64 eps)', '        Apr 2009 - Jul 2010', '        2,803,104 members']

In [17]:
# Helper that keeps only the first two entries of an information list such as var above, with surrounding whitespace removed

def parse_episodes(info):
    """Return the first two entries of ``info`` with surrounding whitespace removed.

    Args:
        info: sequence of strings. In this notebook it is a row's
            "information" text split on newlines, where the first entry is the
            type/episode count and the second is the air-date range.

    Returns:
        A new list holding at most two stripped strings; any further entries
        (such as the member count) are dropped.
    """
    return [entry.strip() for entry in info[:2]]

In [18]:
# Apply the helper to var: returns a two-item list holding the type with its
# episode count, and the release dates

parse_episodes(var)

['TV (64 eps)', 'Apr 2009 - Jul 2010']

In [19]:
# Creating a dictionary for each of the anime entries that contain their rank, title, rating, anime information page URL,
# image URL, Number of episodes, and release date, then adding them to a list

top_anime = []
for row in row_content:
    # The first link in the title block carries both the display title and
    # the URL of the anime's own page, so look it up once per row
    title_link = row.find('div', class_="di-ib clearfix").find('a')
    # The information block is newline-separated text; parse_episodes keeps
    # its first two entries (type/episode count and air dates)
    info_lines = row.find('div', class_="information di-ib mt4").text.strip().split('\n')
    episode = parse_episodes(info_lines)
    ranking = {
        'Rank': row.find('td', class_="rank ac").find('span').text,
        'Title': title_link.text,
        'Rating': row.find('td', class_="score ac fs14").find('span').text,
        # Read from the current row: indexing row_content[0] here gave every
        # record the URL of the first anime in the list
        'Anime_Page_URL': title_link['href'],
        # Thumbnails are lazy-loaded, so the address is in 'data-src'
        'Image_URL': row.find('td', class_='title al va-t word-break').find('img')['data-src'],
        'Episodes': episode[0],
        'Dates': episode[1]
    }
    top_anime.append(ranking)

In [20]:
# Defining a function that writes a list of dictionaries (such as the one built above) to a csv file, one row per dictionary

def write_csv(items, path):
    """Write a list of dictionaries to ``path`` as a UTF-8 CSV file.

    The keys of the first item define the columns and are written as the
    header row. Each item then becomes one row: values are converted with
    ``str`` and a key missing from an item is written as an empty field.

    Rows go through the ``csv`` module, so a value containing a comma, a
    quote or a newline is quoted rather than silently shifting the columns
    (plain comma-joining broke on such values).

    Args:
        items: list of dicts to write; may be empty.
        path: destination file path. The file is created or overwritten.

    Returns:
        None. When ``items`` is empty the file is still created (or
        truncated) but left without any content.
    """
    # newline='' hands line-ending control to the csv writer, as the csv
    # module documentation requires for files passed to csv.writer
    with open(path, 'w', encoding='utf-8', newline='') as f:
        # Return if there's nothing to write
        if len(items) == 0:
            return

        headers = list(items[0].keys())
        # Keep '\n' row endings, matching the files this function wrote before
        writer = csv.writer(f, lineterminator='\n')

        # Header line first, then one line per item
        writer.writerow(headers)
        writer.writerows(
            [str(item.get(header, "")) for header in headers]
            for item in items
        )

In [21]:
# Write the collected anime records to top_anime.csv (a relative path, so the
# file lands in the current working directory)

write_csv(top_anime, 'top_anime.csv')