# MyAnimeList (MAL) data scraping

**This file aims to scrape anime data from MAL for the year 2023 and 2024 (till summer 2024)**

In [1]:
import requests
from bs4 import BeautifulSoup


In [2]:
site_url = 'https://myanimelist.net'
response = requests.get(site_url)
print(response.status_code)

200


In [3]:
site_url = 'https://myanimelist.net/anime/season/2024/winter'
def get_topic_page():
    final_url = site_url
    response = requests.get(final_url)
    if response.status_code != 200:
        print('Status code:', response.status_code)
        raise Exception('Failed to fetch web page ' + final_url)
    return BeautifulSoup(response.text)

In [None]:
doc = get_topic_page()

In [None]:
doc.title.text.strip()

'Winter 2024 - Anime - MyAnimeList.net'

## Getting the Titles

In [None]:
doc.find('h2', class_ = 'h2_anime_title').find('a').text

'Ore dake Level Up na Ken'

In [None]:
titles = doc.find_all('h2', class_ = 'h2_anime_title')
list_titles = []
for title in titles:
    name = title.find('a').text
    list_titles.append(name)

len(list_titles)

230

In [None]:
list_titles[:5]

['Ore dake Level Up na Ken',
 'Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e 3rd Season',
 'Mashle: Shinkakusha Kouho Senbatsu Shiken-hen',
 'Dungeon Meshi',
 'Tsuki ga Michibiku Isekai Douchuu 2nd Season']

## Getting the Studio names

In [None]:
# Scraping studios
properties_divs = doc.find_all('div', class_='properties')

list_studio = []
missing_studio_titles = []

for properties in properties_divs:
    studio_names = []
    property_divs = properties.find_all('div', class_='property')

    for prop in property_divs:
        caption = prop.find('span', class_='caption')
        if caption and caption.text.strip() in ['Studio', 'Studios']:
            studio_spans = prop.find_all('span', class_='item')
            for studio_span in studio_spans:
                studio_link = studio_span.find('a')
                if studio_link:
                    studio_names.append(studio_link.text.strip())

    if studio_names:
        list_studio.append(", ".join(studio_names))
    else:
        list_studio.append("Unknown")
        # Find the corresponding title and add it to the missing_studio_titles list
        index = len(list_studio) - 1
        missing_studio_titles.append(list_titles[index])

print("Number of studios scraped:", len(list_studio))
print("Number of titles without studio names:", len(missing_studio_titles))

# Print titles without studio names
if missing_studio_titles:
    print("Titles without studio names:")
    for title in missing_studio_titles:
        print(title)

# Check if the number of titles matches the number of studios
if len(list_titles) != len(list_studio):
    print("Mismatch between number of titles and studios.")


Number of studios scraped: 230
Number of titles without studio names: 55
Titles without studio names:
Enjou Bokumetsu! Mahou Shoujo Aiko
Colorful na Everyday
Magical Fang
Balala Xiao Mo Xian: Xing Yuan Die Qi 2
Larva in Mars
Xin Xiyou Lixian Ji
Dinoster: Gonglyongsuhodae 2nd Season
DoReMi Friends
Shikaru Neko
Kamiusagi Rope: Warau Asa ni wa Fukuraitaru tte Maji ssuka!?
Manul no Yuube
Yowamushi Monsters
Otoppe
Pakkororin
Reizouko no Tsukenosuke!
Shin Nippon History
Cabeon
Dino Powers 2
Youkoso Ninchishou Sekai e
Hello Carbot Season 14
Geomeongsupeul Jikyeola! Birdy Friends
Mythteria 2nd Season
Kemonokko Tsuushin: The Animation
Yi Ren Zhi Xia: Xiu Tie Chong Xian
Haramaseya The Animation
Wangzhe Rongyao: Rongyao Zhi Zhang
Yishi Zhi Zun
Bu Xing Si: Yuan Qi
Xiao Lu He Xiao Lan 5th Season
Shirarezaru "Momowarou Monogatari"
Yin Shizong Men Zhang Jiao
Bai Jia Jue Zhi: Jianghu Gui Shi Lu
Fei Ren Zai Spring Festival Special
Huo Shen: Tianqi Zhizi
Dixia Cheng Yu Yongshi: Po Jie Shaonu
Rebirth Cho

In [None]:
list_studio[:5]

['A-1 Pictures', 'Lerche', 'A-1 Pictures', 'Trigger', 'J.C.Staff']

In [None]:
list_studio.count('Unknown')

55

## Getting the Theme and Source names

In [None]:
# Scraping themes and sources
properties_divs = doc.find_all('div', class_='properties')

list_themes = []
list_sources = []
missing_themes_titles = []
missing_sources_titles = []

for properties in properties_divs:
    theme_names = []
    source_name = None  # Variable to store source name
    property_divs = properties.find_all('div', class_='property')

    for prop in property_divs:
        caption = prop.find('span', class_='caption')
        if caption:
            if caption.text.strip() in ['Theme', 'Themes']:
                theme_spans = prop.find_all('span', class_='item')
                for theme_span in theme_spans:
                    theme_link = theme_span.find('a')
                    if theme_link:
                        theme_names.append(theme_link.text.strip())
            elif caption.text.strip() == 'Source':
                source_span = prop.find('span', class_='item')
                if source_span:
                    source_name = source_span.text.strip()

    if theme_names:
        list_themes.append(", ".join(theme_names))
    else:
        list_themes.append("Unknown")
        # Find the corresponding title and add it to the missing_themes_titles list
        index = len(list_themes) - 1
        missing_themes_titles.append(list_titles[index])

    if source_name:
        list_sources.append(source_name)
    else:
        list_sources.append("Unknown")
        # Find the corresponding title and add it to the missing_sources_titles list
        index = len(list_sources) - 1
        missing_sources_titles.append(list_titles[index])

print("Number of themes scraped:", len(list_themes))
print("Number of sources scraped:", len(list_sources))
print("Number of titles without theme names:", len(missing_themes_titles))
print("Number of titles without source names:", len(missing_sources_titles))

# Print titles without theme names
if missing_themes_titles:
    print("Titles without theme names:")
    for title in missing_themes_titles:
        print(title)
print('--------------------------------------------------')
# Print titles without source names
if missing_sources_titles:
    print("Titles without source names:")
    for title in missing_sources_titles:
        print(title)

# Check if the number of titles matches the number of themes and sources
if len(list_titles) != len(list_themes) or len(list_titles) != len(list_sources):
    print("Mismatch between number of titles, themes, and sources.")


Number of themes scraped: 230
Number of sources scraped: 230
Number of titles without theme names: 97
Number of titles without source names: 6
Titles without theme names:
Dungeon Meshi
Ninja Kamui
Nozomanu Fushi no Boukensha
Majo to Yajuu
Shin no Nakama ja Nai to Yuusha no Party wo Oidasareta node, Henkyou de Slow Life suru Koto ni Shimashita 2nd
Metallic Rouge
Saikyou Tank no Meikyuu Kouryaku: Tairyoku 9999 no Rare Skill-mochi Tank, Yuusha Party wo Tsuihou sareru
Himesama "Goumon" no Jikan desu
Urusei Yatsura (2022) 2nd Season
Hikari no Ou 2nd Season
Yami Shibai 12
Chou Futsuu Ken Chiba Densetsu
Shinobanai! Crypto Ninja Sakuya: Ni no Maki
Sai-Kyo-Oh! Zukan: The Ultimate Battles
Sakuretsu! Amabie-hime. Season 3
Magical Fang
One Piece
Sousou no Frieren
Ragna Crimson
Nanatsu no Taizai: Mokushiroku no Yonkishi
Pokemon (2023)
Gudetama
Sazae-san
Nintama Rantarou
Sore Ike! Anpanman
Chiikawa
Beyblade X
Ninjala (TV)
Shikaru Neko
Ojarumaru
Fushigi Dagashiya: Zenitendou
Kamiusagi Rope: Warau Asa

In [None]:
list_themes.count('Unknown')

97

In [None]:
list_sources.count('Unknown')

6

## Getting the Episode count

In [None]:
# Extracting number of episodes
list_episodes = []
missing_episodes_titles = []

# Find all 'info' divs that contain episode information
info_divs = doc.find_all('div', class_='info')

for info in info_divs:
    # Find 'span' elements with 'ep' or 'eps'
    episode_span = info.find('span', string=lambda x: x and ('ep' in x or 'eps' in x))
    if episode_span:
        # Store the entire text, including 'ep' or 'eps'
        episode_text = episode_span.get_text(strip=True)
        list_episodes.append(episode_text)
    else:
        list_episodes.append("Unknown")
        # Find the corresponding title and add it to the missing_episodes_titles list
        index = len(list_episodes) - 1
        if index < len(list_titles):
            missing_episodes_titles.append(list_titles[index])

print("Number of anime with episodes scraped:", len(list_episodes))
print("Number of titles without episode information:", len(missing_episodes_titles))

# Print titles without episode information
if missing_episodes_titles:
    print("Titles without episode information:")
    for title in missing_episodes_titles:
        print(title)

Number of anime with episodes scraped: 230
Number of titles without episode information: 0


In [None]:
list_episodes[:5]

['12 eps', '13 eps', '12 eps', '24 eps', '25 eps']

## Geting the Ratings and Member count

In [None]:
# Initialize lists
list_ratings = []
list_members = []
missing_ratings_titles = []
missing_members_titles = []

info_divs = doc.find_all('div', class_='scormem-container')

for info in info_divs:
    # Extract rating
    rating_div = info.find('div', class_='score')
    if rating_div:
        rating_text = rating_div.get_text(strip=True).replace('Score', '').strip()
        list_ratings.append(rating_text)
    else:
        list_ratings.append("Unknown")
        # Assuming each info_div corresponds to a title (adjust if needed)
        missing_ratings_titles.append("Unknown Title")

    # Extract members
    members_div = info.find('div', class_='member')
    if members_div:
        members_text = members_div.get_text(strip=True).replace('Members', '').strip()
        list_members.append(members_text)
    else:
        list_members.append("Unknown")
        # Assuming each info_div corresponds to a title (adjust if needed)
        missing_members_titles.append("Unknown Title")

print("Number of ratings scraped:", len(list_ratings))
print("Number of members scraped:", len(list_members))
print("Number of titles without rating information:", len(missing_ratings_titles))
print("Number of titles without member information:", len(missing_members_titles))


Number of ratings scraped: 230
Number of members scraped: 230
Number of titles without rating information: 0
Number of titles without member information: 0


In [None]:
list_ratings[:5], list_members[:5]


(['8.29', '7.97', '7.83', '8.61', '7.83'],
 ['712K', '389K', '350K', '331K', '239K'])

## Getting the Image URLs

In [None]:
list_images = []

# Find all image divs
image_divs = doc.find_all('div', class_='image')

for img_div in image_divs:
    # Find the img tag
    img_tag = img_div.find('img')
    if img_tag:
        # Extract URL from either src or data-src attribute
        img_url = img_tag.get('src') or img_tag.get('data-src')
        if img_url:
            list_images.append(img_url)

print("Number of image URLs scraped:", len(list_images))
list_images[:5]

Number of image URLs scraped: 230


['https://cdn.myanimelist.net/images/anime/1801/142390.jpg',
 'https://cdn.myanimelist.net/images/anime/1332/139318.jpg',
 'https://cdn.myanimelist.net/images/anime/1912/140804.jpg',
 'https://cdn.myanimelist.net/images/anime/1711/142478.jpg',
 'https://cdn.myanimelist.net/images/anime/1794/142621.jpg']

## Getting the anime release-date

In [None]:
list_dates = []

# Find all 'info' divs that contain date information
info_divs = doc.find_all('div', class_='info')

for info in info_divs:
    # Find all 'span' elements with class 'item'
    date_spans = info.find_all('span', class_='item')
    for date_span in date_spans:
        date_text = date_span.get_text(strip=True)
        # Check if it looks like a date (e.g., not containing 'eps' or 'ep')
        if 'eps' not in date_text and 'ep' not in date_text:
            list_dates.append(date_text)
            break  # Assume only one date per info div
    else:
        list_dates.append("Unknown")

print(len(list_dates))
print(list_dates[:5])


230
['Jan 7, 2024', 'Jan 3, 2024', 'Jan 6, 2024', 'Jan 4, 2024', 'Jan 8, 2024']


## Converting the data into dataframe

In [None]:
import pandas as pd
import numpy as np

In [None]:
column_names = ['Title', 'Theme', 'Studio', 'Source', 'Episodes', 'Rating', 'Members', 'Date', 'Img_url']
df_winter_24 = pd.DataFrame(list(zip(list_titles, list_themes, list_studio, list_sources, list_episodes, list_ratings, list_members, list_dates, list_images)), columns = column_names)

## Combining all into one function

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

def scrape_anime_data(site_url):
    def get_topic_page(url):
        response = requests.get(url)
        if response.status_code != 200:
            print('Status code:', response.status_code)
            raise Exception('Failed to fetch web page ' + url)
        return BeautifulSoup(response.text, 'html.parser')

    doc = get_topic_page(site_url)

    # Extract titles
    titles = doc.find_all('h2', class_='h2_anime_title')
    list_titles = [title.find('a').text for title in titles]

    # Extract studios
    properties_divs = doc.find_all('div', class_='properties')
    list_studio = []
    missing_studio_titles = []

    for properties in properties_divs:
        studio_names = []
        property_divs = properties.find_all('div', class_='property')
        for prop in property_divs:
            caption = prop.find('span', class_='caption')
            if caption and caption.text.strip() in ['Studio', 'Studios']:
                studio_spans = prop.find_all('span', class_='item')
                for studio_span in studio_spans:
                    studio_link = studio_span.find('a')
                    if studio_link:
                        studio_names.append(studio_link.text.strip())
        if studio_names:
            list_studio.append(", ".join(studio_names))
        else:
            list_studio.append("Unknown")
            index = len(list_studio) - 1
            missing_studio_titles.append(list_titles[index])

    # Extract themes and sources
    list_themes = []
    list_sources = []
    missing_themes_titles = []
    missing_sources_titles = []

    for properties in properties_divs:
        theme_names = []
        source_name = None
        property_divs = properties.find_all('div', class_='property')
        for prop in property_divs:
            caption = prop.find('span', class_='caption')
            if caption:
                if caption.text.strip() in ['Theme', 'Themes']:
                    theme_spans = prop.find_all('span', class_='item')
                    for theme_span in theme_spans:
                        theme_link = theme_span.find('a')
                        if theme_link:
                            theme_names.append(theme_link.text.strip())
                elif caption.text.strip() == 'Source':
                    source_span = prop.find('span', class_='item')
                    if source_span:
                        source_name = source_span.text.strip()
        if theme_names:
            list_themes.append(", ".join(theme_names))
        else:
            list_themes.append("Unknown")
            index = len(list_themes) - 1
            missing_themes_titles.append(list_titles[index])
        if source_name:
            list_sources.append(source_name)
        else:
            list_sources.append("Unknown")
            index = len(list_sources) - 1
            missing_sources_titles.append(list_titles[index])

    # Extract episodes and dates
    list_episodes = []
    list_dates = []
    missing_episodes_titles = []
    missing_dates_titles = []

    info_divs = doc.find_all('div', class_='info')
    for info in info_divs:
        spans = info.find_all('span', class_='item')
        episode_text = None
        date_text = None

        for span in spans:
            text = span.get_text(strip=True)
            # Check if the text contains episode information
            if 'ep' in text or 'eps' in text:
                # Extract only the episode count (before any comma or other non-numeric text)
                match = re.match(r'(\d+)\s*ep', text, re.IGNORECASE)
                if match:
                    episode_text = match.group(1) + ' eps'
                else:
                    episode_text = text.split(',')[0].strip()
            elif re.match(r'\w{3} \d{1,2}, \d{4}', text):
                date_text = text

        if episode_text:
            list_episodes.append(episode_text)
        else:
            list_episodes.append("Unknown")
            index = len(list_episodes) - 1
            if index < len(list_titles):
                missing_episodes_titles.append(list_titles[index])

        if date_text:
            list_dates.append(date_text)
        else:
            list_dates.append("Unknown")
            index = len(list_dates) - 1
            if index < len(list_titles):
                missing_dates_titles.append(list_titles[index])

    # Extract ratings and member counts
    list_ratings = []
    list_members = []
    missing_ratings_titles = []
    missing_members_titles = []

    info_divs = doc.find_all('div', class_='scormem-container')
    for info in info_divs:
        rating_div = info.find('div', class_='score')
        if rating_div:
            rating_text = rating_div.get_text(strip=True).replace('Score', '').strip()
            list_ratings.append(rating_text)
        else:
            list_ratings.append("Unknown")
            missing_ratings_titles.append("Unknown Title")

        members_div = info.find('div', class_='member')
        if members_div:
            members_text = members_div.get_text(strip=True).replace('Members', '').strip()
            list_members.append(members_text)
        else:
            list_members.append("Unknown")
            missing_members_titles.append("Unknown Title")

    # Extract image URLs
    list_images = []
    image_divs = doc.find_all('div', class_='image')
    for img_div in image_divs:
        img_tag = img_div.find('img')
        if img_tag:
            img_url = img_tag.get('src') or img_tag.get('data-src')
            if img_url:
                list_images.append(img_url)

    # Create DataFrame
    column_names = ['Title', 'Theme', 'Studio', 'Source', 'Episodes', 'Rating', 'Members', 'Date', 'Img_url']
    df = pd.DataFrame(list(zip(list_titles, list_themes, list_studio, list_sources, list_episodes, list_ratings, list_members, list_dates, list_images)), columns=column_names)

    return df



### Winter-2024 anime

In [17]:
site_url = 'https://myanimelist.net/anime/season/2024/winter'
df_winter_24 = scrape_anime_data(site_url)
df_winter_24.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Ore dake Level Up na Ken,Adult Cast,A-1 Pictures,Web manga,12 eps,8.29,713K,"Jan 7, 2024",https://cdn.myanimelist.net/images/anime/1801/...
1,Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu ...,"Psychological, School",Lerche,Light novel,13 eps,7.97,389K,"Jan 3, 2024",https://cdn.myanimelist.net/images/anime/1332/...
2,Mashle: Shinkakusha Kouho Senbatsu Shiken-hen,"Gag Humor, Parody, School",A-1 Pictures,Manga,12 eps,7.83,350K,"Jan 6, 2024",https://cdn.myanimelist.net/images/anime/1912/...
3,Dungeon Meshi,Unknown,Trigger,Manga,24 eps,8.61,332K,"Jan 4, 2024",https://cdn.myanimelist.net/images/anime/1711/...
4,Tsuki ga Michibiku Isekai Douchuu 2nd Season,Isekai,J.C.Staff,Light novel,25 eps,7.83,239K,"Jan 8, 2024",https://cdn.myanimelist.net/images/anime/1794/...


### Spring-2024 anime

In [18]:
site_url = 'https://myanimelist.net/anime/season/2024/spring'
df_spring_24 = scrape_anime_data(site_url)
df_spring_24.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Kimetsu no Yaiba: Hashira Geiko-hen,Historical,ufotable,Manga,8 eps,8.17,468K,"May 12, 2024",https://cdn.myanimelist.net/images/anime/1565/...
1,Kaijuu 8-gou,"Adult Cast, Military",Production I.G,Manga,12 eps,8.34,415K,"Apr 13, 2024",https://cdn.myanimelist.net/images/anime/1370/...
2,Kono Subarashii Sekai ni Shukufuku wo! 3,"Isekai, Parody",Drive,Light novel,11 eps,8.4,413K,"Apr 10, 2024",https://cdn.myanimelist.net/images/anime/1758/...
3,Mushoku Tensei II: Isekai Ittara Honki Dasu Pa...,"Isekai, Reincarnation",Studio Bind,Light novel,12 eps,8.47,376K,"Apr 8, 2024",https://cdn.myanimelist.net/images/anime/1876/...
4,Tensei shitara Slime Datta Ken 3rd Season,"Isekai, Reincarnation",8bit,Manga,24 eps,7.85,316K,"Apr 5, 2024",https://cdn.myanimelist.net/images/anime/1211/...


In [19]:
df_spring_24.shape

(228, 9)

### Summer-2024 anime

In [20]:
site_url = 'https://myanimelist.net/anime/season/2024/summer'
df_summer_24 = scrape_anime_data(site_url)
df_summer_24.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,"""Oshi no Ko"" 2nd Season","Reincarnation, Showbiz",Doga Kobo,Manga,13 eps,8.41,288K,"Jul 3, 2024",https://cdn.myanimelist.net/images/anime/1006/...
1,Tokidoki Bosotto Russia-go de Dereru Tonari no...,School,Doga Kobo,Light novel,12 eps,8.07,228K,"Jul 3, 2024",https://cdn.myanimelist.net/images/anime/1825/...
2,Kami no Tou: Ouji no Kikan,Unknown,The Answer Studio,Web manga,? eps,7.43,174K,"Jul 7, 2024",https://cdn.myanimelist.net/images/anime/1107/...
3,Shikanoko Nokonoko Koshitantan,"Gag Humor, School",Wit Studio,Manga,? eps,7.62,158K,"Jul 7, 2024",https://cdn.myanimelist.net/images/anime/1084/...
4,Fairy Tail: 100-nen Quest,Unknown,J.C.Staff,Web manga,? eps,8.06,122K,"Jul 7, 2024",https://cdn.myanimelist.net/images/anime/1087/...


### Winter-2023 anime

In [21]:
site_url = 'https://myanimelist.net/anime/season/2023/winter'
df_winter_23 = scrape_anime_data(site_url)
df_winter_23.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Vinland Saga Season 2,"Gore, Historical",MAPPA,Manga,24 eps,8.81,643K,"Jan 10, 2023",https://cdn.myanimelist.net/images/anime/1170/...
1,Tomo-chan wa Onnanoko!,School,Lay-duce,Web manga,13 eps,7.77,410K,"Jan 5, 2023",https://cdn.myanimelist.net/images/anime/1444/...
2,Tokyo Revengers: Seiya Kessen-hen,"Delinquents, Time Travel",LIDENFILMS,Manga,13 eps,7.65,365K,"Jan 8, 2023",https://cdn.myanimelist.net/images/anime/1773/...
3,Maou Gakuin no Futekigousha II: Shijou Saikyou...,"Reincarnation, School",SILVER LINK.,Light novel,12 eps,6.88,347K,"Jan 8, 2023",https://cdn.myanimelist.net/images/anime/1369/...
4,Otonari no Tenshi-sama ni Itsunomanika Dame Ni...,School,Project No.9,Light novel,12 eps,7.82,325K,"Jan 7, 2023",https://cdn.myanimelist.net/images/anime/1240/...


### Spring-2023 anime

In [22]:
site_url = 'https://myanimelist.net/anime/season/2023/spring'
df_spring_23 = scrape_anime_data(site_url)
df_spring_23.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Kimetsu no Yaiba: Katanakaji no Sato-hen,Historical,ufotable,Manga,11 eps,8.22,870K,"Apr 9, 2023",https://cdn.myanimelist.net/images/anime/1765/...
1,"""Oshi no Ko""","Reincarnation, Showbiz",Doga Kobo,Manga,11 eps,8.63,830K,"Apr 12, 2023",https://cdn.myanimelist.net/images/anime/1812/...
2,Jigokuraku,"Gore, Historical, Samurai",MAPPA,Manga,13 eps,8.1,710K,"Apr 1, 2023",https://cdn.myanimelist.net/images/anime/1075/...
3,Mashle,"Gag Humor, Parody, School",A-1 Pictures,Manga,12 eps,7.62,588K,"Apr 8, 2023",https://cdn.myanimelist.net/images/anime/1218/...
4,Tengoku Daimakyou,Unknown,Production I.G,Manga,13 eps,8.21,497K,"Apr 1, 2023",https://cdn.myanimelist.net/images/anime/1121/...


### Summer-2023 anime

In [23]:
site_url = 'https://myanimelist.net/anime/season/2023/summer'
df_summer_23 = scrape_anime_data(site_url)
df_summer_23.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Jujutsu Kaisen 2nd Season,"Gore, School",MAPPA,Manga,23 eps,8.81,1.0M,"Jul 6, 2023",https://cdn.myanimelist.net/images/anime/1792/...
1,Mushoku Tensei II: Isekai Ittara Honki Dasu,"Isekai, Reincarnation",Studio Bind,Light novel,12 eps,8.25,584K,"Jul 10, 2023",https://cdn.myanimelist.net/images/anime/1898/...
2,Zom 100: Zombie ni Naru made ni Shitai 100 no ...,"Adult Cast, Survival",BUG FILMS,Manga,12 eps,7.79,549K,"Jul 9, 2023",https://cdn.myanimelist.net/images/anime/1384/...
3,Horimiya: Piece,School,CloverWorks,Manga,13 eps,8.18,336K,"Jul 1, 2023",https://cdn.myanimelist.net/images/anime/1007/...
4,Bleach: Sennen Kessen-hen - Ketsubetsu-tan,Unknown,Pierrot,Manga,13 eps,8.69,289K,"Jul 8, 2023",https://cdn.myanimelist.net/images/anime/1164/...


### Fall-2023 anime

In [24]:
site_url = 'https://myanimelist.net/anime/season/2023/fall'
df_fall_23 = scrape_anime_data(site_url)
df_fall_23.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Sousou no Frieren,Unknown,Madhouse,Manga,28 eps,9.34,834K,Unknown,https://cdn.myanimelist.net/images/anime/1015/...
1,Spy x Family Season 2,Childcare,"CloverWorks, Wit Studio",Manga,12 eps,8.07,505K,"Oct 7, 2023",https://cdn.myanimelist.net/images/anime/1506/...
2,Kusuriya no Hitorigoto,"Historical, Medical","OLM, TOHO animation STUDIO",Light novel,24 eps,8.9,447K,"Oct 22, 2023",https://cdn.myanimelist.net/images/anime/1708/...
3,Tate no Yuusha no Nariagari Season 3,Isekai,Kinema Citrus,Light novel,12 eps,7.11,438K,"Oct 6, 2023",https://cdn.myanimelist.net/images/anime/1317/...
4,Kage no Jitsuryokusha ni Naritakute! 2nd Season,"Isekai, Reincarnation",Nexus,Light novel,12 eps,8.34,378K,"Oct 4, 2023",https://cdn.myanimelist.net/images/anime/1938/...


## Combining all dataframes into one

In [25]:
df = [df_winter_23, df_spring_23, df_summer_23, df_fall_23, df_winter_24, df_spring_24, df_summer_24]
final_df = pd.concat(df, ignore_index=True)
final_df.head()

Unnamed: 0,Title,Theme,Studio,Source,Episodes,Rating,Members,Date,Img_url
0,Vinland Saga Season 2,"Gore, Historical",MAPPA,Manga,24 eps,8.81,643K,"Jan 10, 2023",https://cdn.myanimelist.net/images/anime/1170/...
1,Tomo-chan wa Onnanoko!,School,Lay-duce,Web manga,13 eps,7.77,410K,"Jan 5, 2023",https://cdn.myanimelist.net/images/anime/1444/...
2,Tokyo Revengers: Seiya Kessen-hen,"Delinquents, Time Travel",LIDENFILMS,Manga,13 eps,7.65,365K,"Jan 8, 2023",https://cdn.myanimelist.net/images/anime/1773/...
3,Maou Gakuin no Futekigousha II: Shijou Saikyou...,"Reincarnation, School",SILVER LINK.,Light novel,12 eps,6.88,347K,"Jan 8, 2023",https://cdn.myanimelist.net/images/anime/1369/...
4,Otonari no Tenshi-sama ni Itsunomanika Dame Ni...,School,Project No.9,Light novel,12 eps,7.82,325K,"Jan 7, 2023",https://cdn.myanimelist.net/images/anime/1240/...


In [26]:
final_df.shape

(1684, 9)

In [27]:
from google.colab import files

final_df.to_csv('anime_data_23-24.csv', index=False)

files.download('anime_data_23-24.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>