In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def scrap_anime_review(base_url, delay=1.0, timeout=30):
	"""Collect the text of every review reachable from one review listing page.

	Starting at ``base_url``, each page is downloaded, the text of every
	``review-element js-review-element`` entry that contains a ``text`` node
	is appended to the result, and the link marked
	``data-ga-click-type="review-more-reviews"`` is followed to the next page.

	Args:
		base_url: URL of the first review page. A falsy value yields an
			empty list without any request being made.
		delay: Seconds to sleep before every request except the first.
			``0`` or ``None`` disables the pause.
		timeout: Timeout in seconds passed to ``requests.get``.

	Returns:
		list[str]: Review texts in the order they were encountered. If a
		request fails (network error or non-200 status) the reviews gathered
		so far are returned instead of raising.
	"""
	reviews = []
	visited = set()  # URLs already requested; stops the loop if pagination links back

	# Start from the initial URL
	url = base_url

	while url and url not in visited:
		# Be polite to the server: pause between consecutive page requests
		if visited and delay and delay > 0:
			time.sleep(delay)
		visited.add(url)

		print(f"Mengambil halaman: {url}")
		try:
			response = requests.get(url, timeout=timeout)
		except requests.RequestException as exc:
			# Treat a network failure like a bad status: stop, keep what we have
			print(f"Gagal mengambil halaman: {url}, error: {exc}")
			break

		if response.status_code != 200:
			print(f"Gagal mengambil halaman: {url}, status code: {response.status_code}")
			break

		soup = BeautifulSoup(response.text, 'html.parser')

		# Collect every review on this page
		# NOTE: a multi-class string matches only that exact class attribute value
		for item in soup.find_all(class_='review-element js-review-element'):
			review_body = item.find(class_='text')
			if review_body:
				reviews.append(review_body.get_text(strip=True))

		# Locate the next page; a missing link or missing href ends the crawl
		next_button = soup.find(attrs={'data-ga-click-type': 'review-more-reviews'})
		href = next_button.get('href') if next_button is not None else None
		# urljoin leaves absolute links untouched and resolves relative ones
		url = urljoin(url, href) if href else None

	return reviews
  
def scrap_all_anime_review(max_page=1, delay=1.0, timeout=30):
	"""Collect title/review pairs from the site-wide anime review listing.

	Pages are fetched starting at
	``https://myanimelist.net/reviews.php?t=anime`` and followed through the
	``<a data-ga-click-type="review-next">`` link until ``max_page`` pages
	have been processed or no further link exists.

	Args:
		max_page: Maximum number of listing pages to process. ``0`` yields
			an empty list without any request being made.
		delay: Seconds to sleep before every request except the first.
			``0`` or ``None`` disables the pause.
		timeout: Timeout in seconds passed to ``requests.get``.

	Returns:
		list[dict]: One ``{"title": ..., "review": ...}`` dict per review
		element, using ``"No Title"`` / ``"No Review"`` when the respective
		node is absent. If a request fails (network error or non-200 status)
		the records gathered so far are returned instead of raising.
	"""
	reviews = []
	page = 0

	# Start from the initial URL
	url = "https://myanimelist.net/reviews.php?t=anime"

	while url and page < max_page:
		# Be polite to the server: pause between consecutive page requests
		if page > 0 and delay and delay > 0:
			time.sleep(delay)

		print(f"Mengambil halaman: {url}")
		try:
			response = requests.get(url, timeout=timeout)
		except requests.RequestException as exc:
			# Treat a network failure like a bad status: stop, keep what we have
			print(f"Gagal mengambil halaman: {url}, error: {exc}")
			break

		if response.status_code != 200:
			print(f"Gagal mengambil halaman: {url}, status code: {response.status_code}")
			break

		soup = BeautifulSoup(response.text, 'html.parser')

		# Collect every review on this page
		# NOTE: a multi-class string matches only that exact class attribute value
		for item in soup.find_all(class_='review-element js-review-element'):
			# Anime title the review belongs to
			title_tag = item.find(attrs={'data-ga-click-type': 'review-anime-title'})
			title = title_tag.get_text(strip=True) if title_tag else "No Title"

			review_body = item.find(class_='text')
			review = review_body.get_text(strip=True) if review_body else "No Review"

			# Store the record
			reviews.append({"title": title, "review": review})

		page += 1

		# Locate the next page; a missing link or missing href ends the crawl
		next_button = soup.find('a', attrs={'data-ga-click-type': 'review-next'})
		href = next_button.get('href') if next_button is not None else None
		# urljoin leaves absolute links untouched and resolves relative ones
		url = urljoin(url, href) if href else None

	return reviews

In [4]:
# Scrape the site-wide anime review listing, limited to max_pages pages
max_pages = 20
top_anime_reviews = scrap_all_anime_review(max_pages)

# Write the records to CSV, or report that nothing was collected
if not top_anime_reviews:
	print("Tidak ada data yang disimpan.")
else:
	csv_file = "top_anime_reviews.csv"
	df = pd.DataFrame(top_anime_reviews)
	df.to_csv(csv_file, index=False)
	print(f"{len(df)} data berhasil disimpan ke {csv_file}")

Mengambil halaman: https://myanimelist.net/reviews.php?t=anime
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=2
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=3
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=4
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=5
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=6
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=7
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&preliminary=on&spoiler=off&p=8
Mengambil halaman: https://myanimelist.net/reviews.php?t=anime&filter_check=&filter_hide=&

In [None]:
# Review listing of a single anime (Uzumaki) used as the starting page
base_url = "https://myanimelist.net/anime/40333/Uzumaki/reviews"

# Follow the pagination from that page and gather every review text
all_reviews = scrap_anime_review(base_url)

# Write the reviews to CSV, or report that none were collected
if not all_reviews:
	print("Tidak ada ulasan yang berhasil diambil.")
else:
	csv_file = "reviews.csv"
	df = pd.DataFrame(all_reviews, columns=["review"])
	df.to_csv(csv_file, index=False)
	print(f"Berhasil menyimpan {len(all_reviews)} ulasan ke file {csv_file}.")