# 1. Import libraries

In [1]:
import pandas as pd

from bs4 import BeautifulSoup

import requests
import re
import numpy as np

# 2. Getting the URLs for each K-Drama

In [7]:
default_URL = "https://mydramalist.com"
drama_URL = []

# range(1, 2) visits only the first page of search results; widen the range
# to collect more pages.
for i in range(1, 2):
    page_URL = f"{default_URL}/search?adv=titles&ty=68&co=3&st=3&so=top&page={i}"

    try:
        # Without a timeout a stalled connection would block this cell forever;
        # a timeout surfaces as a RequestException and is reported below.
        result = requests.get(page_URL, timeout=30)
        result.raise_for_status()
        soup = BeautifulSoup(result.content, "html.parser")  # parsed search-results page

        # href=True skips anchors that carry no link target: concatenating
        # None would raise a TypeError that the handler below does not catch.
        drama_URL.extend(
            default_URL + link['href']
            for link in soup.find_all('a', class_='block', href=True)
        )

    except requests.exceptions.RequestException as e:
        print(f"Error during request: {e}")

# 3. Initialising a dataframe

In [8]:
# Empty frame with one column per attribute collected by the cells below.
kdrama_df = pd.DataFrame(
    columns=[
        'Title',
        'URL',
        'Genres',
        'Tags',
        'Description',
        'Score',
        'Users Scored',
        'Episodes',
        'Content Rating',
    ]
)

In [9]:
# Filling the URL column of the empty frame creates one row per collected
# link; the remaining columns are populated by the cells that follow.
kdrama_df['URL'] = drama_URL

# 4. Scraping relevant data

## Title

In [10]:
def req(url, timeout=30):
    """Download ``url`` and return the response body parsed with ``html.parser``.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    BeautifulSoup
        Parsed document built from the response text.

    Notes
    -----
    The HTTP status code is not checked, so an error page is parsed like any
    other page; callers decide what to do when an expected element is missing.
    Connection problems and timeouts propagate as ``requests`` exceptions.
    """
    result = requests.get(url, timeout=timeout)
    return BeautifulSoup(result.text, 'html.parser')

In [11]:
def get_title(x):
    """Return the text of the page's ``h1.film-title`` heading, or "Unknown".

    ``x`` is the URL of a drama page. The "Unknown" fallback keeps a page
    without the heading from raising AttributeError and aborting the whole
    column computation.
    """
    heading = req(x).find('h1', class_='film-title')
    if heading is None:
        return "Unknown"
    return heading.text


kdrama_df['Title'] = kdrama_df['URL'].apply(get_title)

## Genre

In [12]:
def get_genres(x):
    """Return the genre text listed on the drama page at URL ``x``.

    The value is whatever follows the first colon of the
    ``li.list-item.p-a-0.show-genres`` row, with surrounding whitespace
    removed. Returns "Unknown" when the row is absent. Any exception is
    printed and reported as the string "Error" instead of being raised.
    """
    try:
        genre_row = req(x).find('li', class_='list-item p-a-0 show-genres')
        if not genre_row:
            return "Unknown"

        # Index [1] is the part after the "label:" prefix.
        pieces = genre_row.text.split(':', 1)
        return pieces[1].strip()

    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error"

In [13]:
# get_genres downloads the page itself, so this cell issues one HTTP request
# per row of the frame.
kdrama_df['Genres'] = kdrama_df['URL'].apply(get_genres)

## Tags

In [14]:
def get_tags(x):
    """Return the tag text listed on the drama page at URL ``x``.

    Parameters
    ----------
    x : str
        URL of the drama page; it is downloaded through ``req``.

    Returns
    -------
    str
        The text after the first colon of the ``show-tags`` row with the
        "(Vote or add tags)" link text and surrounding whitespace removed, or
        "Unknown" when the row is missing or contains no colon.
    """
    tags = req(x).find('li', attrs={'class': 'list-item p-a-0 show-tags'})
    if tags is None:
        return "Unknown"

    _label, separator, value = tags.text.partition(':')
    if not separator:
        # No "label: value" shape to split on; report it like a missing row
        # instead of failing with an IndexError.
        return "Unknown"

    # Strip only after the link text is removed, otherwise the whitespace that
    # preceded it is left dangling at the end of the result.
    return value.replace('(Vote or add tags)', '').strip()

In [15]:
# get_tags downloads the page itself, so this cell issues one HTTP request
# per row of the frame.
kdrama_df['Tags'] = kdrama_df['URL'].apply(get_tags)

## Description

In [16]:
def get_synopsis(x):
    """Return the synopsis text of the drama page at URL ``x``.

    Takes the text of the ``div.show-synopsis`` element up to the first
    occurrence of "Edit Translation", deletes every newline character and
    trims the ends. Returns "Unknown" when the element is not on the page.
    """
    synopsis_block = req(x).find('div', class_="show-synopsis")
    if not synopsis_block:
        return "Unknown"

    # Everything from the "Edit Translation" marker onwards is dropped.
    before_marker = synopsis_block.text.split('Edit Translation')[0]
    single_line = before_marker.replace('\n', '')
    return single_line.strip()

In [17]:
# get_synopsis downloads the page itself, so this cell issues one HTTP request
# per row of the frame.
kdrama_df['Description'] = kdrama_df['URL'].apply(get_synopsis)

## Remaining Attributes

In [18]:
def _parse_count(text):
    """Return ``text`` as an int with thousands separators removed, or None if it is not a whole number."""
    try:
        return int(text.replace(',', '').strip())
    except ValueError:
        return None


def get_attributes(x):
    """Collect episode count, score, rater count and content rating for a drama.

    Parameters
    ----------
    x : str
        URL of the drama page; it is downloaded through ``req``.

    Returns
    -------
    tuple
        ``(num_episodes, score, num_users_scored, content_rating)``.
        ``num_episodes`` and ``num_users_scored`` are ints, ``score`` and
        ``content_rating`` are strings. A field is None when its row is
        missing or cannot be parsed. When the page cannot be fetched the
        error is printed and all four fields are None, so the result always
        unpacks into four columns.
    """
    try:
        rows = req(x).find_all('li', attrs={'class': 'list-item p-a-0'})
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None, None, None

    num_episodes = None
    score = None
    num_users_scored = None
    content_rating = None

    for row in rows:
        detail = row.text

        if 'Episodes:' in detail:
            num_episodes = _parse_count(detail.split(':', 1)[1])

        elif 'Score:' in detail:
            rating_text = detail.split(':', 1)[1]
            # The score is kept as text: it is whatever precedes the
            # parenthesised "scored by N users" part.
            score = rating_text.split('(')[0].strip()
            raters = re.search(r'scored by\s*([\d,]+)\s*users', rating_text)
            num_users_scored = _parse_count(raters.group(1)) if raters else None

        elif 'Content Rating:' in detail:
            # maxsplit=1 keeps ratings whose description itself contains a colon.
            content_rating = detail.split(':', 1)[1].strip()

    return num_episodes, score, num_users_scored, content_rating

In [19]:
# One get_attributes call (and therefore one page download) per row; the
# result is a Series holding one entry per drama.
attribute_results = kdrama_df['URL'].apply(get_attributes)

In [20]:
# Column order matches the tuple order returned by get_attributes. A result of
# None (page could not be processed) is replaced by four missing values so the
# rows stay rectangular and the assignment does not fail.
kdrama_df[['Episodes', 'Score', 'Users Scored', 'Content Rating']] = [
    attrs if attrs is not None else (None, None, None, None)
    for attrs in attribute_results
]

In [21]:
# Preview only the first rows so the output stays small when more result
# pages are scraped.
kdrama_df.head(10)

Unnamed: 0,Title,URL,Genres,Tags,Description,Score,Users Scored,Episodes,Content Rating
0,Moving (2023),https://mydramalist.com/25560-moving,"Action, Thriller, Mystery, Supernatural","Graphic Violence, Supernatural Power, Student ...","Kim Bong Seok, Jang Hee Soo, and Lee Gang Hoon...",9.2,13835,20,18+ Restricted (violence & profanity)
1,Move to Heaven (2021),https://mydramalist.com/49231-move-to-heaven,"Life, Drama","Uncle-Nephew Relationship, Autism, Death, Tear...",Han Geu Roo is an autistic 20-year-old. He wor...,9.2,39327,10,18+ Restricted (violence & profanity)
2,Weak Hero Class 1 (2022),https://mydramalist.com/702267-weak-hero,"Action, Youth, Drama","Violence, Friendship, Bromance, School Bullyin...",Yeon Shi Eun is a model student who ranks at t...,9.1,28278,8,18+ Restricted (violence & profanity)
3,Hospital Playlist Season 2 (2021),https://mydramalist.com/57173-hospital-playlist-2,"Romance, Life, Drama, Medical","Multiple Mains, Band, Music, Multiple Couples,...",Everyday is extraordinary for five doctors and...,9.1,26007,12,15+ - Teens 15 or older
4,Flower of Evil (2020),https://mydramalist.com/54625-flower-of-evil,"Thriller, Romance, Crime, Melodrama","Deception, Family Secret, Mystery, Suspense, H...",Although Baek Hee Sung is hiding a dark secret...,9.1,52959,16,15+ - Teens 15 or older
5,Hospital Playlist (2020),https://mydramalist.com/36269-doctor-playbook,"Romance, Life, Drama, Medical","Multiple Mains, Slow Romance, Multiple Couples...",The stories of people going through their days...,9.1,42013,12,15+ - Teens 15 or older
6,Alchemy of Souls (2022),https://mydramalist.com/52939-can-this-person-...,"Action, Historical, Romance, Fantasy","Transmigration, Master-Disciple Relationship, ...",Set in a fictional country called Daeho that d...,9.1,43177,20,15+ - Teens 15 or older
7,Reply 1988 (2015),https://mydramalist.com/13544-reply-1988,"Comedy, Romance, Life, Youth","Nostalgia, Multiple Couples, Poor Family, Love...","Five childhood friends, who all live in the sa...",9.1,42509,20,15+ - Teens 15 or older
8,My Mister (2018),https://mydramalist.com/25172-my-ajusshi,"Psychological, Life, Drama","Nice Male Lead, Depression, Healing, Poor Fema...",Park Dong Hoon is a middle-aged engineer who i...,9.0,25853,16,15+ - Teens 15 or older
9,Under the Queen's Umbrella (2022),https://mydramalist.com/705857-umbrella,"Historical, Comedy, Drama, Political","Motherhood, Mother-Son Relationship, Palace Se...",Within the palace exist troublemaking princes ...,9.0,11980,16,15+ - Teens 15 or older


# 5. Save dataframe as csv file

In [None]:
# to_csv writes the row index as the first, unnamed column by default, so
# read the file back with index_col=0 to avoid an extra "Unnamed: 0" column.
kdrama_df.to_csv('subset1.csv')