In [1]:
import sqlite3 
import pandas as pd
import numpy as np
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
import time
import requests
import re
from tqdm import tqdm
import json

In [2]:
# --- Í≥µÌÜµ ÏÑ§Ï†ï ---
REQUEST_URL = 'https://bff-page.kakao.com/graphql'
HEADERS = {
    'Content-Type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://page.kakao.com/', 
}

# --- Ï†ÄÏû•ÏÜå ---
main_webtoon_list = []      
all_series_ids = set()      

# ÏöîÏùº ÏΩîÎìú
DAY_CODES = range(1, 8) 
DAY_NAMES = {1: 'Ïõî', 2: 'Ìôî', 3: 'Ïàò', 4: 'Î™©', 5: 'Í∏à', 6: 'ÌÜ†', 7: 'Ïùº'}

# ‚ö†Ô∏è Ï§ëÏöî: ÏïÑÍπå Ï∞æÏïÑÏ£ºÏã† 'ÏõêÎ≥∏ ÏøºÎ¶¨' Ï†ÑÏ≤¥ÏûÖÎãàÎã§. Ï†àÎåÄ Ï§ÑÏù¥ÏßÄ ÎßàÏÑ∏Ïöî!
FULL_QUERY_STRING = """
query staticLandingDayOfWeekLayout($queryInput: StaticLandingDayOfWeekParamInput!) {
  staticLandingDayOfWeekLayout(input: $queryInput) {
    ...Layout
  }
}

fragment Layout on Layout {
  id
  type
  sections {
    ...Section
  }
  screenUid
}

fragment Section on Section {
  id
  uid
  type
  title
  ... on RecommendSection {
    isRecommendArea
    isRecommendedItems
  }
  ... on DependOnLoggedInSection {
    loggedInTitle
    loggedInScheme
  }
  ... on SchemeSection {
    scheme
  }
  ... on MetaInfoTypeSection {
    metaInfoType
  }
  ... on TabSection {
    sectionMainTabList {
      uid
      title
      isSelected
      scheme
      additionalString
      subTabList {
        uid
        title
        isSelected
        groupId
      }
    }
  }
  ... on ThemeKeywordSection {
    themeKeywordList {
      uid
      title
      scheme
    }
  }
  ... on StaticLandingDayOfWeekSection {
    isEnd
    totalCount
    param {
      categoryUid
      businessModel {
        name
        param
      }
      subcategory {
        name
        param
      }
      dayTab {
        name
        param
      }
      page
      size
      screenUid
    }
    businessModelList {
      name
      param
    }
    subcategoryList {
      name
      param
    }
    dayTabList {
      name
      param
    }
    promotionBanner {
      ...PromotionBannerItem
    }
  }
  ... on StaticLandingTodayNewSection {
    totalCount
    param {
      categoryUid
      subcategory {
        name
        param
      }
      screenUid
    }
    categoryTabList {
      name
      param
    }
    subcategoryList {
      name
      param
    }
    promotionBanner {
      ...PromotionBannerItem
    }
    viewType
  }
  ... on StaticLandingTodayUpSection {
    isEnd
    totalCount
    param {
      categoryUid
      subcategory {
        name
        param
      }
      page
    }
    categoryTabList {
      name
      param
    }
    subcategoryList {
      name
      param
    }
  }
  ... on StaticLandingRankingSection {
    isEnd
    rankingTime
    totalCount
    param {
      categoryUid
      subcategory {
        name
        param
      }
      rankingType {
        name
        param
      }
      page
      screenUid
    }
    categoryTabList {
      name
      param
    }
    subcategoryList {
      name
      param
    }
    rankingTypeList {
      name
      param
    }
    displayAd {
      ...DisplayAd
    }
    promotionBanner {
      ...PromotionBannerItem
    }
    withOperationArea
    viewType
  }
  ... on StaticLandingGenreSection {
    isEnd
    totalCount
    param {
      categoryUid
      subcategory {
        name
        param
      }
      sortType {
        name
        param
      }
      page
      isComplete
      screenUid
    }
    subcategoryList {
      name
      param
    }
    sortTypeList {
      name
      param
    }
    displayAd {
      ...DisplayAd
    }
    promotionBanner {
      ...PromotionBannerItem
    }
  }
  ... on StaticLandingFreeSeriesSection {
    isEnd
    totalCount
    param {
      categoryUid
      tab {
        name
        param
      }
      page
      screenUid
    }
    tabList {
      name
      param
    }
    promotionBanner {
      ...PromotionBannerItem
    }
  }
  ... on StaticLandingEventSection {
    isEnd
    totalCount
    param {
      categoryUid
      page
    }
    categoryTabList {
      name
      param
    }
  }
  ... on StaticLandingOriginalSection {
    isEnd
    totalCount
    originalCount
    param {
      categoryUid
      subcategory {
        name
        param
      }
      sortType {
        name
        param
      }
      isComplete
      page
      screenUid
    }
    subcategoryList {
      name
      param
    }
    sortTypeList {
      name
      param
    }
    recommendItemList {
      ...Item
    }
  }
  ... on HelixThemeSection {
    subtitle
    isRecommendArea
  }
  groups {
    ...Group
  }
}

fragment PromotionBannerItem on PromotionBannerItem {
  title
  scheme
  leftImage
  rightImage
  eventLog {
    ...EventLogFragment
  }
}

fragment EventLogFragment on EventLog {
  fromGraphql
  click {
    layer1
    layer2
    setnum
    ordnum
    copy
    imp_id
    imp_provider
  }
  eventMeta {
    id
    name
    subcategory
    category
    series
    provider
    series_id
    type
  }
  viewimp_contents {
    type
    name
    id
    imp_area_ordnum
    imp_id
    imp_provider
    imp_type
    layer1
    layer2
  }
  customProps {
    landing_path
    view_type
    helix_id
    helix_yn
    helix_seed
    content_cnt
    event_series_id
    event_ticket_type
    play_url
    banner_uid
  }
}

fragment DisplayAd on DisplayAd {
  sectionUid
  bannerUid
  treviUid
  momentUid
}

fragment Item on Item {
  id
  type
  ...BannerItem
  ...OnAirItem
  ...CardViewItem
  ...CleanViewItem
  ... on DisplayAdItem {
    displayAd {
      ...DisplayAd
    }
  }
  ...PosterViewItem
  ...StrategyViewItem
  ...RankingListViewItem
  ...NormalListViewItem
  ...MoreItem
  ...EventBannerItem
  ...PromotionBannerItem
  ...LineBannerItem
}

fragment BannerItem on BannerItem {
  bannerType
  bannerViewType
  thumbnail
  videoUrl
  badgeList
  statusBadge
  titleImage
  title
  altText
  metaList
  caption
  scheme
  seriesId
  eventLog {
    ...EventLogFragment
  }
  discountRate
  discountRateText
  backgroundColor
  characterImage
}

fragment OnAirItem on OnAirItem {
  thumbnail
  videoUrl
  titleImage
  title
  subtitleList
  caption
  scheme
}

fragment CardViewItem on CardViewItem {
  title
  altText
  thumbnail
  scheme
  badgeList
  ageGradeBadge
  statusBadge
  ageGrade
  selfCensorship
  subtitleList
  caption
  rank
  rankVariation
  isEventBanner
  categoryType
  discountRate
  discountRateText
  backgroundColor
  isBook
  isLegacy
  cardCover {
    ...CardCoverFragment
  }
  eventLog {
    ...EventLogFragment
  }
}

fragment CardCoverFragment on CardCover {
  coverImg
  coverRestricted
}

fragment CleanViewItem on CleanViewItem {
  id
  type
  showPlayerIcon
  scheme
  title
  thumbnail
  badgeList
  ageGradeBadge
  statusBadge
  subtitleList
  rank
  ageGrade
  selfCensorship
  eventLog {
    ...EventLogFragment
  }
  discountRate
  discountRateText
}

fragment PosterViewItem on PosterViewItem {
  id
  type
  showPlayerIcon
  scheme
  title
  altText
  thumbnail
  badgeList
  labelBadgeList
  ageGradeBadge
  statusBadge
  subtitleList
  rank
  rankVariation
  ageGrade
  selfCensorship
  eventLog {
    ...EventLogFragment
  }
  seriesId
  showDimmedThumbnail
  discountRate
  discountRateText
}

fragment StrategyViewItem on StrategyViewItem {
  id
  title
  count
  scheme
}

fragment RankingListViewItem on RankingListViewItem {
  title
  thumbnail
  badgeList
  ageGradeBadge
  statusBadge
  ageGrade
  selfCensorship
  metaList
  descriptionList
  scheme
  rank
  eventLog {
    ...EventLogFragment
  }
  discountRate
  discountRateText
}

fragment NormalListViewItem on NormalListViewItem {
  id
  type
  altText
  ticketUid
  thumbnail
  badgeList
  ageGradeBadge
  statusBadge
  ageGrade
  isAlaramOn
  row1
  row2
  row3 {
    id
    metaList
  }
  row4
  row5
  scheme
  continueScheme
  nextProductScheme
  continueData {
    ...ContinueInfoFragment
  }
  seriesId
  isCheckMode
  isChecked
  isReceived
  isHelixGift
  price
  discountPrice
  discountRate
  discountRateText
  showPlayerIcon
  rank
  isSingle
  singleSlideType
  ageGrade
  selfCensorship
  eventLog {
    ...EventLogFragment
  }
  giftEventLog {
    ...EventLogFragment
  }
}

fragment ContinueInfoFragment on ContinueInfo {
  title
  isFree
  productId
  lastReadProductId
  scheme
  continueProductType
  hasNewSingle
  hasUnreadSingle
}

fragment MoreItem on MoreItem {
  id
  scheme
  title
}

fragment EventBannerItem on EventBannerItem {
  bannerType
  thumbnail
  videoUrl
  titleImage
  title
  subtitleList
  caption
  scheme
  eventLog {
    ...EventLogFragment
  }
}

fragment LineBannerItem on LineBannerItem {
  title
  scheme
  subTitle
  bgColor
  rightImage
  eventLog {
    ...EventLogFragment
  }
}

fragment Group on Group {
  id
  ... on ListViewGroup {
    meta {
      title
      count
    }
  }
  ... on CardViewGroup {
    meta {
      title
      count
    }
  }
  ... on PosterViewGroup {
    meta {
      title
      count
    }
  }
  type
  dataKey
  groups {
    ...GroupInGroup
  }
  items {
    ...Item
  }
}

fragment GroupInGroup on Group {
  id
  type
  dataKey
  items {
    ...Item
  }
  ... on ListViewGroup {
    meta {
      title
      count
    }
  }
  ... on CardViewGroup {
    meta {
      title
      count
    }
  }
  ... on PosterViewGroup {
    meta {
      title
      count
    }
  }
}
"""

def crawl_all_webtoons_pagination():
    print("üöÄ Ï†ÑÏ≤¥ ÏõπÌà∞ Î™©Î°ù(ÌéòÏù¥ÏßÄÎÑ§Ïù¥ÏÖò Ìè¨Ìï®) ÏàòÏßë ÏãúÏûë...")

    for day_code in DAY_CODES:
        current_page = 0
        is_end = False
        collected_count = 0 
        
        pbar = tqdm(desc=f"[{DAY_NAMES[day_code]}ÏöîÏùº] ÏàòÏßë Ï§ë", unit="page")
        
        while not is_end:
            try:
                # ÏøºÎ¶¨ Ï†ÑÏ≤¥ Íµ¨ÏÑ±
                payload = {
                    "query": FULL_QUERY_STRING, # ÏúÑÏóêÏÑú Ï†ïÏùòÌïú Í∏¥ Î¨∏ÏûêÏó¥
                    "variables": {
                        "queryInput": {
                            "categoryUid": 10,
                            "dayTabUid": str(day_code),
                            "type": "Layout",
                            "screenUid": 52,
                            "page": current_page, # ÌéòÏù¥ÏßÄ Î≥ÄÏàò Ï∂îÍ∞Ä
                            "size": 30
                        }
                    }
                }
                
                response = requests.post(
                    REQUEST_URL, 
                    headers=HEADERS, 
                    data=json.dumps(payload)
                )
                response.raise_for_status()
                data = response.json()

                sections = data.get('data', {}).get('staticLandingDayOfWeekLayout', {}).get('sections', [])
                
                if not sections:
                    break
                
                main_section = sections[0]
                
                # Ï¢ÖÎ£å Ï°∞Í±¥ ÌôïÏù∏
                is_end = main_section.get('isEnd', True)
                
                # ÏïÑÏù¥ÌÖú Ï∂îÏ∂ú
                items_found_in_page = 0
                for group in main_section.get('groups', []):
                    for item in group.get('items', []):
                        if item.get('type') == 'CardView':
                            series_id = item.get('eventLog', {}).get('eventMeta', {}).get('series_id')
                            if series_id:
                                main_webtoon_list.append({
                                    'series_id': int(series_id),
                                    'title': item.get('title'),
                                    'url': f'https://page.kakao.com/content/{series_id}',
                                    'category': item.get('eventLog', {}).get('eventMeta', {}).get('subcategory'),
                                    'views': item.get('subtitleList', [None])[0],
                                    # 'thumbnail_url': item.get('thumbnail'),
                                    'weekday': DAY_NAMES[day_code]
                                })
                                all_series_ids.add(int(series_id))
                                items_found_in_page += 1
                
                collected_count += items_found_in_page
                pbar.update(1)
                pbar.set_postfix({'ÎàÑÏ†Å': collected_count})
                
                current_page += 1
                time.sleep(0.3) # 0.3Ï¥à ÎåÄÍ∏∞ (ÏÑúÎ≤Ñ ÏòàÏùò ÏßÄÌÇ§Í∏∞)
                
            except Exception as e:
                print(f"\nError on {DAY_NAMES[day_code]} page {current_page}: {e}")
                # ÏóêÎü¨ Î∞úÏÉù Ïãú Î∞òÎ≥µÎ¨∏ ÌÉàÏ∂ú
                break
        
        pbar.close()

    print(f"\n‚úÖ ÏàòÏßë ÏôÑÎ£å!")
    print(f"Ï¥ù ÏõπÌà∞ Í∞úÏàò (Ï§ëÎ≥µ Ìè¨Ìï® ÏöîÏùºÎ≥Ñ Ìï©ÏÇ∞): {len(main_webtoon_list)}")
    print(f"Í≥†Ïú† ÏûëÌíà ID Í∞úÏàò: {len(all_series_ids)}")

# --- Ïã§Ìñâ ---
# if __name__ == "__main__":
#     crawl_all_webtoons_pagination()

In [None]:
# # 1. ÏöîÏ≤≠ URL
# REQUEST_URL = 'https://bff-page.kakao.com/graphql'

# # 2. Ìó§Îçî ÏÑ§Ï†ï
# headers = {
#     'Content-Type': 'application/json',
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
#     'Referer': 'https://page.kakao.com/', 
# }

# # 3. GraphQL ÏøºÎ¶¨ÏôÄ Î≥ÄÏàò (Ï†úÍ≥µÌï¥Ï£ºÏã† Î¨∏ÏûêÏó¥ Í∑∏ÎåÄÎ°ú ÏÇ¨Ïö©)
# query_body = {
#     "query":"\n    query contentHomeInfo($seriesId: Long!) {\n  contentHomeInfo(seriesId: $seriesId) {\n    about {\n      id\n      themeKeywordList {\n        uid\n        title\n        scheme\n      }\n      description\n      screenshotList\n      authorList {\n        id\n        name\n        role\n        roleDisplayName\n      }\n      detail {\n        id\n        publisherName\n        retailPrice\n        ageGrade\n        category\n        rank\n      }\n      guideTitle\n      characterList {\n        thumbnail\n        name\n        description\n      }\n      detailInfoList {\n        title\n        info\n      }\n    }\n    recommend {\n      id\n      seriesId\n      list {\n        ...ContentRecommendGroup\n      }\n    }\n  }\n}\n    \n    fragment ContentRecommendGroup on ContentRecommendGroup {\n  id\n  impLabel\n  type\n  title\n  description\n  items {\n    id\n    type\n    ...PosterViewItem\n  }\n}\n    \n\n    fragment PosterViewItem on PosterViewItem {\n  id\n  type\n  showPlayerIcon\n  scheme\n  title\n  altText\n  thumbnail\n  badgeList\n  labelBadgeList\n  ageGradeBadge\n  statusBadge\n  subtitleList\n  rank\n  rankVariation\n  ageGrade\n  selfCensorship\n  eventLog {\n    ...EventLogFragment\n  }\n  seriesId\n  showDimmedThumbnail\n  discountRate\n  discountRateText\n}\n    \n\n    fragment EventLogFragment on EventLog {\n  fromGraphql\n  click {\n    layer1\n    layer2\n    setnum\n    ordnum\n    copy\n    imp_id\n    imp_provider\n  }\n  eventMeta {\n    id\n    name\n    subcategory\n    category\n    series\n    provider\n    series_id\n    type\n  }\n  viewimp_contents {\n    type\n    name\n    id\n    imp_area_ordnum\n    imp_id\n    imp_provider\n    imp_type\n    layer1\n    layer2\n  }\n  customProps {\n    landing_path\n    view_type\n    helix_id\n    helix_yn\n    helix_seed\n    content_cnt\n    event_series_id\n    event_ticket_type\n    play_url\n    banner_uid\n  }\n}\n    ",
#     "variables":{"seriesId":60910969}} # <-- ÏûëÌíà ID: 60910969

In [3]:
# --- Í≥µÌÜµ ÏÑ§Ï†ï ---
REQUEST_URL = 'https://bff-page.kakao.com/graphql'
HEADERS = {
    'Content-Type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://page.kakao.com/', 
}

# --- ÏµúÏ¢Ö Ï†ÄÏû•Îê† Î¶¨Ïä§Ìä∏ (DF Ïó≠Ìï†ÏùÑ Ìï† ÎîïÏÖîÎÑàÎ¶¨ Î¶¨Ïä§Ìä∏) ---
author_role_list = []       # ÏûëÍ∞Ä/Í∑∏Î¶º ÏûëÍ∞Ä/ÏõêÏûë ÏûëÍ∞Ä Ï†ïÎ≥¥
keyword_list = []           # ÌÇ§ÏõåÎìú Ï†ïÎ≥¥ Ï†ÄÏû•Ïö© Î¶¨Ïä§Ìä∏

# ÏöîÏùº ÏΩîÎìú: 1=Ïõî, 2=Ìôî, ..., 7=Ïùº (dayTabUid)
DAY_CODES = range(1, 8) 
DAY_NAMES = {1: 'Ïõî', 2: 'Ìôî', 3: 'Ïàò', 4: 'Î™©', 5: 'Í∏à', 6: 'ÌÜ†', 7: 'Ïùº'}

# --- ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÌÅ¨Î°§ÎßÅ Ìï®Ïàò (Ï§ÑÍ±∞Î¶¨, ÏûëÍ∞Ä Ï†ïÎ≥¥ ÏàòÏßë) ---
def crawl_detail_info():
    """ÏàòÏßëÎêú Î™®Îì† series_idÏóê ÎåÄÌï¥ ÏÉÅÏÑ∏ Ï†ïÎ≥¥Î•º ÌÅ¨Î°§ÎßÅÌï©ÎãàÎã§."""
    print("\nüöÄ 2Îã®Í≥Ñ: ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÌÅ¨Î°§ÎßÅ ÏãúÏûë...")

    detail_query = {
        "query":"\n    query contentHomeInfo($seriesId: Long!) {\n  contentHomeInfo(seriesId: $seriesId) {\n    about {\n      id\n      themeKeywordList {\n        uid\n        title\n        scheme\n      }\n      description\n      screenshotList\n      authorList {\n        id\n        name\n        role\n        roleDisplayName\n      }\n      detail {\n        id\n        publisherName\n        retailPrice\n        ageGrade\n        category\n        rank\n      }\n      guideTitle\n      characterList {\n        thumbnail\n        name\n        description\n      }\n      detailInfoList {\n        title\n        info\n      }\n    }\n    recommend {\n      id\n      seriesId\n      list {\n        ...ContentRecommendGroup\n      }\n    }\n  }\n}\n    \n    fragment ContentRecommendGroup on ContentRecommendGroup {\n  id\n  impLabel\n  type\n  title\n  description\n  items {\n    id\n    type\n    ...PosterViewItem\n  }\n}\n    \n\n    fragment PosterViewItem on PosterViewItem {\n  id\n  type\n  showPlayerIcon\n  scheme\n  title\n  altText\n  thumbnail\n  badgeList\n  labelBadgeList\n  ageGradeBadge\n  statusBadge\n  subtitleList\n  rank\n  rankVariation\n  ageGrade\n  selfCensorship\n  eventLog {\n    ...EventLogFragment\n  }\n  seriesId\n  showDimmedThumbnail\n  discountRate\n  discountRateText\n}\n    \n\n    fragment EventLogFragment on EventLog {\n  fromGraphql\n  click {\n    layer1\n    layer2\n    setnum\n    ordnum\n    copy\n    imp_id\n    imp_provider\n  }\n  eventMeta {\n    id\n    name\n    subcategory\n    category\n    series\n    provider\n    series_id\n    type\n  }\n  viewimp_contents {\n    type\n    name\n    id\n    imp_area_ordnum\n    imp_id\n    imp_provider\n    imp_type\n    layer1\n    layer2\n  }\n  customProps {\n    landing_path\n    view_type\n    helix_id\n    helix_yn\n    helix_seed\n    content_cnt\n    event_series_id\n    event_ticket_type\n    play_url\n    banner_uid\n  }\n}\n    ",
        # Ïã§Ï†ú ÏÉÅÏÑ∏ ÏøºÎ¶¨ (Ï†úÍ≥µÌï¥Ï£ºÏã† Í≤É)Ïùò ÌïµÏã¨Îßå ÎÇ®ÍπÄ
        "variables": {"seriesId": 0} # Ïó¨Í∏∞Ïóê series_id ÏÇΩÏûÖ
    }

    # ÏÉÅÏÑ∏ Ï†ïÎ≥¥ Ï†ÄÏû•ÏùÑ ÏúÑÌïú ÏûÑÏãú Î¶¨Ïä§Ìä∏
    temp_detail_list = [] 
    
    # Ï§ëÎ≥µ IDÎäî Ìïú Î≤àÎßå Ï≤òÎ¶¨
    for series_id in tqdm(list(all_series_ids), desc="ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÏàòÏßë Ï§ë"):
        try:
            detail_query["variables"]["seriesId"] = series_id
            
            response = requests.post(
                REQUEST_URL, 
                headers=HEADERS, 
                data=json.dumps(detail_query)
            )
            response.raise_for_status()
            data = response.json()
            
            about_data = data.get('data', {}).get('contentHomeInfo', {}).get('about', {})

            if about_data:
                # A. Ï§ÑÍ±∞Î¶¨ Îì± Î©îÏù∏ Ï†ïÎ≥¥ (ÎÇòÏ§ëÏóê Webtoon_DFÏóê Î≥ëÌï©)
                temp_detail_list.append({
                    'series_id': series_id,
                    'description': about_data.get('description', 'ÏÑ§Î™Ö ÏóÜÏùå').replace('\\n', '\n'),
                    'publisher': about_data.get('detail', {}).get('publisherName'),
                    'age_limit': about_data.get('detail', {}).get('ageGrade'),
                })
                
                # B. ÏûëÍ∞Ä Ïó≠Ìï†Î≥Ñ Ï†ïÎ≥¥ (Author_DF ÏÉùÏÑ±)
                for author in about_data.get('authorList', []):
                    author_role_list.append({
                        'series_id': series_id,
                        'author_id': author.get('id', 'N/A'),
                        'name': author.get('name'),
                        'role': author.get('role'),             # Writer, Painter, Original Îì±
                        'role_display': author.get('roleDisplayName')
                    })
            
                # C. ÌÇ§ÏõåÎìú Ï†ïÎ≥¥ Ï†ÄÏû•
                for keyword in about_data.get('themeKeywordList', []):
                    keyword_list.append({
                        'series_id': series_id,
                        'keyword_title': keyword.get('title'),
                    })
            
            # ÏßßÏùÄ ÏßÄÏó∞ ÏãúÍ∞ÑÏùÑ ÎëêÏñ¥ ÏÑúÎ≤Ñ Î∂ÄÌïòÎ•º Ï§ÑÏûÑ
            time.sleep(0.1) 
            
        except Exception as e:
            print(f"[{series_id}] ÏÉÅÏÑ∏ ÌÅ¨Î°§ÎßÅ Ïò§Î•ò Î∞úÏÉù: {e}")
            time.sleep(1) # ÏóêÎü¨ Ïãú ÏßÄÏó∞ ÏãúÍ∞ÑÏùÑ ÎäòÎ†§ ÏûÑÏãú Ï∞®Îã® Î∞©ÏßÄ

    return temp_detail_list

# --- Ïã§Ìñâ Î∂ÄÎ∂Ñ ---
if __name__ == "__main__":
    
    # 1Îã®Í≥Ñ Ïã§Ìñâ: ÏöîÏùºÎ≥Ñ Î™©Î°ù ÏàòÏßë
    crawl_all_webtoons_pagination()
    
    # 2Îã®Í≥Ñ Ïã§Ìñâ: ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÏàòÏßë
    detail_data_list = crawl_detail_info()
    
    # --- 3. Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑ Íµ¨Ï°∞Ïóê ÎßûÍ≤å ÏµúÏ¢Ö Ï†ïÎ¶¨ ---
    
    # 1. Webtoon_DF: Î©îÏù∏ Î™©Î°ù + ÏÉÅÏÑ∏ Ï†ïÎ≥¥(Ï§ÑÍ±∞Î¶¨ Îì±) Î≥ëÌï©
    final_webtoon_df = []
    description_map = {item['series_id']: item for item in detail_data_list}
    
    for webtoon in main_webtoon_list:
        series_id = webtoon['series_id']
        detail = description_map.get(series_id, {})
        
        # ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÌïÑÎìúÎ•º Î©îÏù∏ ÏõπÌà∞ Ï†ïÎ≥¥Ïóê Ï∂îÍ∞Ä
        webtoon['description'] = detail.get('description')
        webtoon['publisher'] = detail.get('publisher')
        webtoon['age_limit_detail'] = detail.get('age_limit')
        final_webtoon_df.append(webtoon)

    # 2. Author_DF: ÏûëÍ∞Ä Ï†ïÎ≥¥
    final_author_df = author_role_list
    
    print("\n--- ‚úÖ ÏµúÏ¢Ö Í≤∞Í≥º ÏöîÏïΩ ---")
    print(f"**Webtoon_DF (Î©îÏù∏ Î™©Î°ù + Ï§ÑÍ±∞Î¶¨)**: Ï¥ù {len(final_webtoon_df)}Í∞ú Î†àÏΩîÎìú")
    print(f"**Author_DF (ÏûëÍ∞Ä Ïó≠Ìï† Ï†ïÎ≥¥)**: Ï¥ù {len(final_author_df)}Í∞ú Î†àÏΩîÎìú")

    # ÏòàÏãú Ï∂úÎ†•
    if final_webtoon_df:
        print("\n[Webtoon_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÏûëÌíà)]")
        print(f"Ï†úÎ™©: {final_webtoon_df[0]['title']}")
        print(f"ÏöîÏùº: {final_webtoon_df[0]['weekday']}")
        print(f"Ï§ÑÍ±∞Î¶¨: {final_webtoon_df[0]['description'][:50]}...")
        
    if final_author_df:
        print("\n[Author_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÏûëÍ∞Ä)]")
        print(f"ÏûëÌíà ID: {final_author_df[0]['series_id']}")
        print(f"Ïù¥Î¶Ñ: {final_author_df[0]['name']} ({final_author_df[0]['role_display']})")
        
    if keyword_list:
        print(f"\n**Keyword_DF (ÌÇ§ÏõåÎìú Ï†ïÎ≥¥)**: Ï¥ù {len(keyword_list)}Í∞ú Î†àÏΩîÎìú")
        print("\n[Keyword_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÌÇ§ÏõåÎìú)]")
        print(f"ÏûëÌíà ID: {keyword_list[0]['series_id']}")
        print(f"ÌÇ§ÏõåÎìú: {keyword_list[0]['keyword_title']}")

üöÄ Ï†ÑÏ≤¥ ÏõπÌà∞ Î™©Î°ù(ÌéòÏù¥ÏßÄÎÑ§Ïù¥ÏÖò Ìè¨Ìï®) ÏàòÏßë ÏãúÏûë...


[ÏõîÏöîÏùº] ÏàòÏßë Ï§ë: 9page [00:03,  2.25page/s, ÎàÑÏ†Å=201]
[ÌôîÏöîÏùº] ÏàòÏßë Ï§ë: 10page [00:04,  2.39page/s, ÎàÑÏ†Å=231]
[ÏàòÏöîÏùº] ÏàòÏßë Ï§ë: 9page [00:03,  2.26page/s, ÎàÑÏ†Å=223]
[Î™©ÏöîÏùº] ÏàòÏßë Ï§ë: 9page [00:03,  2.25page/s, ÎàÑÏ†Å=217]
[Í∏àÏöîÏùº] ÏàòÏßë Ï§ë: 11page [00:04,  2.28page/s, ÎàÑÏ†Å=251]
[ÌÜ†ÏöîÏùº] ÏàòÏßë Ï§ë: 10page [00:04,  2.19page/s, ÎàÑÏ†Å=237]
[ÏùºÏöîÏùº] ÏàòÏßë Ï§ë: 8page [00:03,  2.28page/s, ÎàÑÏ†Å=193]



‚úÖ ÏàòÏßë ÏôÑÎ£å!
Ï¥ù ÏõπÌà∞ Í∞úÏàò (Ï§ëÎ≥µ Ìè¨Ìï® ÏöîÏùºÎ≥Ñ Ìï©ÏÇ∞): 1553
Í≥†Ïú† ÏûëÌíà ID Í∞úÏàò: 1497

üöÄ 2Îã®Í≥Ñ: ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÌÅ¨Î°§ÎßÅ ÏãúÏûë...


ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÏàòÏßë Ï§ë: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1497/1497 [06:31<00:00,  3.82it/s]


--- ‚úÖ ÏµúÏ¢Ö Í≤∞Í≥º ÏöîÏïΩ ---
**Webtoon_DF (Î©îÏù∏ Î™©Î°ù + Ï§ÑÍ±∞Î¶¨)**: Ï¥ù 1553Í∞ú Î†àÏΩîÎìú
**Author_DF (ÏûëÍ∞Ä Ïó≠Ìï† Ï†ïÎ≥¥)**: Ï¥ù 3981Í∞ú Î†àÏΩîÎìú

[Webtoon_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÏûëÌíà)]
Ï†úÎ™©: ÌöåÍ∑ÄÌïú ÏïÖÎÖÄÎäî ÏàúÍ≤∞ÏÑúÏïΩ Ìï©ÎãàÎã§
ÏöîÏùº: Ïõî
Ï§ÑÍ±∞Î¶¨: ÎºàÎåÄ ÏûàÎäî ÌõÑÏûë ÏßëÏïà, ÌéëÌéë Ïç®ÎèÑ ÎÇ®ÏùÑ Ïû¨Î¨º, Î∞òÎ∞òÌïú ÏñºÍµ¥ÍπåÏßÄ Í∞ÄÏßÑ Î¶¨ÎîîÏïÑ ÏóêÎ∞òÏãú.
Í∑∏ÎÖÄÍ∞Ä...

[Author_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÏûëÍ∞Ä)]
ÏûëÌíà ID: 56901633
Ïù¥Î¶Ñ: Í∑∏Î†â Îü¨Ïπ¥ (Í∏Ä)

**Keyword_DF (ÌÇ§ÏõåÎìú Ï†ïÎ≥¥)**: Ï¥ù 14788Í∞ú Î†àÏΩîÎìú

[Keyword_DF ÏòàÏãú (Ï≤´ Î≤àÏß∏ ÌÇ§ÏõåÎìú)]
ÏûëÌíà ID: 66813956
ÌÇ§ÏõåÎìú: ÌòÑÎåÄÎ°úÎß®Ïä§





In [10]:
# --- ÏµúÏ¢Ö Ï†ÄÏû•Îê† Î¶¨Ïä§Ìä∏ (DF Ïó≠Ìï†ÏùÑ Ìï† ÎîïÏÖîÎÑàÎ¶¨ Î¶¨Ïä§Ìä∏) ---
thumbnail_list = []      # ÏõπÌà∞ Î©îÏù∏ Ï†ïÎ≥¥ + Ï§ÑÍ±∞Î¶¨ (ÎÇòÏ§ëÏóê Î≥ëÌï©Îê®)

# --- Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÌÅ¨Î°§ÎßÅ Ìï®Ïàò ---
def crawl_thumbnail_info():
    """ÏàòÏßëÎêú Î™®Îì† series_idÏóê ÎåÄÌï¥ Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÌÅ¨Î°§ÎßÅÌï©ÎãàÎã§."""
    print("\nüöÄ 2Îã®Í≥Ñ: Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÌÅ¨Î°§ÎßÅ ÏãúÏûë...")

    detail_query = {
        "query":"\n    query contentHomeOverview($seriesId: Long!) {\n  contentHomeOverview(seriesId: $seriesId) {\n    id\n    seriesId\n    displayAd {\n      ...DisplayAd\n      ...DisplayAd\n    }\n    content {\n      ...SeriesFragment\n    }\n    displayAd {\n      ...DisplayAd\n    }\n    lastNoticeDate\n    setList {\n      ...NormalListViewItem\n    }\n    relatedSeries {\n      ...SeriesFragment\n    }\n  }\n}\n    \n    fragment DisplayAd on DisplayAd {\n  sectionUid\n  bannerUid\n  treviUid\n  momentUid\n}\n    \n\n    fragment SeriesFragment on Series {\n  id\n  seriesId\n  title\n  thumbnail\n  landThumbnail\n  categoryUid\n  lang\n  category\n  categoryType\n  subcategoryUid\n  subcategory\n  badge\n  isAllFree\n  isWaitfree\n  ageGrade\n  state\n  onIssue\n  authors\n  description\n  pubPeriod\n  freeSlideCount\n  lastSlideAddedDate\n  waitfreeBlockCount\n  waitfreePeriodByMinute\n  bm\n  saleState\n  startSaleDt\n  saleMethod\n  discountRate\n  discountRateText\n  serviceProperty {\n    ...ServicePropertyFragment\n  }\n  operatorProperty {\n    ...OperatorPropertyFragment\n  }\n  assetProperty {\n    ...AssetPropertyFragment\n  }\n  translateProperty {\n    ...TranslatePropertyFragment\n  }\n}\n    \n\n    fragment ServicePropertyFragment on ServiceProperty {\n  viewCount\n  readCount\n  ratingCount\n  ratingSum\n  commentCount\n  pageContinue {\n    ...ContinueInfoFragment\n  }\n  todayGift {\n    ...TodayGift\n  }\n  preview {\n    ...PreviewFragment\n    ...PreviewFragment\n  }\n  waitfreeTicket {\n    ...WaitfreeTicketFragment\n  }\n  isAlarmOn\n  isLikeOn\n  ticketCount\n  purchasedDate\n  lastViewInfo {\n    ...LastViewInfoFragment\n  }\n  purchaseInfo {\n    ...PurchaseInfoFragment\n  }\n  preview {\n    ...PreviewFragment\n  }\n  ticketInfo {\n    price\n    discountPrice\n    ticketType\n  }\n}\n    \n\n    fragment ContinueInfoFragment on ContinueInfo {\n  title\n  isFree\n  productId\n  lastReadProductId\n  scheme\n  continueProductType\n  hasNewSingle\n  hasUnreadSingle\n}\n    \n\n    fragment TodayGift on TodayGift {\n  id\n  uid\n  ticketType\n  ticketKind\n  ticketCount\n  ticketExpireAt\n  ticketExpiredText\n  isReceived\n  seriesId\n}\n    \n\n    fragment PreviewFragment on Preview {\n  item {\n    ...PreviewSingleFragment\n  }\n  nextItem {\n    ...PreviewSingleFragment\n  }\n  usingScroll\n}\n    \n\n    fragment PreviewSingleFragment on Single {\n  id\n  productId\n  seriesId\n  title\n  thumbnail\n  badge\n  isFree\n  ageGrade\n  state\n  slideType\n  lastReleasedDate\n  size\n  pageCount\n  isHidden\n  remainText\n  isWaitfreeBlocked\n  saleState\n  operatorProperty {\n    ...OperatorPropertyFragment\n  }\n  assetProperty {\n    ...AssetPropertyFragment\n  }\n}\n    \n\n    fragment OperatorPropertyFragment on OperatorProperty {\n  thumbnail\n  copy\n  helixImpId\n  isTextViewer\n  selfCensorship\n  isBook\n  cashInfo {\n    discountRate\n    setDiscountRate\n  }\n  ticketInfo {\n    price\n    discountPrice\n    ticketType\n  }\n}\n    \n\n    fragment AssetPropertyFragment on AssetProperty {\n  bannerImage\n  cardImage\n  cardTextImage\n  cleanImage\n  ipxVideo\n  bannerSet {\n    ...BannerSetFragment\n  }\n  cardSet {\n    ...CardSetFragment\n  }\n  cardCover {\n    ...CardCoverFragment\n  }\n}\n    \n\n    fragment BannerSetFragment on BannerSet {\n  backgroundImage\n  backgroundColor\n  mainImage\n  titleImage\n}\n    \n\n    fragment CardSetFragment on CardSet {\n  backgroundColor\n  backgroundImage\n}\n    \n\n    fragment CardCoverFragment on CardCover {\n  coverImg\n  coverRestricted\n}\n    \n\n    fragment WaitfreeTicketFragment on WaitfreeTicket {\n  chargedPeriod\n  chargedCount\n  chargedAt\n}\n    \n\n    fragment LastViewInfoFragment on LastViewInfo {\n  isDone\n  lastViewDate\n  rate\n  spineIndex\n}\n    \n\n    fragment PurchaseInfoFragment on PurchaseInfo {\n  purchaseType\n  rentExpireDate\n  expired\n}\n    \n\n    fragment TranslatePropertyFragment on TranslateProperty {\n  category {\n    ...LocaleMapFragment\n  }\n  sub_category {\n    ...LocaleMapFragment\n  }\n}\n    \n\n    fragment LocaleMapFragment on LocaleMap {\n  ko\n  en\n  th\n}\n    \n\n    fragment NormalListViewItem on NormalListViewItem {\n  id\n  type\n  altText\n  ticketUid\n  thumbnail\n  badgeList\n  ageGradeBadge\n  statusBadge\n  ageGrade\n  isAlaramOn\n  row1\n  row2\n  row3 {\n    id\n    metaList\n  }\n  row4\n  row5\n  scheme\n  continueScheme\n  nextProductScheme\n  continueData {\n    ...ContinueInfoFragment\n  }\n  seriesId\n  isCheckMode\n  isChecked\n  isReceived\n  isHelixGift\n  price\n  discountPrice\n  discountRate\n  discountRateText\n  showPlayerIcon\n  rank\n  isSingle\n  singleSlideType\n  ageGrade\n  selfCensorship\n  eventLog {\n    ...EventLogFragment\n  }\n  giftEventLog {\n    ...EventLogFragment\n  }\n}\n    \n\n    fragment EventLogFragment on EventLog {\n  fromGraphql\n  click {\n    layer1\n    layer2\n    setnum\n    ordnum\n    copy\n    imp_id\n    imp_provider\n  }\n  eventMeta {\n    id\n    name\n    subcategory\n    category\n    series\n    provider\n    series_id\n    type\n  }\n  viewimp_contents {\n    type\n    name\n    id\n    imp_area_ordnum\n    imp_id\n    imp_provider\n    imp_type\n    layer1\n    layer2\n  }\n  customProps {\n    landing_path\n    view_type\n    helix_id\n    helix_yn\n    helix_seed\n    content_cnt\n    event_series_id\n    event_ticket_type\n    play_url\n    banner_uid\n  }\n}\n    ",
        "variables":{"seriesId":0}
    }
    
        # Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ Ï†ÄÏû•ÏùÑ ÏúÑÌïú ÏûÑÏãú Î¶¨Ïä§Ìä∏
    temp_detail_list = [] 
    
    # Ï§ëÎ≥µ IDÎäî Ìïú Î≤àÎßå Ï≤òÎ¶¨
    for series_id in tqdm(list(all_series_ids), desc="Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÏàòÏßë Ï§ë"):
        try:
            detail_query["variables"]["seriesId"] = series_id
            
            response = requests.post(
                REQUEST_URL, 
                headers=HEADERS, 
                data=json.dumps(detail_query)
            )
            response.raise_for_status()
            data = response.json()
            
            thumbnail = data.get('data', {}).get('contentHomeOverview', {}).get('content', {}).get("thumbnail")
            
            if thumbnail:
                # A. Ï§ÑÍ±∞Î¶¨ Îì± Î©îÏù∏ Ï†ïÎ≥¥ (ÎÇòÏ§ëÏóê Webtoon_DFÏóê Î≥ëÌï©)
                temp_detail_list.append({
                    'series_id': series_id,
                    'thumbnail': thumbnail,
                })
            
            # ÏßßÏùÄ ÏßÄÏó∞ ÏãúÍ∞ÑÏùÑ ÎëêÏñ¥ ÏÑúÎ≤Ñ Î∂ÄÌïòÎ•º Ï§ÑÏûÑ
            time.sleep(0.1) 
            
        except Exception as e:
            print(f"[{series_id}] ÏÉÅÏÑ∏ ÌÅ¨Î°§ÎßÅ Ïò§Î•ò Î∞úÏÉù: {e}")
            time.sleep(1) # ÏóêÎü¨ Ïãú ÏßÄÏó∞ ÏãúÍ∞ÑÏùÑ ÎäòÎ†§ ÏûÑÏãú Ï∞®Îã® Î∞©ÏßÄ

    return temp_detail_list

# --- Ïã§Ìñâ Î∂ÄÎ∂Ñ ---
if __name__ == "__main__":
    # 3Îã®Í≥Ñ Ïã§Ìñâ: ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÏàòÏßë
    detail_data_list = crawl_thumbnail_info()
    print(detail_data_list)
    # --- 3. Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑ Íµ¨Ï°∞Ïóê ÎßûÍ≤å ÏµúÏ¢Ö Ï†ïÎ¶¨ ---
    
    # 1. Webtoon_DF: Î©îÏù∏ Î™©Î°ù + Ïç∏ÎÑ§Ïùº url Î≥ëÌï©
    description_map = {item['series_id']: item for item in detail_data_list}
    
    for webtoon in main_webtoon_list:
        series_id = webtoon['series_id']
        detail = description_map.get(series_id, {})
        
        # ÏÉÅÏÑ∏ Ï†ïÎ≥¥ ÌïÑÎìúÎ•º Î©îÏù∏ ÏõπÌà∞ Ï†ïÎ≥¥Ïóê Ï∂îÍ∞Ä
        webtoon['thumbnailUrl'] = detail.get('thumbnail')
        final_webtoon_df.append(webtoon)


üöÄ 2Îã®Í≥Ñ: Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÌÅ¨Î°§ÎßÅ ÏãúÏûë...


Ïç∏ÎÑ§Ïùº Ïù¥ÎØ∏ÏßÄ ÏàòÏßë Ï§ë: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1497/1497 [05:23<00:00,  4.62it/s]

[{'series_id': 56901633, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=cS2c9y/hyZ8xa8AiV/q6p2AjVzpIOkeDmgkP2rF1&filename=o1'}, {'series_id': 56901634, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=Vw5S3/hyZ8xWu42b/IqJmFoScjg7nKJOCjfv5q0&filename=o1'}, {'series_id': 66813956, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=bYiT6E/hAMB6jna4I/fUZZ5cYPOKIXPlclq1x1U0&filename=o1'}, {'series_id': 67141639, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=uISDn/hAN90PjZPr/dK82f6x6uqv4kcZ2bKk741&filename=o1'}, {'series_id': 67477518, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=bejGJy/dJMcaj8l12I/WjunyepOHLbTA1H1QUjQq0&filename=o1'}, {'series_id': 51396640, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=WiTPL/hyksMAw1hV/qqTK8oy7rk9kgfR7VGKZz0&filename=o1'}, {'series_id': 60907554, 'thumbnail': '//page-images.kakaoentcdn.com/download/resource?kid=nhRcV/dJMb89SkkHi/Y98Ie9eJTmBm5EQXE




In [11]:
from collections import defaultdict

by_series = defaultdict(list)
for item in keyword_list:
    sid = item["series_id"]
    kw  = item["keyword_title"]
    by_series[sid].append(kw)

rows = []
for sid, kws in by_series.items():
    rows.append({
        "series_id": sid,
        "keywords": ", ".join(kws),   # Î¶¨Ïä§Ìä∏ Í∑∏ÎåÄÎ°ú ÎëêÍ≥† Ïã∂ÏúºÎ©¥ kws Î°ú
    })
    
df_keywords = pd.DataFrame(rows).sort_values("series_id").reset_index(drop=True)
df_keywords.head()

Unnamed: 0,series_id,keywords
0,47196143,"Í≤åÏûÑ, Ï†ÑÎ¨∏ÏßÅ, Ìì®Ï†Ñ, Ï†ÑÏüÅ, ÏÉùÏ°¥, ÍπΩÌåê, Î®ºÏπòÌÇ®, ÏãúÏä§ÌÖú, Ï≤úÏû¨, ÎÖ∏Î†•, ÏÑ±Ïû•,..."
1,47497773,"Ìì®Ï†Ñ, Î®ºÏπòÌÇ®, Ï¥àÎä•Î†•"
2,47936014,"Ï≤úÏû¨, Í≤ÄÏÇ¨, ÎßàÎ≤ïÏÇ¨, Í≤©Ìà¨Í∞Ä, Ïö©Î≥ë, Ï†ïÎ†πÏà†ÏÇ¨, Ïö©ÏÇ¨, ÌûêÎü¨"
3,48475105,"Ï†ïÌÜµ, Î¨¥Î¶º, Ï†ÑÏüÅ, Í≥§Î•ú, Î¨¥Îãπ, Ï†ïÌåå, ÏÑ±Ïû•, Ï≤úÏû¨, ÎèÑÏÇ¨, ÎßàÍµê"
4,48683376,"ÌåêÌÉÄÏßÄÎ¨º, Ïã†Î†π, Ï¥àÏõîÏ†ÅÏ°¥Ïû¨, ÏπúÍµ¨, ÌïôÏÉù, ÎèôÎ¨º, Ïù¥ÏïºÍ∏∞Ï§ëÏã¨"


In [12]:
# (Í∞ÄÏ†ï) Ïù¥Ï†ÑÏóê ÏÑ±Í≥µÏ†ÅÏúºÎ°ú ÏàòÏßëÎêú author_role_list ÏÇ¨Ïö©
Author_DF_Raw = pd.DataFrame(author_role_list)

def split_and_clean_authors_final_v3(df, role_key):
    """
    ÎÖºÎ¶¨ ÏàúÏÑúÎ•º ÏôÑÎ≤ΩÌïòÍ≤å ÏàòÏ†ïÌïú Î≤ÑÏ†ÑÏûÖÎãàÎã§.
    1. Î∂ÑÎ¶¨ -> 2. Í∏∞Ï°¥ Ïª¨Îüº ÏÇ≠Ï†ú -> 3. Ïª¨ÎüºÎ™Ö Î≥ÄÍ≤Ω
    """
    # 1. Ïó≠Ìï† ÌïÑÌÑ∞ÎßÅ
    filtered_df = df[df['role'] == role_key].copy()
    
    # 2. Ïù¥Î¶Ñ Î∂ÑÎ¶¨ (ÏûÑÏãú Ïª¨Îüº 'name_temp' ÏÉùÏÑ±)
    filtered_df['name_temp'] = filtered_df['name'].str.split(', ')
    
    # 3. Ìñâ Î∂ÑÎ¶¨ (Explode)
    exploded_df = filtered_df.explode('name_temp').reset_index(drop=True)
    
    # 4. [Ï§ëÏöî] Î∂àÌïÑÏöîÌïú Í∏∞Ï°¥ Ïª¨Îüº Î®ºÏ†Ä ÏÇ≠Ï†ú
    # - 'name': ÏΩ§ÎßàÎ°ú Î≠âÏ≥êÏûàÎçò ÏõêÎ≥∏ Ïù¥Î¶Ñ
    # - 'author_id': UUID ÏóêÎü¨Í∞Ä ÎÇ¨Îçò ÏãùÎ≥ÑÏûê
    exploded_df = exploded_df.drop(columns=['name', 'author_id'])
    
    # 5. [Ï§ëÏöî] ÏûÑÏãú Ïª¨ÎüºÏùò Ïù¥Î¶ÑÏùÑ ÏµúÏ¢Ö Ïù¥Î¶Ñ('name')ÏúºÎ°ú Î≥ÄÍ≤Ω
    final_df = exploded_df.rename(columns={'name_temp': 'name'})
    
    # 6. Ïª¨Îüº ÏàúÏÑú Ï†ïÎ†¨
    return final_df[['series_id', 'name', 'role', 'role_display']]

# --- Ïã§Ìñâ ---

print("--- ‚úÖ ÏûëÍ∞Ä Ï†ïÎ≥¥ Î∂ÑÎ¶¨ Î∞è Ï†ïÏ†ú (V3) ---")

# 1. Í∏Ä ÏûëÍ∞Ä
Writer_DF = split_and_clean_authors_final_v3(Author_DF_Raw, 'writer')
print(f"\n[Í∏Ä ÏûëÍ∞Ä] Ï¥ù {len(Writer_DF)}Î™Ö")
if not Writer_DF.empty:
    print(Writer_DF.head())

# 2. Í∑∏Î¶º ÏûëÍ∞Ä
Painter_DF = split_and_clean_authors_final_v3(Author_DF_Raw, 'illustrator')
print(f"\n[Í∑∏Î¶º ÏûëÍ∞Ä] Ï¥ù {len(Painter_DF)}Î™Ö")
if not Painter_DF.empty:
    print(Painter_DF.head())

# 3. ÏõêÏûë ÏûëÍ∞Ä
Original_Author_DF = split_and_clean_authors_final_v3(Author_DF_Raw, 'original_author')
print(f"\n[ÏõêÏûë ÏûëÍ∞Ä] Ï¥ù {len(Original_Author_DF)}Î™Ö")
if not Original_Author_DF.empty:
    print(Original_Author_DF.head())

--- ‚úÖ ÏûëÍ∞Ä Ï†ïÎ≥¥ Î∂ÑÎ¶¨ Î∞è Ï†ïÏ†ú (V3) ---

[Í∏Ä ÏûëÍ∞Ä] Ï¥ù 1499Î™Ö
   series_id       name    role role_display
0   56901633      Í∑∏Î†â Îü¨Ïπ¥  writer            Í∏Ä
1   56901634  ÌîºÌÑ∞ J. ÌÜ†ÎßàÏãú  writer            Í∏Ä
2   66813956        Îç§Î≤ôÏù¥  writer            Í∏Ä
3   66813956         ÌîåÎûë  writer            Í∏Ä
4   67141639         ÏÑ§Í∏∞  writer            Í∏Ä

[Í∑∏Î¶º ÏûëÍ∞Ä] Ï¥ù 1644Î™Ö
   series_id     name         role role_display
0   56901633    Î¶¨Ïïî ÏÉ§ÌîÑ  illustrator           Í∑∏Î¶º
1   56901634  Ìå®Ìä∏Î¶≠ Í∏ÄÎ¶¨Ïä®  illustrator           Í∑∏Î¶º
2   66813956      Îç§Î≤ôÏù¥  illustrator           Í∑∏Î¶º
3   67141639       Ïù¥Îß§  illustrator           Í∑∏Î¶º
4   67477518      Ìï®ÎçïÏù¥  illustrator           Í∑∏Î¶º

[ÏõêÏûë ÏûëÍ∞Ä] Ï¥ù 1148Î™Ö
   series_id name             role role_display
0   66813956   Ïù¥ÎÖ∏  original_author           ÏõêÏûë
1   67141639  Î¨∏Ï†ïÎØº  original_author           ÏõêÏûë
2   60907554   ÏãúÏïº  original_author           

In [21]:
Writer_DF.to_csv("kp_writer.csv", index=False)
Painter_DF.to_csv("kp_painter.csv", index=False)
Original_Author_DF.to_csv("kp_original.csv", index=False)

In [13]:
kp_df = pd.DataFrame(final_webtoon_df)
kp_df.head()

Unnamed: 0,series_id,title,url,category,views,weekday,description,publisher,age_limit_detail,thumbnailUrl
0,68059016,ÌöåÍ∑ÄÌïú ÏïÖÎÖÄÎäî ÏàúÍ≤∞ÏÑúÏïΩ Ìï©ÎãàÎã§,https://page.kakao.com/content/68059016,Î°úÌåê,99.9Îßå,Ïõî,"ÎºàÎåÄ ÏûàÎäî ÌõÑÏûë ÏßëÏïà, ÌéëÌéë Ïç®ÎèÑ ÎÇ®ÏùÑ Ïû¨Î¨º, Î∞òÎ∞òÌïú ÏñºÍµ¥ÍπåÏßÄ Í∞ÄÏßÑ Î¶¨ÎîîÏïÑ ÏóêÎ∞òÏãú....",NHN,All,//page-images.kakaoentcdn.com/download/resourc...
1,61822163,ÏïÖÎãπÎì§ÏóêÍ≤å ÌÇ§ÏõåÏßÄÎäî Ï§ëÏûÖÎãàÎã§,https://page.kakao.com/content/61822163,Î°úÌåê,"4,436.4Îßå",Ïõî,Ï¶êÍ≤®Î≥¥Îçò Î°úÌåê ÏÜåÏÑ§Ïùò ÏóëÏä§Ìä∏Îùº ÏÜåÎÖÄÎ°ú ÎπôÏùòÌïú ÏßÄ 1ÎÖÑ ÎßåÏóê ÎÇ¥Í∞Ä Í≥ß Ï´ìÍ≤®ÎÇ† Ïã†ÏÑ∏ÎùºÎäî...,ÌÖåÎùºÌïÄ,All,//page-images.kakaoentcdn.com/download/resourc...
2,63818432,Îû≠Ïª§Î•º ÏúÑÌïú Î∞îÎ•∏ ÏÉùÌôú ÏïàÎÇ¥ÏÑú,https://page.kakao.com/content/63818432,Î°úÌåê,"1,745.7Îßå",Ïõî,"21ÏÑ∏Í∏∞, ÏÑ∏ÏÉÅÏùò Ïû•Î•¥Îäî 'ÌåêÌÉÄÏßÄ'\nÏò® Íµ≠ÎØºÎì§ÏóêÍ≤å Ï∂©Í≤©Í≥º Í≥µÌè¨Î•º ÏïàÍ≤®Ï§Ä 'ÏïÖÎ™ΩÏùò ...",Ïó∞Îã¥XÌÅ¨Îûô,All,//page-images.kakaoentcdn.com/download/resourc...
3,61614855,Ï≤†ÌòàÍ≤ÄÍ∞Ä ÏÇ¨ÎÉ•Í∞úÏùò ÌöåÍ∑Ä,https://page.kakao.com/content/61614855,ÌåêÌÉÄÏßÄ,"4,255.8Îßå",Ïõî,Î∞îÏä§Ïª§Îπå Í∞ÄÎ¨∏Ïùò ÏÇ¨ÎÉ•Í∞ú 'ÎπÑÌÇ§Î•¥'.\n\nÏÇ¨ÎÉ•Í∞úÎ°úÏÑú Í∞ÄÎ¨∏Ïóê Ï∂©ÏÑ±Ìïú Î≥¥ÎãµÏùÄ \nÎ™®Ìï®Í≥º...,redicestudio,All,//page-images.kakaoentcdn.com/download/resourc...
4,56566288,Ïù¥Î≤à ÏÉùÏùÄ Í∞ÄÏ£ºÍ∞Ä ÎêòÍ≤†ÏäµÎãàÎã§,https://page.kakao.com/content/56566288,Î°úÌåê,2Ïñµ,Ïõî,"‚Äú‚Ä¶ÌïúÎ≤à Ìï¥ Î≥¥Ïûê. ÎÇ¥Í∞Ä Í∞ÄÏ£ºÍ∞Ä ÎêòÎäî Í±∞Ïïº.‚Äù\n\nÏ†úÍµ≠ÏóêÏÑú Ï†úÏùºÍ∞ÄÎäî Í∞ÄÎ¨∏, Î°¨Î∞îÎ•¥...",ÎîîÏï§Ïî®ÎØ∏ÎîîÏñ¥ | ÎîîÏï§Ïî®ÏõπÌà∞,All,//page-images.kakaoentcdn.com/download/resourc...


In [14]:
df_merged = (
    kp_df.groupby(
        ["series_id", "title", "url", "category", "views",
         "thumbnailUrl", "description", "publisher", "age_limit_detail"],
        as_index=False
    )["weekday"]
    .agg(lambda s: ", ".join(sorted(set(s))))
)

len(df_merged)

1497

In [15]:
kp_final_df = df_merged.merge(df_keywords, on="series_id", how="left")

def combine_cat(row):
    base = str(row["category"]).strip()           # Ïòà: "Î°úÌåê"
    kws  = str(row["keywords"]).strip()
    if not kws or kws == "nan":
        return base
    return f"{base}, {kws}"

kp_final_df["category"] = kp_final_df.apply(combine_cat, axis=1)

# ÌïÑÏöîÌïòÎ©¥ keywords Ïª¨ÎüºÏùÄ ÎìúÎ°≠
kp_final_df = kp_final_df.drop(columns=["keywords"])
kp_final_df.head()

Unnamed: 0,series_id,title,url,category,views,thumbnailUrl,description,publisher,age_limit_detail,weekday
0,46609100,ÏùÄÎèôÏä§+ÏòπÎèôÏä§,https://page.kakao.com/content/46609100,ÎìúÎùºÎßà,"3,388.5Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïò§Îπ†ÏôÄ Ïù¥Î≥ÑÌñàÏßÄÎßå Í∑ÄÏóΩÍ≥† ÏàúÏàòÌïú ÏûëÏùÄ Í≥†ÏñëÏù¥ ÏùÄÎèôÏùÄ Ïò§ÎäòÎèÑ ÏóÑÎßàÏôÄ Ìï®Íªò Î∞ùÍ≥† Ïî©Ïî©Ìïò...,snowcat,All,Í∏à
1,47196143,Îã¨ÎπõÏ°∞Í∞ÅÏÇ¨,https://page.kakao.com/content/47196143,"ÌåêÌÉÄÏßÄ, Í≤åÏûÑ, Ï†ÑÎ¨∏ÏßÅ, Ìì®Ï†Ñ, Ï†ÑÏüÅ, ÏÉùÏ°¥, ÍπΩÌåê, Î®ºÏπòÌÇ®, ÏãúÏä§ÌÖú, Ï≤úÏû¨, ÎÖ∏Î†•...",3.4Ïñµ,//page-images.kakaoentcdn.com/download/resourc...,"ÏïÑÎ•¥Î∞îÏù¥Ìä∏Î•º Ï†ÑÏ†ÑÌïòÎ©∞ Ïñ¥Î†µÍ≤å ÏÇ¥Îçò ÏÜåÎÖÑÍ∞ÄÏû• Ïù¥ÌòÑ, Í∑∏Í∞Ä ‚ÄòÏúÑÎìú‚ÄôÍ∞Ä ÎêòÏñ¥ Í∞ÄÏÉÅÌòÑÏã§Í≤åÏûÑ...",Ïù∏ÌÉÄÏûÑ,All,Ìôî
2,47497773,Ìä∏Î†àÏù¥Ïä§ - Ïã†Îì§Ïùò ÏãúÏÑ†,https://page.kakao.com/content/47497773,"Ïï°ÏÖò, Ìì®Ï†Ñ, Î®ºÏπòÌÇ®, Ï¥àÎä•Î†•","4,191.7Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïñ¥ÎäêÎÇ† Í∞ëÏûêÍ∏∞ ÏÑ∏ÏÉÅÏóê ÎÇòÌÉÄÎÇò Ïù∏Í∞ÑÎì§ÏùÑ Í≥µÍ≤©ÌïòÍ∏∞ ÏãúÏûëÌïú Ï†ïÏ≤¥Î∂àÎ™ÖÏùò Ï°¥Ïû¨ Ìä∏Îü¨Î∏î.\nÌä∏...,Ïπ¥Ïπ¥Ïò§ÏõπÌà∞Ïä§ÌäúÎîîÏò§,All,Ìôî
3,47740585,ÎÇòÏùò Í∑∏ÎÖÄÎäî Íµ¨ÎØ∏Ìò∏,https://page.kakao.com/content/47740585,ÌåêÌÉÄÏßÄ,"3,434.9Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïñ¥ÎäêÎÇ† ÎÇòÌÉÄÎÇú ÏÇ¨ÎûåÏùÑ Îπ®ÏïÑ Î®πÎäî ÎÇòÎ¨¥ÏöîÍ¥¥ ÎØ∏ÎÖÄ..\nÍ∑∏Î¶¨Í≥† ÏñëÍ∏∞Í∞Ä Ï∂©ÎßåÌïú Ï£ºÏù∏Í≥µ Î¥â...,Îã§Ïò®ÌÅ¨Î¶¨ÏóêÏù¥Ìã∞Î∏å,Fifteen,Ïàò
4,47936014,Ïã†Ïù∏ÏôïÏ¢å,https://page.kakao.com/content/47936014,"ÌåêÌÉÄÏßÄ, Ï≤úÏû¨, Í≤ÄÏÇ¨, ÎßàÎ≤ïÏÇ¨, Í≤©Ìà¨Í∞Ä, Ïö©Î≥ë, Ï†ïÎ†πÏà†ÏÇ¨, Ïö©ÏÇ¨, ÌûêÎü¨","4,985.7Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïú†Î™Ö ÌåêÌÉÄÏßÄ ÏÜåÏÑ§ÏùÑ ÎßåÌôîÌôî Ìïú ÏûëÌíà!\n\nÎßàÏ°±Ïù¥ Í∞ïÏÑ∏ÌïòÍ≥† Ïù∏Î•òÎäî Î©∏Ï¢Ö ÏúÑÍ∏∞Ïóê ÏßÅÎ©¥...,Îã§Ïò®ÌÅ¨Î¶¨ÏóêÏù¥Ìã∞Î∏å,All,ÌÜ†


In [16]:
wirters_pivot = (
    Writer_DF
    .groupby(["series_id", "role"])["name"]
    .agg(lambda s: ", ".join(sorted(set(s))))
    .unstack(fill_value="")         # roleÏùÑ Ïª¨ÎüºÏúºÎ°ú ÌîºÎ≤ó
    .reset_index()
)

kp_final_df = kp_final_df.merge(wirters_pivot, on="series_id", how="left")

In [17]:
painters_pivot = (
    Painter_DF
    .groupby(["series_id", "role"])["name"]
    .agg(lambda s: ", ".join(sorted(set(s))))
    .unstack(fill_value="")         # roleÏùÑ Ïª¨ÎüºÏúºÎ°ú ÌîºÎ≤ó
    .reset_index()
)

kp_final_df = kp_final_df.merge(painters_pivot, on="series_id", how="left")

In [18]:
original_pivot = (
    Original_Author_DF
    .groupby(["series_id", "role"])["name"]
    .agg(lambda s: ", ".join(sorted(set(s))))
    .unstack(fill_value="")         # roleÏùÑ Ïª¨ÎüºÏúºÎ°ú ÌîºÎ≤ó
    .reset_index()
)

kp_final_df = kp_final_df.merge(original_pivot, on="series_id", how="left")

In [19]:
kp_final_df

Unnamed: 0,series_id,title,url,category,views,thumbnailUrl,description,publisher,age_limit_detail,weekday,writer,illustrator,original_author
0,46609100,ÏùÄÎèôÏä§+ÏòπÎèôÏä§,https://page.kakao.com/content/46609100,ÎìúÎùºÎßà,"3,388.5Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïò§Îπ†ÏôÄ Ïù¥Î≥ÑÌñàÏßÄÎßå Í∑ÄÏóΩÍ≥† ÏàúÏàòÌïú ÏûëÏùÄ Í≥†ÏñëÏù¥ ÏùÄÎèôÏùÄ Ïò§ÎäòÎèÑ ÏóÑÎßàÏôÄ Ìï®Íªò Î∞ùÍ≥† Ïî©Ïî©Ìïò...,snowcat,All,Í∏à,Ïä§ÎÖ∏Ïö∞Ï∫£,Ïä§ÎÖ∏Ïö∞Ï∫£,
1,47196143,Îã¨ÎπõÏ°∞Í∞ÅÏÇ¨,https://page.kakao.com/content/47196143,"ÌåêÌÉÄÏßÄ, Í≤åÏûÑ, Ï†ÑÎ¨∏ÏßÅ, Ìì®Ï†Ñ, Ï†ÑÏüÅ, ÏÉùÏ°¥, ÍπΩÌåê, Î®ºÏπòÌÇ®, ÏãúÏä§ÌÖú, Ï≤úÏû¨, ÎÖ∏Î†•...",3.4Ïñµ,//page-images.kakaoentcdn.com/download/resourc...,"ÏïÑÎ•¥Î∞îÏù¥Ìä∏Î•º Ï†ÑÏ†ÑÌïòÎ©∞ Ïñ¥Î†µÍ≤å ÏÇ¥Îçò ÏÜåÎÖÑÍ∞ÄÏû• Ïù¥ÌòÑ, Í∑∏Í∞Ä ‚ÄòÏúÑÎìú‚ÄôÍ∞Ä ÎêòÏñ¥ Í∞ÄÏÉÅÌòÑÏã§Í≤åÏûÑ...",Ïù∏ÌÉÄÏûÑ,All,Ìôî,"ÍπÄÏ§ÄÌòï, Ïù¥ÎèÑÍ≤Ω","ÍπÄÌÉúÌòï, Î∞ïÏ†ïÏó¥, Ïã†C",ÎÇ®Ìù¨ÏÑ±
2,47497773,Ìä∏Î†àÏù¥Ïä§ - Ïã†Îì§Ïùò ÏãúÏÑ†,https://page.kakao.com/content/47497773,"Ïï°ÏÖò, Ìì®Ï†Ñ, Î®ºÏπòÌÇ®, Ï¥àÎä•Î†•","4,191.7Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïñ¥ÎäêÎÇ† Í∞ëÏûêÍ∏∞ ÏÑ∏ÏÉÅÏóê ÎÇòÌÉÄÎÇò Ïù∏Í∞ÑÎì§ÏùÑ Í≥µÍ≤©ÌïòÍ∏∞ ÏãúÏûëÌïú Ï†ïÏ≤¥Î∂àÎ™ÖÏùò Ï°¥Ïû¨ Ìä∏Îü¨Î∏î.\nÌä∏...,Ïπ¥Ïπ¥Ïò§ÏõπÌà∞Ïä§ÌäúÎîîÏò§,All,Ìôî,ÎÑ§Ïä§Ìã∞Ï∫£,ÎÑ§Ïä§Ìã∞Ï∫£,
3,47740585,ÎÇòÏùò Í∑∏ÎÖÄÎäî Íµ¨ÎØ∏Ìò∏,https://page.kakao.com/content/47740585,ÌåêÌÉÄÏßÄ,"3,434.9Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïñ¥ÎäêÎÇ† ÎÇòÌÉÄÎÇú ÏÇ¨ÎûåÏùÑ Îπ®ÏïÑ Î®πÎäî ÎÇòÎ¨¥ÏöîÍ¥¥ ÎØ∏ÎÖÄ..\nÍ∑∏Î¶¨Í≥† ÏñëÍ∏∞Í∞Ä Ï∂©ÎßåÌïú Ï£ºÏù∏Í≥µ Î¥â...,Îã§Ïò®ÌÅ¨Î¶¨ÏóêÏù¥Ìã∞Î∏å,Fifteen,Ïàò,ÌîÑÎ¶¨,ÏÑúÎùΩ,
4,47936014,Ïã†Ïù∏ÏôïÏ¢å,https://page.kakao.com/content/47936014,"ÌåêÌÉÄÏßÄ, Ï≤úÏû¨, Í≤ÄÏÇ¨, ÎßàÎ≤ïÏÇ¨, Í≤©Ìà¨Í∞Ä, Ïö©Î≥ë, Ï†ïÎ†πÏà†ÏÇ¨, Ïö©ÏÇ¨, ÌûêÎü¨","4,985.7Îßå",//page-images.kakaoentcdn.com/download/resourc...,Ïú†Î™Ö ÌåêÌÉÄÏßÄ ÏÜåÏÑ§ÏùÑ ÎßåÌôîÌôî Ìïú ÏûëÌíà!\n\nÎßàÏ°±Ïù¥ Í∞ïÏÑ∏ÌïòÍ≥† Ïù∏Î•òÎäî Î©∏Ï¢Ö ÏúÑÍ∏∞Ïóê ÏßÅÎ©¥...,Îã§Ïò®ÌÅ¨Î¶¨ÏóêÏù¥Ìã∞Î∏å,All,ÌÜ†,Sariel,Sariel,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1492,68144406,Ïù¥Î¶¨Ï¥àÌååÎûë,https://page.kakao.com/content/68144406,"Î¨¥Ìòë, Ï†ïÌÜµ, ÏÉùÏ°¥, Î≥µÏàò, Î¨¥Î¶º, Ï†ïÌåå, ÌòëÍ∞ù",1.6Îßå,//page-images.kakaoentcdn.com/download/resourc...,"""Î≥¥Íµ≠Íµ¨ÎåÄÎ¨∏(‰øùÂúã‰πùÂ§ßÈñÄ)Ïùò ÏòÅÏõÖÎì§Ïù¥Ïó¨---!!‚Äù\n\nÎ©∏Íµ≠Ïùò ÏúÑÍ∏∞Ïóê ÏùòÍ∏∞Î°ú ÏùºÏñ¥ÏÑ†\...",ÎØ∏Ïä§ÌÑ∞Î∏îÎ£®,Fifteen,Í∏à,ÏÇ¨ÎßàÎã¨,ÏÇ¨ÎßàÎã¨,
1493,68148725,Î∂âÏùÄ Îã¨Î∞§Ïùò ÎÅùÏûêÎùΩÏóêÏÑú,https://page.kakao.com/content/68148725,"BL, ÎèôÏñëÌíç, Ïö¥Î™Ö, Îã§Ï†ï, ÏÇ¨ÎûëÍæº, Ï†àÎåÄÍ∞ï, ÏßëÏ∞©, ÏÜåÏã¨, ÏïÑÎ∞©, ÏÜåÏú†Ïöï, Ïö¥Î™Ö...",12Îßå,//page-images.kakaoentcdn.com/download/resourc...,"Ï†ÄÏ£ºÎ∞õÏùÄ ÏïÑÏù¥Îùº ÏÜêÍ∞ÄÎùΩÏßà Î∞õÏúºÎ©∞ Ïô∏Î°≠Í≤å ÏÇ¥ÏïÑÏò® ÏûêÏö¥.\n\nÏñ¥Îäê ÎÇ†, Í¥¥ÏßàÏù¥ Ïò® Îßà...",Ïó∞Îã¥ | ÌïÑÎã¥,Fifteen,ÌÜ†,Ïò§Í≥µÏãú,Ïò§Í≥µÏãú,
1494,68148783,Î∂âÏùÄ Îã¨Î∞§Ïùò ÎÅùÏûêÎùΩÏóêÏÑú [19ÏÑ∏ ÏôÑÏ†ÑÌåê],https://page.kakao.com/content/68148783,"BL, ÎèôÏñëÌíç, Ïö¥Î™Ö, Îã§Ï†ï, ÏÇ¨ÎûëÍæº, Ï†àÎåÄÍ∞ï, ÏßëÏ∞©, ÏÜåÏã¨, ÏïÑÎ∞©, ÏÜåÏú†Ïöï, ÏßàÌà¨...",4Îßå,//page-images.kakaoentcdn.com/download/resourc...,"Ï†ÄÏ£ºÎ∞õÏùÄ ÏïÑÏù¥Îùº ÏÜêÍ∞ÄÎùΩÏßà Î∞õÏúºÎ©∞ Ïô∏Î°≠Í≤å ÏÇ¥ÏïÑÏò® ÏûêÏö¥.\n\nÏñ¥Îäê ÎÇ†, Í¥¥ÏßàÏù¥ Ïò® Îßà...",Ïó∞Îã¥ | ÌïÑÎã¥,Nineteen,ÌÜ†,Ïò§Í≥µÏãú,Ïò§Í≥µÏãú,
1495,68187457,ÎìúÎûòÍ≥§ÏùÑ Ïú†Í¥¥ÌïòÎã§,https://page.kakao.com/content/68187457,"ÌåêÌÉÄÏßÄ, Ìì®Ï†Ñ, ÌöåÍ∑Ä, Î®ºÏπòÌÇ®, Ïú°ÏïÑ, ÏÑ±Ïû•, Ïù¥Ï¢ÖÏ°±",25.6Îßå,//page-images.kakaoentcdn.com/download/resourc...,ÏµúÍ∞ïÏùò ÌûòÏùÑ Í∞ÄÏßÑ ÌöåÍ∑ÄÏûê Ïú†ÏßÄÌÉú.\nÍ∑∏ÎßåÌïú ÌûòÏùÑ Í∞ÄÏßÄÍ≥† Í±∞Îì≠ ÌöåÍ∑ÄÎ•º ÌñàÏùåÏóêÎèÑ ÏßÄÍµ¨Ïùò...,MSTORYHUB8125,Fifteen,Î™©,ÏµúÏßÑÌòï,Ï£ºÌô©ÏÉà,Ïú†Ï£º


In [20]:
kp_final_df2 = kp_final_df.drop(["views", "publisher"], axis=1)

In [21]:
kp_final_df2.to_csv("kakao_page_webtoons.csv", index=False)

In [40]:
kp_final_df2

Unnamed: 0,series_id,title,url,category,thumbnail_url,description,age_limit_detail,weekday,writer,illustrator,original_author
0,46609100,ÏùÄÎèôÏä§+ÏòπÎèôÏä§,https://page.kakao.com/content/46609100,ÎìúÎùºÎßà,//dn-img-page.kakao.com/download/resource?kid=...,Ïò§Îπ†ÏôÄ Ïù¥Î≥ÑÌñàÏßÄÎßå Í∑ÄÏóΩÍ≥† ÏàúÏàòÌïú ÏûëÏùÄ Í≥†ÏñëÏù¥ ÏùÄÎèôÏùÄ Ïò§ÎäòÎèÑ ÏóÑÎßàÏôÄ Ìï®Íªò Î∞ùÍ≥† Ïî©Ïî©Ìïò...,All,Í∏à,Ïä§ÎÖ∏Ïö∞Ï∫£,Ïä§ÎÖ∏Ïö∞Ï∫£,
1,47196143,Îã¨ÎπõÏ°∞Í∞ÅÏÇ¨,https://page.kakao.com/content/47196143,"ÌåêÌÉÄÏßÄ, Í≤åÏûÑ, Ï†ÑÎ¨∏ÏßÅ, Ìì®Ï†Ñ, Ï†ÑÏüÅ, ÏÉùÏ°¥, ÍπΩÌåê, Î®ºÏπòÌÇ®, ÏãúÏä§ÌÖú, Ï≤úÏû¨, ÎÖ∏Î†•...",//dn-img-page.kakao.com/download/resource?kid=...,"ÏïÑÎ•¥Î∞îÏù¥Ìä∏Î•º Ï†ÑÏ†ÑÌïòÎ©∞ Ïñ¥Î†µÍ≤å ÏÇ¥Îçò ÏÜåÎÖÑÍ∞ÄÏû• Ïù¥ÌòÑ, Í∑∏Í∞Ä ‚ÄòÏúÑÎìú‚ÄôÍ∞Ä ÎêòÏñ¥ Í∞ÄÏÉÅÌòÑÏã§Í≤åÏûÑ...",All,Ìôî,"ÍπÄÏ§ÄÌòï, Ïù¥ÎèÑÍ≤Ω","ÍπÄÌÉúÌòï, Î∞ïÏ†ïÏó¥, Ïã†C",ÎÇ®Ìù¨ÏÑ±
2,47497773,Ìä∏Î†àÏù¥Ïä§ - Ïã†Îì§Ïùò ÏãúÏÑ†,https://page.kakao.com/content/47497773,"Ïï°ÏÖò, Ìì®Ï†Ñ, Î®ºÏπòÌÇ®, Ï¥àÎä•Î†•",//dn-img-page.kakao.com/download/resource?kid=...,Ïñ¥ÎäêÎÇ† Í∞ëÏûêÍ∏∞ ÏÑ∏ÏÉÅÏóê ÎÇòÌÉÄÎÇò Ïù∏Í∞ÑÎì§ÏùÑ Í≥µÍ≤©ÌïòÍ∏∞ ÏãúÏûëÌïú Ï†ïÏ≤¥Î∂àÎ™ÖÏùò Ï°¥Ïû¨ Ìä∏Îü¨Î∏î.\nÌä∏...,All,Ìôî,ÎÑ§Ïä§Ìã∞Ï∫£,ÎÑ§Ïä§Ìã∞Ï∫£,
3,47740585,ÎÇòÏùò Í∑∏ÎÖÄÎäî Íµ¨ÎØ∏Ìò∏,https://page.kakao.com/content/47740585,ÌåêÌÉÄÏßÄ,//dn-img-page.kakao.com/download/resource?kid=...,Ïñ¥ÎäêÎÇ† ÎÇòÌÉÄÎÇú ÏÇ¨ÎûåÏùÑ Îπ®ÏïÑ Î®πÎäî ÎÇòÎ¨¥ÏöîÍ¥¥ ÎØ∏ÎÖÄ..\nÍ∑∏Î¶¨Í≥† ÏñëÍ∏∞Í∞Ä Ï∂©ÎßåÌïú Ï£ºÏù∏Í≥µ Î¥â...,Fifteen,Ïàò,ÌîÑÎ¶¨,ÏÑúÎùΩ,
4,47936014,Ïã†Ïù∏ÏôïÏ¢å,https://page.kakao.com/content/47936014,"ÌåêÌÉÄÏßÄ, Ï≤úÏû¨, Í≤ÄÏÇ¨, ÎßàÎ≤ïÏÇ¨, Í≤©Ìà¨Í∞Ä, Ïö©Î≥ë, Ï†ïÎ†πÏà†ÏÇ¨, Ïö©ÏÇ¨, ÌûêÎü¨",//dn-img-page.kakao.com/download/resource?kid=...,Ïú†Î™Ö ÌåêÌÉÄÏßÄ ÏÜåÏÑ§ÏùÑ ÎßåÌôîÌôî Ìïú ÏûëÌíà!\n\nÎßàÏ°±Ïù¥ Í∞ïÏÑ∏ÌïòÍ≥† Ïù∏Î•òÎäî Î©∏Ï¢Ö ÏúÑÍ∏∞Ïóê ÏßÅÎ©¥...,All,ÌÜ†,Sariel,Sariel,
...,...,...,...,...,...,...,...,...,...,...,...
1493,68138491,ÎÇ¥Í∞Ä Ï£ºÏù∏Í≥µÏù∏ Ï§Ñ ÏïåÏïòÎã§,https://page.kakao.com/content/68138491,"Î°úÌåê, Î°úÎß®Ïä§ÌåêÌÉÄÏßÄ, Ïò§Ìï¥Î¨º, ÎπôÏùòÎ¨º, Îä•Í∏ÄÎÇ®, Ï∏§Îç∞Î†àÎÇ®, ÏßëÏ∞©ÎÇ®, Îã§Ï†ïÎÇ®, ÏôïÏ°±/...",//dn-img-page.kakao.com/download/resource?kid=...,Î°úÌåê ÏÜåÏÑ§Ïóê ÎπôÏùòÌñàÎã§.\nÏù¥Î¶ÑÏ°∞Ï∞® Îì±Ïû•ÌïòÏßÄ ÏïäÎäî ÏóëÏä§Ìä∏ÎùºÎ°ú.\n\nÏù¥Í≤å Î∞îÎ°ú ÏóëÏä§Ìä∏...,All,Ïàò,ÏÜîÎùº,ÎπÑÎ°ú,ÏÑúÍ∞ÄÎ¶∞
1494,68144406,Ïù¥Î¶¨Ï¥àÌååÎûë,https://page.kakao.com/content/68144406,"Î¨¥Ìòë, Ï†ïÌÜµ, ÏÉùÏ°¥, Î≥µÏàò, Î¨¥Î¶º, Ï†ïÌåå, ÌòëÍ∞ù",//dn-img-page.kakao.com/download/resource?kid=...,"""Î≥¥Íµ≠Íµ¨ÎåÄÎ¨∏(‰øùÂúã‰πùÂ§ßÈñÄ)Ïùò ÏòÅÏõÖÎì§Ïù¥Ïó¨---!!‚Äù\n\nÎ©∏Íµ≠Ïùò ÏúÑÍ∏∞Ïóê ÏùòÍ∏∞Î°ú ÏùºÏñ¥ÏÑ†\...",Fifteen,Í∏à,ÏÇ¨ÎßàÎã¨,ÏÇ¨ÎßàÎã¨,
1495,68148725,Î∂âÏùÄ Îã¨Î∞§Ïùò ÎÅùÏûêÎùΩÏóêÏÑú,https://page.kakao.com/content/68148725,"BL, ÎèôÏñëÌíç, Ïö¥Î™Ö, Îã§Ï†ï, ÏÇ¨ÎûëÍæº, Ï†àÎåÄÍ∞ï, ÏßëÏ∞©, ÏÜåÏã¨, ÏïÑÎ∞©, ÏÜåÏú†Ïöï, Ïö¥Î™Ö...",//dn-img-page.kakao.com/download/resource?kid=...,"Ï†ÄÏ£ºÎ∞õÏùÄ ÏïÑÏù¥Îùº ÏÜêÍ∞ÄÎùΩÏßà Î∞õÏúºÎ©∞ Ïô∏Î°≠Í≤å ÏÇ¥ÏïÑÏò® ÏûêÏö¥.\n\nÏñ¥Îäê ÎÇ†, Í¥¥ÏßàÏù¥ Ïò® Îßà...",Fifteen,ÌÜ†,Ïò§Í≥µÏãú,Ïò§Í≥µÏãú,
1496,68148783,Î∂âÏùÄ Îã¨Î∞§Ïùò ÎÅùÏûêÎùΩÏóêÏÑú [19ÏÑ∏ ÏôÑÏ†ÑÌåê],https://page.kakao.com/content/68148783,"BL, ÎèôÏñëÌíç, Ïö¥Î™Ö, Îã§Ï†ï, ÏÇ¨ÎûëÍæº, Ï†àÎåÄÍ∞ï, ÏßëÏ∞©, ÏÜåÏã¨, ÏïÑÎ∞©, ÏÜåÏú†Ïöï, ÏßàÌà¨...",//dn-img-page.kakao.com/download/resource?kid=...,"Ï†ÄÏ£ºÎ∞õÏùÄ ÏïÑÏù¥Îùº ÏÜêÍ∞ÄÎùΩÏßà Î∞õÏúºÎ©∞ Ïô∏Î°≠Í≤å ÏÇ¥ÏïÑÏò® ÏûêÏö¥.\n\nÏñ¥Îäê ÎÇ†, Í¥¥ÏßàÏù¥ Ïò® Îßà...",Nineteen,ÌÜ†,Ïò§Í≥µÏãú,Ïò§Í≥µÏãú,
