In [None]:
import requests
import json
from datetime import datetime
import pandas as pd
import time

class CtripCrawler:
    """Collect reviews for a Ctrip point of interest (POI) and export them to CSV.

    Review pages are fetched with POST requests to ``base_url``; every review
    is flattened into a plain dict so the collected list can be handed
    straight to ``pandas.DataFrame``.
    """

    # Seconds to wait for the server before a request is abandoned. Without a
    # timeout ``requests`` may block forever on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Store the endpoint URL and the HTTP headers sent with every request."""
        self.base_url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Content-Type': 'application/json',
            'Referer': 'https://m.ctrip.com/',
            'Origin': 'https://m.ctrip.com'
        }

    def _create_payload(self, page_index, poi_id):
        """Build the JSON request body for one page of reviews.

        Args:
            page_index: Page number sent as ``arg.pageIndex``.
            poi_id: POI identifier sent as ``arg.poiId``.

        Returns:
            dict: Request body with an ``arg`` section (paging and filter
            values, ten reviews per page) and a fixed ``head`` section.
        """
        return {
            "arg": {
                "channelType": 2,
                "collapseType": 0,
                "commentTagId": 0,
                "pageIndex": page_index,
                "pageSize": 10,
                "poiId": poi_id,
                "sourceType": 1,
                "sortType": 3,
                "starType": 0
            },
            "head": {
                "cid": "09031058213352606128",
                "ctok": "",
                "cver": "1.0",
                "lang": "01",
                "sid": "8888",
                "syscode": "09",
                "auth": "",
                "xsid": "",
                "extension": []
            }
        }

    def _parse_review(self, review):
        """Parse a single review

        Args:
            review: One review dict taken from the response's item list.

        Returns:
            dict: Flat record with the keys ``comment_id``, ``user_nick``,
            ``user_member``, ``content``, ``publish_time``, ``score``,
            ``useful_count``, ``reply_count``, ``images`` (list of image URLs)
            and ``scores`` (score name -> value). Anything missing from
            ``review`` is returned as ``None`` (or an empty list/dict).
        """
        # publishTime is treated as '/Date(<epoch milliseconds>+0800)/'. Any
        # value that does not fit that shape (missing, None, other suffix)
        # yields None instead of aborting the parse.
        # NOTE(review): fromtimestamp() renders the value in the timezone of
        # the machine running the notebook - confirm that is intended.
        try:
            raw_time = review.get('publishTime', '')
            millis = int(raw_time.replace('/Date(', '').replace('+0800)/', ''))
            publish_time = datetime.fromtimestamp(millis / 1000).strftime('%Y-%m-%d %H:%M:%S')
        except (AttributeError, TypeError, ValueError, OverflowError, OSError):
            publish_time = None

        # ``or`` guards against keys that are present but null in the JSON,
        # which would otherwise raise and stop the whole crawl.
        user_info = review.get('userInfo') or {}
        images = [img.get('imageSrcUrl') for img in (review.get('images') or [])]
        scores = {
            score.get('name'): score.get('score')
            for score in (review.get('scores') or [])
        }

        return {
            'comment_id': review.get('commentId'),
            'user_nick': user_info.get('userNick'),
            'user_member': user_info.get('userMember'),
            'content': review.get('content'),
            'publish_time': publish_time,
            'score': review.get('score'),
            'useful_count': review.get('usefulCount'),
            'reply_count': review.get('replyCount'),
            'images': images,
            'scores': scores
        }

    def crawl_reviews(self, poi_id, max_pages=10):
        """Crawl reviews for a specific POI

        Pages are requested in order starting at 1. Crawling stops at the
        first empty page, non-200 HTTP status, non-200 API ``code`` or raised
        exception; whatever was collected up to that point is returned.

        Args:
            poi_id: POI identifier to fetch reviews for.
            max_pages: Maximum number of pages to request.

        Returns:
            list[dict]: Parsed reviews in the order they were received.
        """
        all_reviews = []

        for page in range(1, max_pages + 1):
            try:
                response = requests.post(
                    self.base_url,
                    headers=self.headers,
                    json=self._create_payload(page, poi_id),
                    timeout=self.REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    print(f"HTTP Error: {response.status_code}")
                    break

                data = response.json()
                if data.get('code') != 200:
                    print(f"Error in API response: {data.get('msg')}")
                    break

                # A null ``result`` or ``items`` is treated like an empty page.
                reviews = (data.get('result') or {}).get('items') or []
                if not reviews:  # No more reviews
                    break

                for review in reviews:
                    all_reviews.append(self._parse_review(review))

                print(f"Successfully crawled page {page}")

                time.sleep(2)  # Delay between requests to avoid rate limiting

            except Exception as e:
                print(f"Error crawling page {page}: {str(e)}")
                break

        return all_reviews

    def save_to_csv(self, reviews, filename):
        """Save reviews to CSV file

        Args:
            reviews: List of review dicts as returned by ``crawl_reviews``.
            filename: Destination path of the CSV file.
        """
        df = pd.DataFrame(reviews)
        # 'utf-8-sig' is UTF-8 with a byte-order mark prepended to the file.
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"Saved {len(reviews)} reviews to {filename}")

# Usage example
# Crawls up to five review pages for one POI and writes them to
# 'ctrip_reviews_<poi_id>.csv'. The names bound here (crawler, poi_id,
# reviews) stay available to later cells of the notebook.
if __name__ == "__main__":
    crawler = CtripCrawler()
    poi_id = 95261  # Example POI ID
    reviews = crawler.crawl_reviews(poi_id, max_pages=5)
    crawler.save_to_csv(reviews, f'ctrip_reviews_{poi_id}.csv')

Successfully crawled page 1
Successfully crawled page 2
Successfully crawled page 3
Successfully crawled page 4
Successfully crawled page 5
Saved 50 reviews to ctrip_reviews_95261.csv


In [None]:
# prompt: load the ctrip_reviews_95261.csv file into a pandas DataFrame

import pandas as pd

# Read back the CSV written by the crawl of POI 95261.
df = pd.read_csv('ctrip_reviews_95261.csv')

In [None]:
# Show df as this cell's output to inspect the loaded reviews.
df

Unnamed: 0,comment_id,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores
0,118149860,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
1,154148658,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}"
2,162370051,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}"
3,158538016,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
4,159649599,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
5,175135680,小小指,,济州岛牧官衙区域是爱好cosplay的旅行者最喜欢去的地方，这里有很多家提供韩服租赁的小店，...,2023-03-09 15:47:58,5.0,1,0,['https://dimg04.c-ctrip.com/images/1mh0u12000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
6,161406152,小小指,,公交车站观德亭站下车，就在这边。门票1500韩币，儿童400韩币，地方不大，但挺有韩国特色。...,2019-10-06 15:00:52,5.0,1,0,['https://dimg04.c-ctrip.com/images/1006hk124g...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
7,113295799,晶晶-Anna,,济州以前的衙门，一个保存得很完整的韩式建筑群！,2017-07-25 03:21:47,5.0,3,0,['https://dimg04.c-ctrip.com/images/100j0i0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
8,59811965,月月鸟的小窝,钻石贵宾,济州市内景点不多，这个古迹算是不错的去处，占地并不大，走走逛逛半小时差不多看完！建筑都是韩国...,2015-11-03 23:01:42,5.0,2,0,['https://dimg04.c-ctrip.com/images/fd/tg/g3/M...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
9,154130751,E03****23,铂金贵宾,济州牧官衙有点像中式的古建筑，不是很大，是朝鲜时代济州岛的政治中心，曾经被损毁，现在看到的是...,2018-03-27 09:57:29,5.0,2,0,['https://dimg04.c-ctrip.com/images/10090q0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"


In [None]:
import requests
import json
import os
from datetime import datetime
import pandas as pd
import time
from urllib.parse import urlparse
from pathlib import Path

class CtripCrawler:
    """Collect reviews for a Ctrip POI, optionally download their images, and export to CSV.

    Review pages are fetched with POST requests to ``base_url``. Each review
    is flattened into a dict of scalar values (nested data is stored as JSON
    strings) so the result maps cleanly onto CSV columns.
    """

    # Seconds to wait for the server before a request is abandoned. Without a
    # timeout ``requests`` may block forever on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Store the endpoint URL and the HTTP headers sent with every review request."""
        self.base_url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Content-Type': 'application/json',
            'Referer': 'https://m.ctrip.com/',
            'Origin': 'https://m.ctrip.com'
        }

    def _create_payload(self, page_index, poi_id):
        """Build the JSON request body for one page of reviews.

        Args:
            page_index: Page number sent as ``arg.pageIndex``.
            poi_id: POI identifier sent as ``arg.poiId``.

        Returns:
            dict: Request body with an ``arg`` section (paging and filter
            values, ten reviews per page) and a fixed ``head`` section.
        """
        return {
            "arg": {
                "channelType": 2,
                "collapseType": 0,
                "commentTagId": 0,
                "pageIndex": page_index,
                "pageSize": 10,
                "poiId": poi_id,
                "sourceType": 1,
                "sortType": 3,
                "starType": 0
            },
            "head": {
                "cid": "09031058213352606128",
                "ctok": "",
                "cver": "1.0",
                "lang": "01",
                "sid": "8888",
                "syscode": "09",
                "auth": "",
                "xsid": "",
                "extension": []
            }
        }

    def _parse_datetime(self, date_str):
        """Convert a '/Date(<epoch milliseconds>+0800)/' string to 'YYYY-MM-DD HH:MM:SS'.

        Args:
            date_str: Raw ``publishTime`` value from a review.

        Returns:
            str | None: Formatted timestamp, or ``None`` when ``date_str`` is
            not a string of the expected shape.
        """
        # NOTE(review): fromtimestamp() renders the value in the timezone of
        # the machine running the notebook - confirm that is intended.
        try:
            timestamp = int(date_str.replace('/Date(', '').replace('+0800)/', ''))
            return datetime.fromtimestamp(timestamp / 1000).strftime('%Y-%m-%d %H:%M:%S')
        except (AttributeError, TypeError, ValueError, OverflowError, OSError):
            return None

    def _download_image(self, url, save_dir, comment_id):
        """Download one image and store it inside ``save_dir``.

        Args:
            url: Image URL to fetch.
            save_dir: Existing directory the file is written to.
            comment_id: Review identifier used as the filename prefix.

        Returns:
            str | None: Path of the saved file, or ``None`` when the download
            failed (a message is printed in that case).
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(f"Error downloading image {url}: HTTP {response.status_code}")
                return None

            # Extract file extension from URL, defaulting to '.jpg'
            ext = os.path.splitext(urlparse(url).path)[1] or '.jpg'

            # Filename is '<comment_id>_<millisecond timestamp><ext>'. Two
            # images saved within the same millisecond would collide, so the
            # stamp is bumped until the name is unused.
            stamp = int(time.time() * 1000)
            filepath = os.path.join(save_dir, f"{comment_id}_{stamp}{ext}")
            while os.path.exists(filepath):
                stamp += 1
                filepath = os.path.join(save_dir, f"{comment_id}_{stamp}{ext}")

            # Save image
            with open(filepath, 'wb') as f:
                f.write(response.content)
            return filepath
        except Exception as e:
            print(f"Error downloading image {url}: {str(e)}")
        return None

    def _parse_review(self, review, download_images=False, save_dir=None):
        """Parse a single review with enhanced information

        Args:
            review: One review dict taken from the response's item list.
            download_images: When true (and ``save_dir`` is set) every image
                of the review is downloaded.
            save_dir: Directory that downloaded images are written to.

        Returns:
            dict: Flat record of identifiers, user information, content,
            ``publish_time``, metrics and status flags, followed by
            ``scores`` and ``images`` (JSON strings) and
            ``saved_image_paths`` (JSON string, or ``None`` when nothing was
            saved). Fields missing from ``review`` are ``None``.
        """
        # ``or`` guards against keys that are present but null in the JSON,
        # which would otherwise raise and stop the whole crawl.
        user_info = review.get('userInfo') or {}

        parsed = {
            'comment_id': review.get('commentId'),
            'resource_id': review.get('resourceId'),
            'business_id': review.get('businessId'),
            'district_id': review.get('districtId'),

            # User information
            'user_id': user_info.get('userId'),
            'user_nick': user_info.get('userNick'),
            'user_member': user_info.get('userMember'),
            'user_member_code': user_info.get('userMemberCode'),

            # Content
            'content': review.get('content'),
            'language_type': review.get('languageType'),
            'translate_content': review.get('translateContent'),

            # Timestamps
            'publish_time': self._parse_datetime(review.get('publishTime', '')),

            # Metrics
            'score': review.get('score'),
            'useful_count': review.get('usefulCount'),
            'reply_count': review.get('replyCount'),
            'collect_count': review.get('collectCnt'),

            # Status flags
            'is_good': review.get('isGood'),
            'is_picked': review.get('isPicked'),
            'has_collected': review.get('hasCollected'),
            'publish_status': review.get('publishStatus')
        }

        # Process scores: score name -> value, stored as a JSON string
        scores = {
            score.get('name'): score.get('score')
            for score in (review.get('scores') or [])
        }
        parsed['scores'] = json.dumps(scores, ensure_ascii=False)

        # Process images
        images = []
        saved_images = []
        for img in (review.get('images') or []):
            image_url = img.get('imageSrcUrl')
            images.append({
                'id': img.get('imageId'),
                'height': img.get('height'),
                'width': img.get('width'),
                'url': image_url
            })

            # Download images if requested
            if download_images and save_dir and image_url:
                saved_path = self._download_image(
                    image_url,
                    save_dir,
                    review.get('commentId')
                )
                if saved_path:
                    saved_images.append(saved_path)

        parsed['images'] = json.dumps(images, ensure_ascii=False)
        parsed['saved_image_paths'] = json.dumps(saved_images, ensure_ascii=False) if saved_images else None

        return parsed

    def crawl_reviews(self, poi_id, max_pages=10, download_images=False):
        """Crawl reviews with option to download images

        Pages are requested in order starting at 1. Crawling stops at the
        first empty page, non-200 HTTP status, non-200 API ``code`` or raised
        exception; whatever was collected up to that point is returned.

        Args:
            poi_id: POI identifier to fetch reviews for.
            max_pages: Maximum number of pages to request.
            download_images: When true, images are saved under
                ``ctrip_images/poi_<poi_id>`` (created if necessary).

        Returns:
            list[dict]: Parsed reviews in the order they were received.
        """
        all_reviews = []

        # Create directory for images if needed
        save_dir = None
        if download_images:
            save_dir = os.path.join('ctrip_images', f'poi_{poi_id}')
            Path(save_dir).mkdir(parents=True, exist_ok=True)

        for page in range(1, max_pages + 1):
            try:
                print(f"Crawling page {page}...")
                response = requests.post(
                    self.base_url,
                    headers=self.headers,
                    json=self._create_payload(page, poi_id),
                    timeout=self.REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    print(f"HTTP Error: {response.status_code}")
                    break

                data = response.json()
                if data.get('code') != 200:
                    print(f"Error in API response: {data.get('msg')}")
                    break

                # A null ``result`` or ``items`` is treated like an empty page.
                reviews = (data.get('result') or {}).get('items') or []
                if not reviews:
                    print("No more reviews found.")
                    break

                for review in reviews:
                    all_reviews.append(
                        self._parse_review(
                            review,
                            download_images=download_images,
                            save_dir=save_dir
                        )
                    )

                print(f"Successfully crawled {len(reviews)} reviews from page {page}")

                time.sleep(2)  # Rate limiting

            except Exception as e:
                print(f"Error crawling page {page}: {str(e)}")
                break

        return all_reviews

    def save_to_csv(self, reviews, filename):
        """Save reviews to CSV file with proper encoding

        Args:
            reviews: List of review dicts as returned by ``crawl_reviews``.
            filename: Destination path of the CSV file.
        """
        df = pd.DataFrame(reviews)
        # 'utf-8-sig' is UTF-8 with a byte-order mark prepended to the file.
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"Saved {len(reviews)} reviews to {filename}")

def main():
    """Crawl reviews (with images) for the example POI and save them to a timestamped CSV."""
    # Configuration
    target_poi = 38808730  # Example POI ID
    crawl_options = {
        'poi_id': target_poi,
        'max_pages': 5,
        'download_images': True,  # Set to True to download images
    }

    # Crawl reviews
    scraper = CtripCrawler()
    collected = scraper.crawl_reviews(**crawl_options)

    # Save results; the run time in the filename keeps earlier exports intact
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    scraper.save_to_csv(collected, f'ctrip_reviews_{target_poi}_{run_stamp}.csv')


if __name__ == "__main__":
    main()

Crawling page 1...
Successfully crawled 10 reviews from page 1
Crawling page 2...
Successfully crawled 4 reviews from page 2
Crawling page 3...
No more reviews found.
Saved 14 reviews to ctrip_reviews_38808730_20250121_120709.csv


In [None]:
# prompt: load the ctrip_reviews_95261.csv file into a pandas DataFrame named df2

# NOTE(review): this is the same file the earlier cell loads into `df`, so df2
# holds the same rows; the POI 38808730 crawl above wrote a different,
# timestamped CSV. Confirm which file was intended here.
df2 = pd.read_csv('ctrip_reviews_95261.csv')

In [None]:
# Show df2 as this cell's output.
df2

Unnamed: 0,comment_id,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores
0,118149860,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
1,154148658,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}"
2,162370051,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}"
3,158538016,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
4,159649599,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
5,175135680,小小指,,济州岛牧官衙区域是爱好cosplay的旅行者最喜欢去的地方，这里有很多家提供韩服租赁的小店，...,2023-03-09 15:47:58,5.0,1,0,['https://dimg04.c-ctrip.com/images/1mh0u12000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
6,161406152,小小指,,公交车站观德亭站下车，就在这边。门票1500韩币，儿童400韩币，地方不大，但挺有韩国特色。...,2019-10-06 15:00:52,5.0,1,0,['https://dimg04.c-ctrip.com/images/1006hk124g...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
7,113295799,晶晶-Anna,,济州以前的衙门，一个保存得很完整的韩式建筑群！,2017-07-25 03:21:47,5.0,3,0,['https://dimg04.c-ctrip.com/images/100j0i0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
8,59811965,月月鸟的小窝,钻石贵宾,济州市内景点不多，这个古迹算是不错的去处，占地并不大，走走逛逛半小时差不多看完！建筑都是韩国...,2015-11-03 23:01:42,5.0,2,0,['https://dimg04.c-ctrip.com/images/fd/tg/g3/M...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"
9,154130751,E03****23,铂金贵宾,济州牧官衙有点像中式的古建筑，不是很大，是朝鲜时代济州岛的政治中心，曾经被损毁，现在看到的是...,2018-03-27 09:57:29,5.0,2,0,['https://dimg04.c-ctrip.com/images/10090q0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}"


In [None]:
import requests
import json
import pandas as pd
from datetime import datetime
import time
import random

class CtripCrawler:
    """Collect reviews for a Ctrip POI and export them to CSV.

    Review pages are fetched with POST requests to ``base_url``. Each review
    becomes a flat dict; every named sub-score of a review is expanded into
    its own ``score_<name>`` key.
    """

    # Seconds to wait for the server before a request is abandoned. Without a
    # timeout ``requests`` may block forever on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Store the endpoint URL and the HTTP headers sent with every request."""
        self.base_url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

    def _generate_payload(self, page_index, poi_id):
        """Build the JSON request body for one page of reviews.

        Args:
            page_index: Page number sent as ``arg.pageIndex``.
            poi_id: POI identifier sent as ``arg.poiId``.

        Returns:
            dict: Request body with an ``arg`` section (paging and filter
            values, ten reviews per page) and a fixed ``head`` section.
        """
        return {
            "arg": {
                "channelType": 2,
                "collapseType": 0,
                "commentTagId": 0,
                "pageIndex": page_index,
                "pageSize": 10,
                "poiId": poi_id,
                "sourceType": 1,
                "sortType": 3,
                "starType": 0
            },
            "head": {
                "cid": "09031058213352606128",
                "ctok": "",
                "cver": "1.0",
                "lang": "01",
                "sid": "8888",
                "syscode": "09",
                "auth": "",
                "xsid": "",
                "extension": []
            }
        }

    def _parse_review(self, review):
        """Parse individual review data

        Args:
            review: One review dict taken from the response's item list.

        Returns:
            dict: Flat record with ``comment_id``, ``user_nick``,
            ``user_member``, ``score``, ``content``, ``publish_time``,
            ``useful_count``, ``reply_count``, ``images_count`` and
            ``image_urls``, plus one ``score_<name>`` key per named sub-score.
        """
        # ``or`` guards against keys that are present but null in the JSON,
        # which would otherwise raise and stop the whole crawl.
        user_info = review.get('userInfo') or {}
        images = review.get('images') or []

        parsed_data = {
            'comment_id': review.get('commentId'),
            'user_nick': user_info.get('userNick'),
            'user_member': user_info.get('userMember'),
            'score': review.get('score'),
            'content': review.get('content'),
            'publish_time': self._parse_date(review.get('publishTime')),
            'useful_count': review.get('usefulCount'),
            'reply_count': review.get('replyCount'),
            'images_count': len(images),
            'image_urls': [img.get('imageSrcUrl') for img in images]
        }

        # Parse scores if available; entries without a name are skipped
        for score in (review.get('scores') or []):
            score_name = score.get('name')
            if score_name:
                parsed_data[f'score_{score_name}'] = score.get('score')

        return parsed_data

    def _parse_date(self, date_str):
        """Parse Ctrip date format to readable datetime

        Args:
            date_str: Raw ``publishTime`` value, expected to look like
                '/Date(<epoch milliseconds>+0800)/'.

        Returns:
            str | None: 'YYYY-MM-DD HH:MM:SS', or ``None`` when ``date_str``
            is missing, not a string, or not of the expected shape.
        """
        if not isinstance(date_str, str) or '/Date(' not in date_str:
            return None
        # NOTE(review): fromtimestamp() renders the value in the timezone of
        # the machine running the notebook - confirm that is intended.
        try:
            timestamp = int(date_str.replace('/Date(', '').replace('+0800)/', '')) / 1000
            return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
        except (ValueError, OverflowError, OSError):
            # One malformed timestamp should not abort the whole crawl.
            return None

    def crawl_reviews(self, poi_id, max_pages=10):
        """Crawl reviews for a specific POI

        Pages are requested in order starting at 1. Crawling stops at the
        first empty page, non-200 HTTP status, non-200 API ``code`` or raised
        exception; whatever was collected up to that point is returned.

        Args:
            poi_id: POI identifier to fetch reviews for.
            max_pages: Maximum number of pages to request.

        Returns:
            list[dict]: Parsed reviews in the order they were received.
        """
        all_reviews = []

        for page in range(1, max_pages + 1):
            try:
                response = requests.post(
                    self.base_url,
                    headers=self.headers,
                    json=self._generate_payload(page, poi_id),
                    timeout=self.REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    print(f"Failed to get page {page}: Status code {response.status_code}")
                    break

                data = response.json()
                # Report an API-level failure instead of mistaking it for
                # "no more reviews".
                if data.get('code') != 200:
                    print(f"Error in API response: {data.get('msg')}")
                    break

                # A null ``result`` or ``items`` is treated like an empty page.
                reviews = (data.get('result') or {}).get('items') or []
                if not reviews:
                    break

                for review in reviews:
                    all_reviews.append(self._parse_review(review))

                print(f"Successfully crawled page {page}")

                # Random delay between requests
                time.sleep(random.uniform(1, 3))

            except Exception as e:
                print(f"Error crawling page {page}: {str(e)}")
                break

        return all_reviews

    def save_to_csv(self, reviews, filename='ctrip_reviews.csv'):
        """Save reviews to CSV file

        Args:
            reviews: List of review dicts as returned by ``crawl_reviews``.
            filename: Destination path of the CSV file.
        """
        df = pd.DataFrame(reviews)
        # 'utf-8-sig' is UTF-8 with a byte-order mark prepended to the file.
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"Saved {len(reviews)} reviews to {filename}")

def main():
    """Crawl up to five review pages for the example POI and write them to a CSV."""
    # Example POI ID (can be changed)
    target_poi = 101904

    # Initialize crawler and crawl reviews
    scraper = CtripCrawler()
    collected = scraper.crawl_reviews(target_poi, max_pages=5)

    # Save to CSV
    csv_name = f'ctrip_reviews_{target_poi}.csv'
    scraper.save_to_csv(collected, csv_name)


if __name__ == "__main__":
    main()

Successfully crawled page 1
Successfully crawled page 2
Successfully crawled page 3
Successfully crawled page 4
Successfully crawled page 5
Saved 50 reviews to ctrip_reviews_101904.csv


In [None]:
# prompt: load the ctrip_reviews_101904.csv file with pandas, named df3

import pandas as pd

# Read back the CSV written by the crawl of POI 101904 and show it as the cell output.
df3 = pd.read_csv('ctrip_reviews_101904.csv')
df3

Unnamed: 0,comment_id,user_nick,user_member,score,content,publish_time,useful_count,reply_count,images_count,image_urls,score_景色,score_趣味,score_性价比
0,158768954,_WeCh****58078,铂金贵宾,5.0,就是了解海苔海带的地方，海带味道还是不错的，一包160，回家泡泡熬汤喝，不过没买，海苔各种口...,2019-05-11 01:41:10,3,0,8,['https://dimg04.c-ctrip.com/images/1004140000...,4.0,4.0,3.0
1,93144093,Totolong127,黄金贵宾,4.0,紫菜博物馆就是一个购物点，讲解人员也是中国人，参观上楼，介绍各种味道的海苔。试吃各种味道草莓...,2016-12-12 15:01:02,6,0,3,['https://dimg04.c-ctrip.com/images/100o0b0000...,3.0,5.0,4.0
2,74258841,182****9502,,5.0,里面海苔的种类很多，很不错，挺好吃的，还可以穿韩服拍照挺好的，一定要尽快，不然衣服都不好看了,2016-05-17 05:29:47,0,0,5,['https://dimg04.c-ctrip.com/images/fd/tg/g6/M...,5.0,5.0,5.0
3,90039083,甜心,黄金贵宾,5.0,紫菜馆很棒，有人带领讲解缘由，还可以带领大家现场制作海苔，品尝海苔、还有韩服体验，非常有意思...,2016-10-22 20:57:33,4,0,1,['https://dimg04.c-ctrip.com/images/1003090000...,5.0,5.0,5.0
4,155098414,龟龟蔡,,5.0,紫菜博物馆一般跟团的话都会有的景点，游客可以在这里购买手信，可以看到紫菜的制作过程，当然如果...,2018-06-01 03:14:20,1,0,3,['https://dimg04.c-ctrip.com/images/100q0o0000...,5.0,5.0,5.0
5,159198683,whoisangel,铂金贵宾,5.0,这个博物馆的面积不算大里面几乎所有的工作人员都是中国人感觉是他们为我们中国人开设的博物馆，大...,2019-06-07 06:17:19,0,0,3,['https://dimg04.c-ctrip.com/images/1003150000...,5.0,5.0,5.0
6,84096719,_CFT01****3994902,,5.0,有免费韩服可穿，自己拍照，紫菜口味也很多，绿茶味味道不错，多买几包还有送，孩子很喜欢。,2016-07-27 16:51:31,0,0,4,['https://dimg04.c-ctrip.com/images/100b060000...,4.0,5.0,4.0
7,86570600,纽约漫时光,钻石贵宾,4.0,紫菜博物展示馆就在体育馆里，分割出几个屋子，在里面会教大家做泡菜，品尝泡菜，介绍紫菜，然后出...,2016-08-25 13:31:56,2,0,1,['https://dimg04.c-ctrip.com/images/100n070000...,4.0,4.0,4.0
8,159304123,小思文,,4.0,济州岛的好产品之一就是紫菜，有专门的紫菜的展示馆，介绍紫菜的开采和加工的过程，紫菜品种都是很...,2019-06-15 01:29:13,1,0,3,['https://dimg04.c-ctrip.com/images/100r150000...,3.0,5.0,4.0
9,91754442,__赛__,,5.0,紫菜展览馆就在一个体育馆里面，其实是旅游团专属的购物点之一 里面有人教大家做紫菜也有一些微缩...,2016-11-19 03:33:16,2,0,1,['https://dimg04.c-ctrip.com/images/100q0a0000...,5.0,5.0,5.0


In [None]:
# prompt: merge df, df2 and df3 into a single DataFrame; the DataFrame name is hanfu

import pandas as pd

# Assuming df, df2, and df3 are already defined as pandas DataFrames
# from the provided code.
# NOTE(review): the cells that create df and df2 both read
# 'ctrip_reviews_95261.csv', so those rows appear twice in hanfu - confirm
# whether df2 was meant to come from a different file.

try:
  # Stack the three frames row-wise and renumber the index from 0.
  hanfu = pd.concat([df, df2, df3], ignore_index=True)
  print(hanfu)
except NameError:
  # Reached when df, df2 or df3 does not exist yet; hanfu is not (re)assigned then.
  print("One or more of the dataframes (df, df2, df3) are not defined.")
except Exception as e:
  print(f"An error occurred: {e}")

     comment_id       user_nick user_member  \
0     118149860             海星j        钻石贵宾   
1     154148658  M515shunyi1618         NaN   
2     162370051           超级ctt        黄金贵宾   
3     158538016     M30****3226         NaN   
4     159649599         笑看人生218         NaN   
..          ...             ...         ...   
145    97952664      M16****288        黄金贵宾   
146    84468231   _NET****46026         NaN   
147   105997783            寒潭秋月         NaN   
148    70456967          米米米米米啦        黄金贵宾   
149   152360263         太行山下小江南         NaN   

                                               content         publish_time  \
0    济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...  2017-08-04 14:44:37   
1    济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...  2018-03-28 12:46:05   
2    牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...  2019-12-22 02:54:44   
3    在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...  2019-04-25 14:34:51   
4    济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭

In [None]:
# Show the combined DataFrame as this cell's output.
hanfu


Unnamed: 0,comment_id,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores,images_count,image_urls,score_景色,score_趣味,score_性价比
0,118149860,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
1,154148658,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",,,,,
2,162370051,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",,,,,
3,158538016,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
4,159649599,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,97952664,M16****288,黄金贵宾,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,2017-02-26 01:04:50,5.0,0,0,,,0.0,[],5.0,5.0,5.0
146,84468231,_NET****46026,,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,2016-08-16 03:46:58,4.0,0,0,,,0.0,[],4.0,4.0,4.0
147,105997783,寒潭秋月,,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,2017-05-09 14:43:55,5.0,0,0,,,0.0,[],5.0,5.0,5.0
148,70456967,米米米米米啦,黄金贵宾,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,2016-01-28 15:54:55,4.0,1,0,,,0.0,[],4.0,5.0,4.0


In [None]:
# Print column names, non-null counts and dtypes of the combined frame.
hanfu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   comment_id    150 non-null    int64  
 1   user_nick     150 non-null    object 
 2   user_member   68 non-null     object 
 3   content       150 non-null    object 
 4   publish_time  150 non-null    object 
 5   score         150 non-null    float64
 6   useful_count  150 non-null    int64  
 7   reply_count   150 non-null    int64  
 8   images        100 non-null    object 
 9   scores        100 non-null    object 
 10  images_count  50 non-null     float64
 11  image_urls    50 non-null     object 
 12  score_景色      47 non-null     float64
 13  score_趣味      47 non-null     float64
 14  score_性价比     47 non-null     float64
dtypes: float64(5), int64(3), object(7)
memory usage: 17.7+ KB


In [None]:
# Preview the first rows of the combined frame.
hanfu.head()

Unnamed: 0,comment_id,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores,images_count,image_urls,score_景色,score_趣味,score_性价比
0,118149860,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
1,154148658,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",,,,,
2,162370051,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",,,,,
3,158538016,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
4,159649599,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,


In [None]:
# prompt: convert "publish_time" to a date type

# Convert 'publish_time' column to datetime objects
# (errors='coerce' turns values that cannot be parsed into NaT instead of raising)
hanfu['publish_time'] = pd.to_datetime(hanfu['publish_time'], errors='coerce')

# Display info to check if the conversion was successful
hanfu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   comment_id    150 non-null    int64         
 1   user_nick     150 non-null    object        
 2   user_member   68 non-null     object        
 3   content       150 non-null    object        
 4   publish_time  150 non-null    datetime64[ns]
 5   score         150 non-null    float64       
 6   useful_count  150 non-null    int64         
 7   reply_count   150 non-null    int64         
 8   images        100 non-null    object        
 9   scores        100 non-null    object        
 10  images_count  50 non-null     float64       
 11  image_urls    50 non-null     object        
 12  score_景色      47 non-null     float64       
 13  score_趣味      47 non-null     float64       
 14  score_性价比     47 non-null     float64       
dtypes: datetime64[ns](1), float64(5), int64(

In [None]:
# Preview the first rows after the 'publish_time' conversion.
hanfu.head()

Unnamed: 0,comment_id,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores,images_count,image_urls,score_景色,score_趣味,score_性价比
0,118149860,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
1,154148658,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",,,,,
2,162370051,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",,,,,
3,158538016,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,
4,159649599,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",,,,,


In [None]:
# prompt: create year, month and day columns based on the "publish_time" column

# Extract year, month, and day from the 'publish_time' column
# (the .dt accessor requires 'publish_time' to already be a datetime column)
hanfu['year'] = hanfu['publish_time'].dt.year
hanfu['month'] = hanfu['publish_time'].dt.month
hanfu['day'] = hanfu['publish_time'].dt.day

In [None]:
# Check that the year, month and day columns were added.
hanfu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 18 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   comment_id    150 non-null    int64         
 1   user_nick     150 non-null    object        
 2   user_member   68 non-null     object        
 3   content       150 non-null    object        
 4   publish_time  150 non-null    datetime64[ns]
 5   score         150 non-null    float64       
 6   useful_count  150 non-null    int64         
 7   reply_count   150 non-null    int64         
 8   images        100 non-null    object        
 9   scores        100 non-null    object        
 10  images_count  50 non-null     float64       
 11  image_urls    50 non-null     object        
 12  score_景色      47 non-null     float64       
 13  score_趣味      47 non-null     float64       
 14  score_性价比     47 non-null     float64       
 15  year          150 non-null    int32     

In [None]:
# prompt: remove the "comment_id", "images_count", "image_urls", "score_景色", "score_趣味", "score_性价比" columns from the hanfu DataFrame

# Labels to discard; any that are already absent are skipped instead of raising.
columns_to_remove = [
    "comment_id",
    "images_count",
    "image_urls",
    "score_景色",
    "score_趣味",
    "score_性价比",
]
hanfu = hanfu.drop(columns=columns_to_remove, errors='ignore')

# Leave the frame as the last expression so the notebook renders it.
hanfu

Unnamed: 0,user_nick,user_member,content,publish_time,score,useful_count,reply_count,images,scores,year,month,day
0,海星j,钻石贵宾,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,2017-08-04 14:44:37,5.0,3,0,['https://dimg04.c-ctrip.com/images/100u0j0000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2017,8,4
1,M515shunyi1618,,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,2018-03-28 12:46:05,4.0,2,0,['https://dimg04.c-ctrip.com/images/100m0q0000...,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",2018,3,28
2,超级ctt,黄金贵宾,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,2019-12-22 02:54:44,4.0,0,0,['https://dimg04.c-ctrip.com/images/10061b0000...,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",2019,12,22
3,M30****3226,,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,2019-04-25 14:34:51,5.0,0,0,['https://dimg04.c-ctrip.com/images/100v130000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2019,4,25
4,笑看人生218,,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,2019-07-08 15:35:11,5.0,0,0,['https://dimg04.c-ctrip.com/images/100j160000...,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2019,7,8
...,...,...,...,...,...,...,...,...,...,...,...,...
145,M16****288,黄金贵宾,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,2017-02-26 01:04:50,5.0,0,0,,,2017,2,26
146,_NET****46026,,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,2016-08-16 03:46:58,4.0,0,0,,,2016,8,16
147,寒潭秋月,,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,2017-05-09 14:43:55,5.0,0,0,,,2017,5,9
148,米米米米米啦,黄金贵宾,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,2016-01-28 15:54:55,4.0,1,0,,,2016,1,28


In [None]:
# prompt: remove the "user_member", "publish_time", "images" columns from the hanfu DataFrame

# Drop along the column axis; missing labels are tolerated.
hanfu = hanfu.drop(
    ["user_member", "publish_time", "images"],
    axis="columns",
    errors='ignore',
)

# Render the trimmed frame.
hanfu

Unnamed: 0,user_nick,content,score,useful_count,reply_count,scores,year,month,day
0,海星j,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,5.0,3,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2017,8,4
1,M515shunyi1618,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,4.0,2,0,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",2018,3,28
2,超级ctt,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,4.0,0,0,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",2019,12,22
3,M30****3226,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,5.0,0,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2019,4,25
4,笑看人生218,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,5.0,0,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2019,7,8
...,...,...,...,...,...,...,...,...,...
145,M16****288,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,5.0,0,0,,2017,2,26
146,_NET****46026,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,4.0,0,0,,2016,8,16
147,寒潭秋月,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,5.0,0,0,,2017,5,9
148,米米米米米啦,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,4.0,1,0,,2016,1,28


In [None]:
# prompt: show the unique values of the ["Year"] column

# Distinct values of the 'year' column, in order of first appearance.
distinct_years = hanfu['year'].unique()
print(distinct_years)

[2017 2018 2019 2023 2015 2016 2021]


In [None]:
# prompt: change 2015, 2016, 2017 to 2020 in the ['year'] column

# Rows whose year is 2015, 2016, or 2017 are relabelled as 2020.
year_is_2015_to_2017 = hanfu['year'].isin([2015, 2016, 2017])
hanfu.loc[year_is_2015_to_2017, 'year'] = 2020

# Confirm which year values remain.
print(hanfu['year'].unique())

[2020 2018 2019 2023 2021]


In [None]:
# prompt: change 2018 and 2019 to 2022 in the ['year'] column

# Rows whose year is 2018 or 2019 are relabelled as 2022.
year_is_2018_or_2019 = hanfu['year'].isin([2018, 2019])
hanfu.loc[year_is_2018_or_2019, 'year'] = 2022

# Confirm which year values remain.
print(hanfu['year'].unique())

[2020 2022 2023 2021]


In [None]:
# prompt: df.head()

# Preview the first rows of hanfu (head() defaults to five rows).
hanfu.head()

Unnamed: 0,user_nick,content,score,useful_count,reply_count,scores,year,month,day
0,海星j,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,5.0,3,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2020,8,4
1,M515shunyi1618,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,4.0,2,0,"{'景色': 4.0, '趣味': 4.0, '性价比': 4.0}",2022,3,28
2,超级ctt,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,4.0,0,0,"{'景色': 5.0, '趣味': 4.0, '性价比': 5.0}",2022,12,22
3,M30****3226,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,5.0,0,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2022,4,25
4,笑看人生218,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,5.0,0,0,"{'景色': 5.0, '趣味': 5.0, '性价比': 5.0}",2022,7,8


In [None]:
# prompt: remove the "useful_count", "reply_count", "scores", "day" columns from the hanfu DataFrame

# Labels to discard; any that are already absent are skipped instead of raising.
obsolete_columns = ["useful_count", "reply_count", "scores", "day"]
hanfu = hanfu.drop(columns=obsolete_columns, errors='ignore')

# Render the trimmed frame.
hanfu

Unnamed: 0,user_nick,content,score,year,month
0,海星j,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,5.0,2020,8
1,M515shunyi1618,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,4.0,2022,3
2,超级ctt,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,4.0,2022,12
3,M30****3226,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,5.0,2022,4
4,笑看人生218,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,5.0,2022,7
...,...,...,...,...,...
145,M16****288,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,5.0,2020,2
146,_NET****46026,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,4.0,2020,8
147,寒潭秋月,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,5.0,2020,5
148,米米米米米啦,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,4.0,2020,1


In [None]:
# prompt: save the hanfu DataFrame as a CSV file on the local computer; file name is hanbok

from google.colab import files

# index=False keeps the row index out of the file; without it the index is
# written as a header-less first column and comes back from read_csv as a
# spurious "Unnamed: 0" column.
# utf-8-sig prepends a BOM (presumably so spreadsheet tools detect the encoding
# of the Chinese text -- confirm with the consumer of the file).
hanfu.to_csv('hanbok.csv', index=False, encoding='utf-8-sig')

# Trigger a browser download of the file from the Colab runtime.
files.download('hanbok.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# prompt: load the hanbok.csv file into a pandas DataFrame

import pandas as pd

# Read the exported CSV back from the working directory and display it.
csv_path = 'hanbok.csv'
hanbok = pd.read_csv(csv_path)
hanbok

Unnamed: 0.1,Unnamed: 0,user_nick,content,score,year,month
0,0,海星j,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,5.0,2020,8
1,1,M515shunyi1618,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,4.0,2022,3
2,2,超级ctt,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,4.0,2022,12
3,3,M30****3226,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,5.0,2022,4
4,4,笑看人生218,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,5.0,2022,7
...,...,...,...,...,...,...
145,145,M16****288,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,5.0,2020,2
146,146,_NET****46026,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,4.0,2020,8
147,147,寒潭秋月,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,5.0,2020,5
148,148,米米米米米啦,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,4.0,2020,1


In [None]:
# Print the column names, non-null counts, and dtypes of the reloaded frame.
hanbok.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  150 non-null    int64  
 1   user_nick   150 non-null    object 
 2   content     150 non-null    object 
 3   score       150 non-null    float64
 4   year        150 non-null    int64  
 5   month       150 non-null    int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 7.2+ KB


In [None]:
# prompt: remove the "unnamed" columns, then reset the index

# Flag every column whose label starts with "Unnamed" (the label pandas gives
# a CSV column that has no header), keep the rest, and renumber rows from 0.
is_unnamed = hanbok.columns.str.contains('^Unnamed')
hanbok = hanbok.loc[:, ~is_unnamed].reset_index(drop=True)
hanbok

Unnamed: 0,user_nick,content,score,year,month
0,海星j,济州牧官衙，作为朝鲜王朝（1392-1910）地方官员的衙门，在耽罗(Tamna)王朝期间一...,5.0,2020,8
1,M515shunyi1618,济州牧宫衙是朝鲜时期济州的衙门，这里曾经在战争中被损毁后来经过多次修整和重建，现在里面主要展...,4.0,2022,3
2,超级ctt,牧官衙是古时济州道的行政中心，经过多次的修缮和重建，基本保留了原有的风貌。 地方不大，麻雀虽...,4.0,2022,12
3,M30****3226,在东门市场附近，走路可到，环城旅游巴士也有这一站，去之前先看一下时间，四点以后貌似就不可以进...,5.0,2022,4
4,笑看人生218,济州牧官衙是济州岛蛮有名的的地标建筑，看门票便宜就进去看了看，建筑跟庭院都值得看看，在整个朝...,5.0,2022,7
...,...,...,...,...,...
145,M16****288,买了一千多块钱的海带和海苔，亲手制作了泡菜，还看到了韩服,5.0,2020,2
146,_NET****46026,紫菜博物馆对我来说没有一样想买的，我家乡就有各种紫菜海苔特产，何必大包小包往回运呢。孩子很喜欢。,4.0,2020,8
147,寒潭秋月,好吧好吧，实在没有办法就去看看紫菜博物馆吧。说不上好。,5.0,2020,5
148,米米米米米啦,各种味道的海苔品尝到撑，眼看着现场刚做好的海苔就塞进嘴巴里的感觉还是不错的,4.0,2020,1


In [None]:
# prompt: save the final DataFrame to the local computer; file name is Jeju

# Write the final `hanbok` frame to its own file and download that, rather than
# re-downloading the earlier 'hanbok.csv' export, which was written from
# `hanfu` and never updated from `hanbok`.
# index=False keeps the row index out of the file; utf-8-sig matches the
# encoding used for the earlier export.
hanbok.to_csv('Jeju.csv', index=False, encoding='utf-8-sig')

# `files` is the google.colab helper imported in the earlier export cell.
files.download('Jeju.csv')