In [23]:
import pymongo
import pandas as pd
import math
from datetime import datetime

# import dask.dataframe as dd ## 대규모 데이터 처리
# import vaex ## 대규모 데이터 처리

import os
from dotenv import load_dotenv

In [2]:
# Load environment variables from the dotenv file at this path.
# NOTE(review): the path is absolute and machine-specific — confirm it exists where this runs.
load_dotenv('C:/py_src/awake/env')

# MongoDB credentials are read from the environment; each is None when the variable is unset.
mongo_user = os.environ.get('MONGO_USER')
mongo_password = os.environ.get('MONGO_PASSWORD')

In [3]:
# Build the MongoDB connection URL and create the client.
# Fail fast when a credential is missing: otherwise the f-string below would embed the
# literal text "None" as user/password and the failure would only surface later.
if not mongo_user or not mongo_password:
    raise RuntimeError("MONGO_USER and MONGO_PASSWORD must be set in the environment")

# NOTE(review): credentials are interpolated verbatim; if they can contain characters such
# as ':', '/', '@' or '%', they presumably need percent-escaping first — confirm how the
# values are stored in the env file before changing this.
url = f"mongodb+srv://{mongo_user}:{mongo_password}@meercat-external.udyfs.mongodb.net/?retryWrites=true&w=majority&appName=meercat-external"
client = pymongo.MongoClient(url, serverSelectionTimeoutMS=100000)

In [4]:
# Connection check: ask the server for its database names and report the outcome.
from pymongo.errors import ServerSelectionTimeoutError

try:
    databases = client.list_database_names()
except ServerSelectionTimeoutError as err:
    # Only a server-selection timeout is reported here; other errors propagate.
    print("Connection failed:", err)
else:
    print("Connected successfully. Databases:", databases)

Connected successfully. Databases: ['Test', 'admin', 'config', 'local']


In [6]:
# Select the "Test" database.
db = client["Test"]

# Names of the collections in that database.
collections = db.list_collection_names()

In [8]:
# Unit conversion
def convert_bytes(num):
    """Format a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit is reached.

    Args:
        num: Size in bytes (int or float).

    Returns:
        A string such as "13.39 MB". Values of 1024 TB or more are reported
        in PB instead of falling off the end of the loop and returning None.
    """
    units = ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
    for x in units[:-1]:
        if num < 1024.0:
            return f"{num:.2f} {x}"
        num /= 1024.0
    # Anything left over is expressed in the largest unit.
    return f"{num:.2f} {units[-1]}"

In [9]:
# Print size, document count and total index size for every collection.
for collection_name in collections:
    # Collection statistics as returned by the collStats command.
    stats = db.command("collStats", collection_name)

    label = f"Collection '{collection_name}'"
    print(f"{label} size: {convert_bytes(stats['size'])}")
    print(f"{label} document count: {stats['count']}")
    print(f"Total index size: {convert_bytes(stats['totalIndexSize'])}")
    print('--------------------------------------------------------------------')

Collection 'hashtag_hashtag' size: 13.39 MB
Collection 'hashtag_hashtag' document count: 3052
Total index size: 172.00 KB
--------------------------------------------------------------------
Collection 'youtube_revenue' size: 259.47 MB
Collection 'youtube_revenue' document count: 811499
Total index size: 46.44 MB
--------------------------------------------------------------------
Collection 'youtube_report_v2' size: 4.60 MB
Collection 'youtube_report_v2' document count: 5303
Total index size: 484.00 KB
--------------------------------------------------------------------
Collection 'influencer_datas' size: 7.78 GB
Collection 'influencer_datas' document count: 3340624
Total index size: 246.63 MB
--------------------------------------------------------------------
Collection 'youtube_channel_demographics' size: 59.67 MB
Collection 'youtube_channel_demographics' document count: 297683
Total index size: 4.69 MB
--------------------------------------------------------------------
Collection

In [10]:
## Collections loaded in full by the next cell.
## Instagram-related data is excluded.
## 'youtube_videos'/'youtube_datas' are collected separately.
## 'youtube_report_v2'/'youtube_report': date information unclear, duplicates information in other tables.
collections_need = [
    'youtube_revenue',
    'youtube_channel_demographics',
    'youtube_users',
    'youtube_channel_locations',
    'youtube_daily_channel_basics',
    'youtube_subscriber',
    ]

In [11]:
# Load data: read every document of each needed collection into a DataFrame,
# keyed by collection name, and print the resulting columns.
youtube_dict = dict()
for collection_name in collections_need:
    youtube_dict[collection_name] = pd.DataFrame(
        list(db[collection_name].find())
    )

    print(
        collection_name,
        youtube_dict[collection_name].columns,
        ">> Success",
        "--------------------------------------",
        "",
        sep="\n",
    )

youtube_revenue
Index(['_id', 'estimated_revenue', 'estimated_ad_revenue',
       'estimated_red_partner_revenue', 'gross_revenue', 'cpm',
       'ad_impressions', 'monetized_playbacks', 'playback_based_cpm',
       'youtube_user_id', 'data_created_at', '__v', 'created_at',
       'updated_at'],
      dtype='object')
>> Success
--------------------------------------

youtube_channel_demographics
Index(['_id', 'youtube_user_id', 'end_date', 'created_at', 'updated_at', '__v',
       'demographics'],
      dtype='object')
>> Success
--------------------------------------

youtube_users
Index(['_id', 'country', 'phone_num', 'kakao_nick', 'kakao_account_id',
       'user_kind', 'created_at', '__v', 'channel_title', 'channel_id',
       'thumbnail_url', 'published_at', 'subscriber_count', 'is_rev_saved',
       'is_subs_saved', 'updated_at', 'brandingSettings', 'contentDetails',
       'contentOwnerDetails', 'etag', 'id', 'kind', 'snippet', 'statistics',
       'status', 'topicDetails', 'con

In [12]:
# Align key formats: cast the id columns of every loaded DataFrame to str.
for key, df in youtube_dict.items():
    # Skip anything that is not a DataFrame.
    if not isinstance(df, pd.DataFrame):
        continue

    # Convert each id column that is present.
    for id_col in ('_id', 'youtube_user_id'):
        if id_col in df.columns:
            df[id_col] = df[id_col].astype(str)

    # Store the converted DataFrame back under the same key.
    youtube_dict[key] = df

In [13]:
# Remove the `df` loop variable left over from the key-normalisation loop.
del df

In [14]:
# Show which collections were loaded into youtube_dict.
youtube_dict.keys()

dict_keys(['youtube_revenue', 'youtube_channel_demographics', 'youtube_users', 'youtube_channel_locations', 'youtube_daily_channel_basics', 'youtube_subscriber'])

### youtube_revenue

In [15]:
# Take the revenue collection out of youtube_dict.
youtube_revenue = youtube_dict['youtube_revenue']

In [16]:
# Number of distinct YouTuber accounts (265 when last run).
youtube_revenue['youtube_user_id'].nunique(dropna=False)

265

In [17]:
# Daily revenue per account: sum every float64/int64 column per (account, date),
# then drop the '__v' column.
num_col = youtube_revenue.select_dtypes(include=['float64', 'int64']).columns  # numeric columns
youtube_revenue = (
    youtube_revenue
    .groupby(['youtube_user_id', 'data_created_at'])[num_col]
    .sum()
    .reset_index()
    .drop(columns='__v')
)

In [18]:
youtube_revenue

Unnamed: 0,youtube_user_id,data_created_at,estimated_revenue,estimated_ad_revenue,estimated_red_partner_revenue,gross_revenue,cpm,ad_impressions,monetized_playbacks,playback_based_cpm
0,627cb611aa6f212355e0b617,2013-07-31,0.000,0.0,0.000,0.0,0,0.0,0,0.0
1,627cb611aa6f212355e0b617,2013-08-01,0.000,0.0,0.000,0.0,0,0.0,0,0.0
2,627cb611aa6f212355e0b617,2013-08-02,0.000,0.0,0.000,0.0,0,0.0,0,0.0
3,627cb611aa6f212355e0b617,2013-08-03,0.000,0.0,0.000,0.0,0,0.0,0,0.0
4,627cb611aa6f212355e0b617,2013-08-04,0.000,0.0,0.000,0.0,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...
507258,66230ee6d8da110bb0744b2d,2024-04-29,32797.134,0.0,2522.759,0.0,0,0.0,0,0.0
507259,66230ee6d8da110bb0744b2d,2024-04-30,40680.753,0.0,2770.255,0.0,0,0.0,0,0.0
507260,66230ee6d8da110bb0744b2d,2024-05-01,35864.687,0.0,2579.477,0.0,0,0.0,0,0.0
507261,66230ee6d8da110bb0744b2d,2024-05-02,38089.134,0.0,2414.442,0.0,0,0.0,0,0.0


### youtube_report_v2

In [None]:
# Take the report_v2 collection out of youtube_dict.
# NOTE(review): 'youtube_report_v2' is not listed in collections_need, so as the notebook
# stands this key is never loaded and the lookup raises KeyError.
youtube_report_v2 = youtube_dict['youtube_report_v2']

In [None]:
# Number of distinct YouTuber accounts (276 when last run).
youtube_report_v2['youtube_user_id'].nunique(dropna=False)

276

In [None]:
# del youtube_report_v2

In [21]:
# Keep the needed columns, de-duplicate, and order by account.
report_v2_columns = ['youtube_user_id', 'content', 'phone_number', 'requested']
youtube_report_v2 = (
    youtube_report_v2[report_v2_columns]
    .drop_duplicates()
    .sort_values('youtube_user_id')
    .reset_index(drop=True)
)

In [22]:
# Convert the date format.
# Take the text after ':' on the third line of 'content', keep its first two
# space-separated tokens and join them into a string matched by '%m월%d일'.
youtube_report_v2['date'] = youtube_report_v2['content'].str.split('\n').str[2].str.split(':').str[1].str.strip().str.split(' ').str[:2].apply(''.join)

# The report text carries no year; 2024 is assumed. The year is parsed together with
# the month/day because strptime defaults a missing year to 1900, which is not a leap
# year, so "02월29일" would raise ValueError before .replace(year=2024) could run.
report_year = 2024
youtube_report_v2['date'] = youtube_report_v2['date'].apply(
    lambda x: datetime.strptime(f'{report_year}년{x}', '%Y년%m월%d일')
)

In [23]:
# Extract the needed values: each metric sits on a fixed line of the 'content' text.
content_lines = youtube_report_v2['content'].str.split('\n')
for field_name, line_no in (
    ('subscriber_count', 5),
    ('views', 7),
    ('watched_sum', 9),
    ('watched_avg', 11),
):
    youtube_report_v2[field_name] = content_lines.str[line_no]

# The raw text is no longer needed once the fields are extracted.
youtube_report_v2 = youtube_report_v2.drop(columns='content')

In [24]:
# Build the final dataset: fixed column order, sorted by account and date, de-duplicated.
report_v2_final_columns = [
    'youtube_user_id', 'date', 'phone_number', 'subscriber_count',
    'views', 'watched_sum', 'watched_avg', 'requested',
]
youtube_report_v2 = (
    youtube_report_v2[report_v2_final_columns]
    .sort_values(['youtube_user_id', 'date'])
    .drop_duplicates()
    .reset_index(drop=True)
)

### youtube_channel_demographics
- 유튜버계정 일일 연령대, 성별 분포
- demographics 컬럼 분포 합 100%

In [19]:
# Take the channel demographics collection out of youtube_dict.
youtube_channel_demographics = youtube_dict['youtube_channel_demographics']

In [20]:
# Number of distinct YouTuber accounts (905 when last run).
youtube_channel_demographics['youtube_user_id'].nunique(dropna=False)

905

In [21]:
# Keep rows that have a 'demographics' value, ordered by account and end date,
# then keep only the needed columns.
youtube_channel_demographics = (
    youtube_channel_demographics[~youtube_channel_demographics['demographics'].isnull()]
    .sort_values(['youtube_user_id', 'end_date'])
    .reset_index(drop=True)
)
demographics_columns = ['youtube_user_id', 'end_date', 'demographics']
youtube_channel_demographics = youtube_channel_demographics[demographics_columns]

In [22]:
# Build the final dataset: expand the 'demographics' dicts into columns next to the keys.
youtube_channel_demographics = pd.concat(
    [
        youtube_channel_demographics,
        pd.json_normalize(youtube_channel_demographics['demographics']),
    ],
    axis=1,
).drop(columns='demographics')

# Only the first 16 columns are kept.
# NOTE(review): the cut-off of 16 is positional — confirm it still matches the expanded columns.
kept_columns = youtube_channel_demographics.columns[:16]
youtube_channel_demographics = (
    youtube_channel_demographics[kept_columns]
    .sort_values(['youtube_user_id', 'end_date'])
    .drop_duplicates()
    .reset_index(drop=True)
)

### youtube_report

In [None]:
# Take the report collection out of youtube_dict.
# NOTE(review): 'youtube_report' is not listed in collections_need, so as the notebook
# stands this key is never loaded and the lookup raises KeyError.
youtube_report = youtube_dict['youtube_report']

In [None]:
# Number of distinct YouTuber accounts (194 when last run).
youtube_report['youtube_user_id'].nunique(dropna=False)

194

In [33]:
# Keep the needed columns, de-duplicate, and order by account.
report_columns = ['youtube_user_id', 'contents', 'phone_number', 'request']
youtube_report = (
    youtube_report[report_columns]
    .drop_duplicates()
    .sort_values('youtube_user_id')
    .reset_index(drop=True)
)

In [34]:
# Convert the date format.
# Take the text after ':' on the third line of 'contents', keep its first two
# space-separated tokens and join them into a string matched by '%m월%d일'.
youtube_report['date'] = youtube_report['contents'].str.split('\n').str[2].str.split(':').str[1].str.strip().str.split(' ').str[:2].apply(''.join)

# The report text carries no year; 2024 is assumed. The year is parsed together with
# the month/day because strptime defaults a missing year to 1900, which is not a leap
# year, so "02월29일" would raise ValueError before .replace(year=2024) could run.
report_year = 2024
youtube_report['date'] = youtube_report['date'].apply(
    lambda x: datetime.strptime(f'{report_year}년{x}', '%Y년%m월%d일')
)

In [35]:
# Extract the needed values: each metric sits on a fixed line of the 'contents' text.
contents_lines = youtube_report['contents'].str.split('\n')
for field_name, line_no in (
    ('subscriber_count', 5),
    ('views', 7),
    ('watched_sum', 9),
    ('watched_avg', 11),
):
    youtube_report[field_name] = contents_lines.str[line_no]

# The raw text is no longer needed once the fields are extracted.
youtube_report = youtube_report.drop(columns='contents')

In [36]:
# Build the final dataset: fixed column order, sorted by account and date, de-duplicated.
report_final_columns = [
    'youtube_user_id', 'date', 'phone_number', 'subscriber_count',
    'views', 'watched_sum', 'watched_avg', 'request',
]
youtube_report = (
    youtube_report[report_final_columns]
    .sort_values(['youtube_user_id', 'date'])
    .drop_duplicates()
    .reset_index(drop=True)
)

### youtube_users

In [23]:
# Take the users collection out of youtube_dict.
youtube_users = youtube_dict['youtube_users']

In [24]:
# Number of distinct channels (883 when last run).
youtube_users['channel_id'].nunique(dropna=False)

883

In [25]:
# Keep only the needed columns.
# Author's notes on the source columns:
#  - published_at: date the channel joined YouTube.
#  - 'subscriberCount' inside 'statistics' differs from the 'subscriber_count' column (subscriber count).
#  - 'channel_id' and the 'uploads' entry of 'contentDetails' carry the same information.
user_columns = ['channel_id', 'channel_title', 'phone_num', 'report_user_id', 'statistics', 'published_at']
youtube_users = youtube_users[user_columns]

In [26]:
# Build the final dataset: expand the 'statistics' dicts into columns.
youtube_users = pd.concat(
    [youtube_users, pd.json_normalize(youtube_users['statistics'])],
    axis='columns',
)
youtube_users = (
    youtube_users
    .drop(columns=['statistics', 'hiddenSubscriberCount'])
    .dropna(how='all')
)

# Replace missing counts with 0.
count_cols = ['viewCount', 'subscriberCount', 'videoCount']
youtube_users[count_cols] = youtube_users[count_cols].fillna(0)

youtube_users = (
    youtube_users
    .sort_values('channel_id')
    .drop_duplicates()
    .reset_index(drop=True)
)

### youtube_channel_locations
- 채널 구독자 위치

In [27]:
# Take the channel locations collection out of youtube_dict.
youtube_channel_locations = youtube_dict['youtube_channel_locations']

In [28]:
# Number of distinct YouTuber accounts (906 when last run).
youtube_channel_locations['youtube_user_id'].nunique(dropna=False)

906

In [29]:
# Keep rows whose 'locations' value is non-empty, then keep only the needed columns.
has_locations = youtube_channel_locations['locations'].apply(lambda x: len(x) > 0)
location_columns = ['youtube_user_id', 'end_date', 'locations']
youtube_channel_locations = youtube_channel_locations[has_locations][location_columns]

In [30]:
# Melt: one row per entry of 'locations'.
youtube_channel_locations = (
    youtube_channel_locations
    .explode('locations')
    .reset_index(drop=True)
)

# Cast: expand each location dict into columns and drop the unused ones.
youtube_channel_locations = pd.concat(
    [youtube_channel_locations, pd.json_normalize(youtube_channel_locations['locations'])],
    axis=1,
).drop(columns=['locations', 'subscribersGained', 'subscribersLost'])

# Remove rows whose columns from position 3 onward sum to 0 (row-wise sum).
location_value_cols = youtube_channel_locations.columns[3:]
location_row_total = youtube_channel_locations[location_value_cols].apply(sum, axis=1)
youtube_channel_locations = youtube_channel_locations[location_row_total != 0]

youtube_channel_locations = (
    youtube_channel_locations
    .drop_duplicates()
    .sort_values(['youtube_user_id', 'end_date'])
    .reset_index(drop=True)
)

### youtube_daily_channel_basics

In [31]:
# Take the daily channel basics collection out of youtube_dict.
youtube_daily_channel_basics = youtube_dict['youtube_daily_channel_basics']

In [32]:
# Number of distinct YouTuber accounts (906 when last run).
youtube_daily_channel_basics['youtube_user_id'].nunique(dropna=False)

906

In [33]:
# Keep rows whose 'daily_basics' value is non-empty, then keep only the needed columns.
# 'end_date' is not kept; the 'day' column is used instead.
has_basics = youtube_daily_channel_basics['daily_basics'].apply(lambda x: len(x) > 0)
youtube_daily_channel_basics = youtube_daily_channel_basics[has_basics]
youtube_daily_channel_basics = youtube_daily_channel_basics[['youtube_user_id', 'daily_basics']]

# Melt: one row per entry of 'daily_basics'.
youtube_daily_channel_basics = (
    youtube_daily_channel_basics
    .explode('daily_basics')
    .reset_index(drop=True)
)

In [34]:
# Bring 'daily_basics' into a uniform shape: list items are flattened into the
# record list, anything else is appended as a single record.
youtube_daily_channel_basics_cast = []
for item in youtube_daily_channel_basics['daily_basics']:
    youtube_daily_channel_basics_cast += item if isinstance(item, list) else [item]

# Expand the collected records into columns.
youtube_daily_channel_basics_cast = pd.json_normalize(youtube_daily_channel_basics_cast)

In [35]:
# Build the final dataset: put the expanded columns next to the account id,
# drop the raw column and replace missing values with 0.
youtube_daily_channel_basics = pd.concat(
    [youtube_daily_channel_basics, youtube_daily_channel_basics_cast],
    axis=1,
)
youtube_daily_channel_basics = (
    youtube_daily_channel_basics
    .drop(columns='daily_basics')
    .fillna(0)
)

# Remove rows whose columns from position 3 onward sum to 0.
basics_value_cols = youtube_daily_channel_basics.columns[3:]
youtube_daily_channel_basics = youtube_daily_channel_basics[
    youtube_daily_channel_basics[basics_value_cols].sum(axis=1) != 0
]
youtube_daily_channel_basics = (
    youtube_daily_channel_basics
    .sort_values(['youtube_user_id', 'day'])
    .reset_index(drop=True)
)

# The intermediate frame is no longer needed.
del youtube_daily_channel_basics_cast

### youtube_subscriber

In [36]:
# Take the subscriber collection out of youtube_dict.
youtube_subscriber = youtube_dict['youtube_subscriber']

In [37]:
# Number of distinct YouTuber accounts (994 when last run).
youtube_subscriber['youtube_user_id'].nunique(dropna=False)

994

In [38]:
# Keep the needed columns and de-duplicate.
subscriber_columns = [
    'youtube_user_id', 'data_created_at',
    'subscribers_gained', 'subscribers_lost', 'subscribers_count',
]
youtube_subscriber = youtube_subscriber[subscriber_columns].drop_duplicates()

# Remove rows whose columns from position 2 onward sum to 0, then order by account and date.
subscriber_value_cols = youtube_subscriber.columns[2:]
youtube_subscriber = youtube_subscriber[
    youtube_subscriber[subscriber_value_cols].sum(axis=1) != 0
]
youtube_subscriber = (
    youtube_subscriber
    .sort_values(['youtube_user_id', 'data_created_at'])
    .reset_index(drop=True)
)

In [39]:
# Delete data that is no longer needed, to minimise memory use.
del youtube_dict

In [11]:
# Data filtering criteria: the date window, with both bounds used inclusively
# by the filters that reference them.
first_date = datetime(year=2023, month=3, day=26)
last_date = datetime(year=2024, month=5, day=3)

In [41]:
# Account ids covered by the final datasets.
# The report / report_v2 tables and youtube_users are intentionally left out.
# Each dataset contributes exactly once (youtube_revenue used to be listed twice),
# and the results are sorted so the id lists are the same from run to run.
id_sets = [
    set(frame['youtube_user_id'])
    for frame in (
        youtube_revenue,
        youtube_channel_demographics,
        youtube_channel_locations,
        youtube_daily_channel_basics,
        youtube_subscriber,
    )
]

# Ids present in at least one dataset (912 when last run).
youtube_user_id_outer = sorted(set.union(*id_sets), key=str)
print('youtube_user_id_outer', len(youtube_user_id_outer))

# Ids present in every dataset (250 when last run).
youtube_user_id_inner = sorted(set.intersection(*id_sets), key=str)
print('youtube_user_id_inner',len(youtube_user_id_inner))

youtube_user_id_outer 912
youtube_user_id_inner 250


### 대용량 테이블 로드
- youtube_videos
- youtube_datas

#### youtube_videos
- 계정별 일일 콘텐츠 정보

In [18]:
# Select the collection.
collection = db['youtube_videos']

# Pipeline stages.
# Match: accounts in youtube_user_id_inner, a 'videos' value other than an empty list,
# and an end_date inside [first_date, last_date].
videos_match = {
    "$match": {
        "youtube_user_id": {"$in": youtube_user_id_inner},
        "videos": {"$ne": []},
        "end_date": {"$gte": first_date, "$lte": last_date},
    }
}
# Sort by account, then by end date.
videos_sort = {"$sort": {"youtube_user_id": 1, "end_date": 1}}
# Return only the fields used downstream.
videos_project = {"$project": {"youtube_user_id": 1, "end_date": 1, "videos": 1}}
pipeline = [videos_match, videos_sort, videos_project]

# Run the pipeline and materialise every returned document.
# NOTE(review): the recorded run of this cell ended in MemoryError — the full result
# apparently does not fit in memory on that machine; confirm before relying on it.
result = list(collection.aggregate(pipeline, allowDiskUse=True))

# Convert the result into a pandas DataFrame.
youtube_videos = pd.DataFrame(result)

MemoryError: 

In [None]:
# Number of distinct YouTuber accounts in the loaded video data.
youtube_videos['youtube_user_id'].nunique(dropna=False)

In [62]:
# Build the final dataset by melting and casting.
# Melt: one row per entry of 'videos'.
youtube_videos = youtube_videos.explode('videos').reset_index(drop=True)

# Cast: expand each video dict into columns, then drop the raw columns.
youtube_videos = pd.concat(
    [youtube_videos, pd.json_normalize(youtube_videos['videos'])],
    axis=1,
)
youtube_videos = youtube_videos.drop(columns=['_id', 'videos']).fillna(0)  # missing values -> 0

# Remove rows whose columns from position 4 onward sum to 0.
video_value_cols = youtube_videos.columns[4:]
youtube_videos = youtube_videos[youtube_videos[video_value_cols].sum(axis=1) != 0]

youtube_videos = (
    youtube_videos
    .drop_duplicates()
    .sort_values(['youtube_user_id', 'end_date'])
    .reset_index(drop=True)
)

In [None]:
# Export (currently disabled)
# youtube_videos.to_csv('C:/py_src/awake/data/youtube_videos.csv',encoding='utf-8-sig',index=False)

#### youtube_datas

In [42]:
# Select the collection.
collection = db['youtube_datas']

# Fields returned by the projection stage.
datas_fields = [
    'youtube_user_id',
    'data_created_at',
    'published_at',
    'channel_id',
    'channel_title',
    'traffic_source_type',
    'yt_search_keyword',
    'subscribed_status',
    'subscriber_count',
    'video_count',
    'view_count',
    'comment_count',
    'like_count',
    'dislike_count',
    'estimated_minutes_watched',
    'average_view_duration',
    'status_code',
    'red_view_count',
]

# Pipeline: match the accounts in youtube_user_id_inner, sort by account and
# creation date, and project the fields above.
# No date filter is applied here; an 'end_date' range filter on first_date/last_date
# was left commented out by the author.
pipeline = [
    {"$match": {"youtube_user_id": {"$in": youtube_user_id_inner}}},
    {"$sort": {"youtube_user_id": 1, "data_created_at": 1}},
    {"$project": dict.fromkeys(datas_fields, 1)},
]

# Run the pipeline and materialise every returned document.
result = list(collection.aggregate(pipeline, allowDiskUse=True))

# Convert the result into a pandas DataFrame.
youtube_datas = pd.DataFrame(result)

In [43]:
# Number of distinct YouTuber accounts in the loaded data (249 when last run).
youtube_datas['youtube_user_id'].nunique(dropna=False)

249

In [44]:
# Put the columns in a fixed order.
need_col = [
    'youtube_user_id',
    'data_created_at',
    'published_at',
    'channel_id',
    'channel_title',
    'traffic_source_type',
    'yt_search_keyword',
    'subscribed_status',
    'subscriber_count',
    'video_count',
    'view_count',
    'comment_count',
    'like_count',
    'dislike_count',
    'estimated_minutes_watched',
    'average_view_duration',
    'status_code',
    'red_view_count',
]
youtube_datas = youtube_datas[need_col]

In [45]:
# Cast: expand the 'traffic_source_type' and 'subscribed_status' dicts into columns,
# then drop the raw columns.
youtube_datas = pd.concat(
    [
        youtube_datas,
        pd.json_normalize(youtube_datas['traffic_source_type']),
        pd.json_normalize(youtube_datas['subscribed_status']),
    ],
    axis=1,
).drop(columns=['traffic_source_type', 'subscribed_status'])

# For the columns from position 6 onward: replace missing values with 0,
# then remove rows whose values sum to 0.
datas_value_cols = youtube_datas.columns[6:]
youtube_datas[datas_value_cols] = youtube_datas[datas_value_cols].fillna(0)
youtube_datas = youtube_datas[youtube_datas[datas_value_cols].sum(axis=1) != 0]

youtube_datas = (
    youtube_datas
    .sort_values(['youtube_user_id', 'data_created_at'])
    .reset_index(drop=True)
)

In [46]:
# Filter by date: keep rows whose data_created_at lies in [first_date, last_date].
opt_date = youtube_datas['data_created_at'].between(first_date, last_date)
youtube_datas = (
    youtube_datas[opt_date]
    .sort_values(['youtube_user_id', 'data_created_at'])
    .reset_index(drop=True)
)
youtube_datas

Unnamed: 0,youtube_user_id,data_created_at,published_at,channel_id,channel_title,yt_search_keyword,subscriber_count,video_count,view_count,comment_count,...,YT_PLAYLIST_PAGE,ANNOTATION,NOTIFICATION,PRODUCT_PAGE,SOUND_PAGE,HASHTAGS,SHORTS,ADVERTISING,UNSUBSCRIBED,SUBSCRIBED
0,627cb611aa6f212355e0b617,2023-03-26,NaT,UCxuEudcvmg4zMQhW7isWi-w,,"{'성팩': 40, '빙수빙': 28, '원칩챌린지 먹방': 14, '수빙수': 1...",0.0,0.0,4235,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3356.0,879.0
1,627cb611aa6f212355e0b617,2023-03-27,NaT,UCxuEudcvmg4zMQhW7isWi-w,,"{'원칩챌린지 먹방': 19, '빙수빙': 18, '성팩': 18, 'one chi...",0.0,0.0,3390,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2714.0,676.0
2,627cb611aa6f212355e0b617,2023-03-28,NaT,UCxuEudcvmg4zMQhW7isWi-w,,"{'수빙수': 18, '빙수빙': 17, '성팩': 17, '원칩챌린지 먹방': 1...",0.0,0.0,3795,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3060.0,735.0
3,627cb611aa6f212355e0b617,2023-03-29,NaT,UCxuEudcvmg4zMQhW7isWi-w,,"{'성팩': 45, '빙수빙': 23, '수빙수': 21, '원칩': 13, '원칩...",0.0,0.0,4322,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3504.0,818.0
4,627cb611aa6f212355e0b617,2023-03-30,NaT,UCxuEudcvmg4zMQhW7isWi-w,,"{'원칩챌린지 먹방': 29, '빙수빙': 28, '원칩': 22, '성팩': 16...",0.0,0.0,4573,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3829.0,744.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82539,66230ee6d8da110bb0744b2d,2024-04-29,NaT,UCIPg5mOIS-KlyxYei_EI50A,,"{'김승연 회장': 290, '쇼츠': 164, '무타구치 렌야': 155, '나훈...",0.0,0.0,139021,26,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,132857.0,6164.0
82540,66230ee6d8da110bb0744b2d,2024-04-30,NaT,UCIPg5mOIS-KlyxYei_EI50A,,"{'쇼츠': 220, '무타구치 렌야': 178, '김승연 회장': 176, '장미...",0.0,0.0,171412,44,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165145.0,6267.0
82541,66230ee6d8da110bb0744b2d,2024-05-01,NaT,UCIPg5mOIS-KlyxYei_EI50A,,"{'나훈아': 152, '나훈아콘서트': 145, '쇼츠': 136, '김승연 회장...",0.0,0.0,150360,46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,145112.0,5248.0
82542,66230ee6d8da110bb0744b2d,2024-05-02,NaT,UCIPg5mOIS-KlyxYei_EI50A,,"{'나훈아': 358, '쇼츠': 126, '김승연 회장': 109, '무타구치 렌...",0.0,0.0,143511,28,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,137492.0,6019.0


In [None]:
# Export (currently disabled)
# df.to_csv('')