In [66]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from sklearn.metrics import roc_auc_score, accuracy_score
from datetime import datetime, timedelta

In [13]:
# Load the channel-level and video-level snapshot tables from the Datamanim repo.
channel, video = (
    pd.read_csv('https://raw.githubusercontent.com/Datamanim/datarepo/main/youtube/channelInfo.csv'),
    pd.read_csv('https://raw.githubusercontent.com/Datamanim/datarepo/main/youtube/videoInfo.csv'),
)

In [15]:
# Peek at the first five rows of the video snapshot table.
video.head(n=5)

Unnamed: 0,videopk,viewcnt,likecnt,dislikecnt,favoritecnt,cmcnt,ct,videoname
0,c5JQp6xafqc,1667010,30474,706,0,6587,2021-10-10 15:20:03,공범 EP1
1,c5JQp6xafqc,1669089,30495,707,0,6589,2021-10-10 15:30:03,공범 EP1
2,c5JQp6xafqc,1674759,30522,711,0,6596,2021-10-10 15:40:02,공범 EP1
3,c5JQp6xafqc,1677026,30555,712,0,6604,2021-10-10 15:50:03,공범 EP1
4,c5JQp6xafqc,1681824,30585,713,0,6600,2021-10-10 16:00:03,공범 EP1


In [21]:
# Convert the 'ct' column of both frames to datetime, modifying each frame in place.
# DataFrame.assign returns a new frame and treats every keyword as a column name,
# so passing `inplace=True` to it only adds an 'inplace' column to a discarded copy;
# direct column assignment is what actually updates `channel` and `video`.
for df in [channel, video]:
    df['ct'] = pd.to_datetime(df['ct'])

In [25]:
# Rebind both frames to copies whose 'ct' column is parsed as datetime.
channel = channel.assign(ct=lambda frame: pd.to_datetime(frame['ct']))
video = video.assign(ct=lambda frame: pd.to_datetime(frame['ct']))

In [24]:
# Number of snapshot rows recorded for each video title.
video.loc[:, 'videoname'].value_counts()

 공범 EP1    3492
 공범 EP2    3204
 공범 EP3    2568
 공범 EP4    2280
 공범 EP5    1562
 공범 EP6    1274
 공범 EP7     555
 공범 EP8     266
Name: videoname, dtype: int64

In [28]:
# Most recent snapshot of each video: order newest-first within a title,
# then keep only the first row per title.
newest_first = video.sort_values(by=['videoname', 'ct'], ascending=[True, False])
newest_first.drop_duplicates(subset='videoname', keep='first')

Unnamed: 0,videopk,viewcnt,likecnt,dislikecnt,favoritecnt,cmcnt,ct,videoname
3491,c5JQp6xafqc,3180532,41043,1113,0,7674,2021-11-01 15:30:03,공범 EP1
6695,23QQ1ru9YQg,2199328,25566,763,0,9171,2021-11-01 15:30:03,공범 EP2
9263,GygoAk1hDU0,1671294,22719,508,0,12499,2021-11-01 15:30:03,공범 EP3
11543,bK0_3Vbfvs8,1818493,22703,408,0,16949,2021-11-01 15:30:03,공범 EP4
13105,hv7FBjskAu0,1503435,19926,421,0,12434,2021-11-01 15:30:04,공범 EP5
14934,AxGKHmXyuAE,1750222,24494,389,0,13025,2021-11-01 15:30:04,공범 EP6
13660,QUjQbgj5IXM,1630200,29335,400,0,18247,2021-11-01 15:30:05,공범 EP7
15200,yZt-h-KcmUE,1289088,25616,3621,0,31663,2021-11-01 15:30:05,공범 EP8


In [36]:
# Earliest subscriber count per channel among snapshots taken on or after 2021-10-03.
recent_channel = channel.loc[channel['ct'] >= '2021-10-03 00:00:00']
(recent_channel
.sort_values(by='ct')
.drop_duplicates(subset='channelname', keep='first')
.set_index('channelname')
.loc[:, ['subcnt']]
)

Unnamed: 0_level_0,subcnt
channelname,Unnamed: 1_level_1
논리왕 전기,922000
츄정ChuJeong,322000
야전삽짱재,257000
김농밀의 농밀한 삶,7520
와글와글 WagleWagle,55000
릴펄 Lilpearl,10100
형사!탐정되다,14900
조나단,215000
꽈뚜룹,1330000
Balming Tiger,54300


In [49]:
# Subscriber change per channel between its first and last snapshot inside the window.
# The window filter is applied once and shared by both lookups.
in_window = channel[channel['ct'].between('2021-10-03 03:00:00', '2021-11-01 15:00:00')]

# Earliest row per channel (oldest first, keep first occurrence).
subcnt_first = (in_window
.sort_values(by='ct', ascending=True)
.drop_duplicates('channelname')
[['channelname', 'subcnt']]
)
# Latest row per channel (newest first, keep first occurrence).
subcnt_last = (in_window
.sort_values(by='ct', ascending=False)
.drop_duplicates('channelname')
[['channelname', 'subcnt']]
)

# The suffixes turn the two 'subcnt' columns into 'subcntfirst' / 'subcntlast'.
subcnt_change = subcnt_first.merge(subcnt_last, on='channelname', suffixes=['first', 'last'], how='left')
subcnt_change['diff'] = subcnt_change['subcntlast'] - subcnt_change['subcntfirst']
subcnt_change[['channelname', 'diff']]

Unnamed: 0,channelname,diff
0,논리왕 전기,-11000
1,츄정ChuJeong,1000
2,야전삽짱재,11000
3,김농밀의 농밀한 삶,1540
4,와글와글 WagleWagle,0
5,릴펄 Lilpearl,11000
6,형사!탐정되다,10300
7,조나단,12000
8,꽈뚜룹,70000
9,Balming Tiger,2500


In [74]:
# Snapshots of episode 1 in chronological order, re-indexed 0..n-1.
ep_one = (video[video['videoname'] == ' 공범 EP1']
.sort_values(by='ct')
.reset_index(drop=True)
)

# Time elapsed since the previous snapshot; the first row is NaT and so never matches.
gap = ep_one['ct'].diff(1)
# A snapshot is irregular when it arrives 20+ minutes after, or within 5 minutes of, its predecessor.
irregular = (gap >= timedelta(minutes=20)) | (gap <= timedelta(minutes=5))

# Show every irregular row together with the rows immediately before and after it,
# derived from the mask instead of hard-coded row labels.
with_neighbours = (
    irregular
    | irregular.shift(1, fill_value=False)
    | irregular.shift(-1, fill_value=False)
)
ep_one.loc[with_neighbours, :]

Unnamed: 0,videopk,viewcnt,likecnt,dislikecnt,favoritecnt,cmcnt,ct,videoname
720,c5JQp6xafqc,2153676,34104,830,0,7120,2021-10-12 19:20:03,공범 EP1
721,c5JQp6xafqc,2228250,34559,849,0,7191,2021-10-13 09:41:37,공범 EP1
722,c5JQp6xafqc,2228250,34559,849,0,7191,2021-10-13 09:41:37,공범 EP1
723,c5JQp6xafqc,2228669,34566,849,0,7194,2021-10-13 09:50:03,공범 EP1
1635,c5JQp6xafqc,2706691,37893,987,0,7410,2021-10-19 17:50:02,공범 EP1
1636,c5JQp6xafqc,2707933,37901,988,0,7410,2021-10-19 18:20:03,공범 EP1
1637,c5JQp6xafqc,2708448,37903,988,0,7410,2021-10-19 18:30:03,공범 EP1


In [88]:
# Date of the earliest snapshot of each video.
# .dt.date yields datetime.date objects; .dt.strftime('%Y-%m-%d') would give strings instead.
first_seen = video.sort_values(by='ct').drop_duplicates(subset='videoname', keep='first')
first_seen[['videoname', 'ct']].assign(ct=lambda df: df['ct'].dt.date)

Unnamed: 0,videoname,ct
10,공범 EP1,2021-10-07
3496,공범 EP2,2021-10-09
6696,공범 EP3,2021-10-14
9264,공범 EP4,2021-10-16
11544,공범 EP5,2021-10-21
13661,공범 EP6,2021-10-23
13106,공범 EP7,2021-10-28
14935,공범 EP8,2021-10-30


In [105]:
# View count of each video at its first snapshot stamped 21:00, highest count first.
ct_sort = video.sort_values(by='ct')
at_nine_pm = ct_sort['ct'].dt.hour.eq(21) & ct_sort['ct'].dt.minute.eq(0)
(ct_sort
.loc[at_nine_pm]
.drop_duplicates('videoname')
.sort_values('viewcnt', ascending=False)
[['viewcnt', 'ct', 'videoname']]
)

Unnamed: 0,viewcnt,ct,videoname
14945,264029,2021-10-30 21:00:08,공범 EP8
13117,252032,2021-10-28 21:00:04,공범 EP7
9275,217674,2021-10-16 21:00:04,공범 EP4
13672,213899,2021-10-23 21:00:06,공범 EP6
11555,201179,2021-10-21 21:00:04,공범 EP5
3507,148144,2021-10-09 21:00:03,공범 EP2
6707,147183,2021-10-14 21:00:04,공범 EP3
21,117340,2021-10-07 21:00:03,공범 EP1


In [110]:
# Dislike-to-like ratio at each video's latest snapshot, lowest ratio first.
latest = video.sort_values(by='ct').drop_duplicates(subset='videoname', keep='last')
(latest
.assign(ratio=lambda df: df['dislikecnt'].div(df['likecnt']))
.sort_values(by='ratio')
[['videoname', 'ratio']]
)

Unnamed: 0,videoname,ratio
13660,공범 EP7,0.013636
14934,공범 EP6,0.015881
11543,공범 EP4,0.017971
13105,공범 EP5,0.021128
9263,공범 EP3,0.02236
3491,공범 EP1,0.027118
6695,공범 EP2,0.029844
15200,공범 EP8,0.141357


In [120]:
# View-count gain per video between its first and last snapshot
# taken from 2021-11-01 00:00 through 15:00 (both bounds inclusive).
nov1_window = video.loc[video['ct'].between('2021-11-01 00:00:00', '2021-11-01 15:00:00')]
(nov1_window
.sort_values(by='ct')
.groupby('videoname')['viewcnt']
.apply(lambda views: views.iloc[-1] - views.iloc[0])
.to_frame(name='viewcnt')
)

Unnamed: 0_level_0,viewcnt
videoname,Unnamed: 1_level_1
공범 EP1,13298
공범 EP2,10300
공범 EP3,9927
공범 EP4,9824
공범 EP5,10824
공범 EP6,14141
공범 EP7,26949
공범 EP8,89147


In [125]:
# Rows that exactly repeat an earlier row (the first occurrence itself is not flagged).
# DataFrame.duplicated yields this mask directly and keeps the frame's row order,
# unlike a set difference of index labels, whose iteration order is not guaranteed.
video.loc[video.duplicated(), ['ct', 'videoname']]

Unnamed: 0,ct,videoname
722,2021-10-13 09:41:37,공범 EP1
3927,2021-10-13 09:41:37,공범 EP2
