# Demo

## Setup

In [53]:
import getpass
import os

import IPython
import IPython.display as disp

# Display the value of every top-level expression in a cell, not only the last one.
(IPython.core.interactiveshell
    .InteractiveShell.ast_node_interactivity) = "all"

def clear():
    """Clear the current cell output via ``disp.clear_output`` with ``wait=True``."""
    disp.clear_output(wait=True)

def output(disp_os):
    """Display each object of ``disp_os`` in order, one ``disp.display`` call per object.

    Args:
        disp_os: Iterable of displayable objects.
    """
    for displayable in disp_os:
        disp.display(displayable)

def results_report(disp_os):
    """Display ``disp_os`` framed as a "Results" section.

    The objects are preceded by a horizontal rule and a ``# Results`` heading
    and followed by a closing horizontal rule, then handed to ``output``.

    Args:
        disp_os: List of displayable objects (it is concatenated with lists).
    """
    opening = [disp.Markdown('---'), disp.Markdown('# Results')]
    closing = [disp.Markdown('---')]
    output(opening + disp_os + closing)

In [54]:
import sys

# Install dependencies with the interpreter that runs this kernel
# (`sys.executable`), so the packages land in the kernel's own environment.
# NOTE(review): pandas is installed without a version pin; whether
# requirements.txt pins versions cannot be seen from here.
!{sys.executable} -m pip install --quiet --user pandas
!{sys.executable} -m pip install --quiet --user -r requirements.txt

In [55]:
from dcollect.plugins import fasthttp

from dcollect import api_tiktok as tiktok
from dcollect import api_youtube as youtube
from dcollect import api_youtubei as youtubei

import pandas as pd


# Plugin registry handed to every API object below; the fasthttp plugin
# instance is registered under the 'http' key.
modules = dict(http=fasthttp())

# Headers handed to every API object below: a desktop Chrome 87 User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36'
    ),
}

def df_report_from_json(items):
    """Flatten JSON-like records into a DataFrame and display a short report.

    Nested keys are flattened into dotted column names (``sep='.'``). The
    report, rendered through ``results_report``, contains a preview
    (``df.head()``) and summary statistics (``df.describe()``).

    When the records normalize to a frame without any columns (for example an
    empty list), a short notice is shown instead of the preview and stats.

    Args:
        items: Records accepted by ``pandas.json_normalize`` (the notebook
            passes a list of dicts).
    """
    df = pd.json_normalize(items, sep='.')

    # DataFrame.describe() raises ValueError on a frame without columns,
    # which is what an empty result set normalizes to.
    if df.columns.empty:
        results_report([
            disp.Markdown('## Data Preview'),
            disp.Markdown('*No data returned.*')
        ])
        return

    results_report([
        disp.Markdown('## Data Preview'),
        df.head(),
        disp.Markdown('## Stats'),
        df.describe()
    ])

## Data Collection

### YouTube (United States)

Initial setup. Be sure to have your API key ready. For details on how to obtain an API key, read [YouTube Data API Overview, Introduction: Before you start](https://developers.google.com/youtube/v3/getting-started#before-you-start).

In [56]:
# Value passed as the `count` argument of the trending/search calls below.
count = 10
# SECURITY: no API key is stored in this notebook. Any key that was ever
# committed here must be treated as leaked and revoked by its owner.
# The key is read from the YOUTUBE_API_KEY environment variable; when that is
# unset or empty, it is requested through a hidden (non-echoing) prompt.
api_key_testing = os.environ.get('YOUTUBE_API_KEY')
api_key = api_key_testing or getpass.getpass('YouTube Data API Key: ')

Create a YouTube Web API object.

In [57]:
# API object built from the shared plugin registry, request headers and the API key.
youtube_o = youtube.api(modules=modules, headers=headers, key=api_key)

#### Categories

In [58]:
# Call the categories endpoint wrapper, then report on the materialised results.
items = youtube_o.categories()
df_report_from_json([*items])

---

# Results

## Data Preview

Unnamed: 0,id,title
0,1,Film & Animation
1,2,Autos & Vehicles
2,10,Music
3,15,Pets & Animals
4,17,Sports


## Stats

Unnamed: 0,id,title
count,32,32
unique,32,31
top,42,Comedy
freq,1,2


---

#### Trending

In [59]:
# Call the trending wrapper with the shared `count`, then report on the
# materialised results.
items = youtube_o.video.trending(count=count)
df_report_from_json([*items])


---

# Results

## Data Preview

Unnamed: 0,id,title,description,time,length,tags,creator.id,stats.like,stats.dislike,stats.comment,stats.view,video.quality,category.id
0,Fbr55An8VPI,HIGHLIGHTS | Canelo Alvarez vs. Avni Yildirim,"February 27th, 2021 -- Canelo Alvarez vs. Avni...",2021-02-28 04:39:19+00:00,0 days 00:08:06,"[dazn, boxing, highlights, full fight, ko, kno...",UCurvRE5fGcdUgCYWgh-BDsg,39239,3892,7456.0,2933849,HD,17
1,p0GUZVU_TTU,Welcome To Our New House,Subscribe: http://bit.ly/SubToRoman\r\nWelcome...,2021-02-27 19:35:40+00:00,0 days 00:11:49,"[Roman, Atwood, The Atwoods, Brittney, Family,...",UC-SV8-bUJfXjrRMnp7F8Wzw,268684,2949,34718.0,3140799,HD,22
2,l-ETi70TbxI,I bought $1000 worth of Bootleg merch,Buy dat merch. But not dat merch. I'm upset an...,2021-02-28 18:56:39+00:00,0 days 00:06:23,"[Bootleg merch, merch, Animation, Boot, leg, s...",UCvUmwreRrbxeR1mbmojj8fg,58807,628,7168.0,447587,HD,23
3,n2e3R3Fbl3w,I am NOT Ferran's Father...(Truth Revealed) | ...,I am NOT Ferran's Father...(Truth Revealed) Al...,2021-02-28 16:33:25+00:00,0 days 00:14:48,"[The Royalty Family, Royalty Family, Andrea Es...",UCja7QUMRG9AD8X2F_vXFb9A,181776,5262,,2118303,HD,22
4,vAbdZemOkiA,The Problem with Cheetahs,Check out The Secret Life of Cheetahs on Curio...,2021-02-28 15:59:07+00:00,0 days 00:08:17,,UCHsRtomD4twRf5WVHHk-cMw,67013,395,5694.0,734490,HD,27


## Stats

Unnamed: 0,length,stats.like,stats.dislike,stats.comment,stats.view
count,10,10.0,10.0,9.0,10.0
mean,0 days 00:14:24,100992.6,2148.2,9911.555556,1646355.0
std,0 days 00:10:18.879273813,74507.24239,1690.669479,9567.336922,1010435.0
min,0 days 00:04:05,39073.0,395.0,2797.0,447587.0
25%,0 days 00:08:03,51810.25,485.5,5694.0,700356.8
50%,0 days 00:10:03,69647.5,2186.0,7456.0,1782242.0
75%,0 days 00:18:48,126513.5,3081.0,8982.0,2293506.0
max,0 days 00:36:56,268684.0,5262.0,34718.0,3140799.0


---

#### Info

In [60]:
# Look up two videos by id, then report on the materialised results.
items = youtube_o.video.info(id=['tH2tKigOPBU', '_uwNDiU04zE'])
df_report_from_json([*items])

---

# Results

## Data Preview

Unnamed: 0,id,title,description,time,length,tags,creator.id,stats.like,stats.dislike,stats.comment,stats.view,video.quality,category.id
0,tH2tKigOPBU,Mars Rover Landing Overview- NOW SAFE ON MARS!!!,This will be cooler than the Super Bowl. Thank...,2021-02-15 03:35:30+00:00,0 days 00:17:18,,UCY1kMZp36IQSyNx_9h4mpCg,495462,4809,35315,9247149,HD,28
1,_uwNDiU04zE,Press Subscribe For....,Subscribe and I'll Donate $0.10 To Charity\nPl...,2021-02-13 21:00:01+00:00,0 days 00:11:59,,UCX6OQ3DkcsbYNE6H8uQQuVA,1978493,15345,145168,24782009,HD,24


## Stats

Unnamed: 0,length,stats.like,stats.dislike,stats.comment,stats.view
count,2,2.0,2.0,2.0,2.0
mean,0 days 00:14:38.500000,1236978.0,10077.0,90241.5,17014580.0
std,0 days 00:03:45.567063198,1048661.0,7450.077047,77677.801234,10984800.0
min,0 days 00:11:59,495462.0,4809.0,35315.0,9247149.0
25%,0 days 00:13:18.750000,866219.8,7443.0,62778.25,13130860.0
50%,0 days 00:14:38.500000,1236978.0,10077.0,90241.5,17014580.0
75%,0 days 00:15:58.250000,1607735.0,12711.0,117704.75,20898290.0
max,0 days 00:17:18,1978493.0,15345.0,145168.0,24782010.0


---

#### Search

In [61]:
# Search with an empty keyword and the shared `count`, then report on the
# materialised results.
items = youtube_o.video.search(count=count, keyword='')
df_report_from_json([*items])

---

# Results

## Data Preview

Unnamed: 0,id,title,description,time,tags,creator.id
0,hhwwCrm4iCw,"NBA Top 10 Plays Of The Night | February 20, 2021",Check out the top 10 plays of the night from F...,2021-02-21 07:07:57+00:00,,UCWJ2lWNubArHWmf3FIHbfcQ
1,F4plEk_iufY,UFC Vegas 19: Derrick Lewis - &quot;I Like The...,UFC heavyweight contender Derrick Lewis talks ...,2021-02-21 04:53:39+00:00,,UCvgfXK4nTYKudb0rFR6noLA
2,PL3_SgjHhQwwL5deWscrUnGVxPtPjC2e8Q,nutella Ice Cream Rolls,nutella Ice Cream Rolls There are different wa...,2018-04-08 11:12:42+00:00,,UCAon55hNT6ESbdQ5y1hRWTA
3,tHJLvOiROxI,BTS (방탄소년단) &#39;Life Goes On&#39; (Video Call...,Connect with BTS: https://ibighit.com/bts http...,2021-02-20 10:20:05+00:00,,UCLkAepWjdylmXSltofFvsYQ
4,VD88tRnIigI,Arsenal v. Manchester City | PREMIER LEAGUE HI...,Want more Premier League? Check out Peacock Pr...,2021-02-21 19:13:19+00:00,,UCqZQlzSHbVJrwrn5XvzrzcA


## Stats

Unnamed: 0,id,title,description,time,tags,creator.id
count,10,10,10,10,0.0,10
unique,10,10,10,10,0.0,10
top,W8jfBsKpYnI,Hawks Give Up Wide Open Dunk Game Winner vs Ca...,A polêmica da rodada foi a expulsão do lateral...,2021-02-21 04:53:39+00:00,,UCnzbVkknmqhRqUKg7RDTDeg
freq,1,1,1,1,,1
first,,,,2018-04-08 11:12:42+00:00,,
last,,,,2021-02-24 13:38:11+00:00,,


---

#### Channels

In [62]:
# Look up two channels by id, then report on the materialised results.
items = youtube_o.channel.info(
    id=['UC8Zo5A8qICfNAzVGDY_VT7w', 'UC0VOyT2OCBKdQhF3BAbZ-1g'],
)
df_report_from_json([*items])

---

# Results

## Data Preview

Unnamed: 0,id,title,description,time,stats.follower,stats.view,stats.post
0,UC8Zo5A8qICfNAzVGDY_VT7w,MALINDA,Hello friends! I am a singer/songwriter/actor...,2012-03-01 18:12:44+00:00,310000,20314751,76
1,UC0VOyT2OCBKdQhF3BAbZ-1g,ArianaGrandeVevo,Listen & download positions: https://arianagra...,2010-10-21 23:38:49+00:00,18100000,16203597447,134


## Stats

Unnamed: 0,stats.follower,stats.view,stats.post
count,2.0,2.0,2.0
mean,9205000.0,8111956000.0,105.0
std,12579430.0,11443310000.0,41.012193
min,310000.0,20314750.0,76.0
25%,4757500.0,4066135000.0,90.5
50%,9205000.0,8111956000.0,105.0
75%,13652500.0,12157780000.0,119.5
max,18100000.0,16203600000.0,134.0


---

## YouTube Internals

In [63]:
# API object built from the shared plugin registry and request headers
# (no API key is passed here).
youtubei_o = youtubei.api(modules=modules, headers=headers)

### Video Ads

In [64]:
items = youtubei_o.ad.placements(
    id = ['ur560pZKRfg', '_uwNDiU04zE']
)

# One summary row per element yielded by `placements`.
res = []

for item in items:
    # A `None` element is treated as "no ad data for this video".
    has_ad = item is not None

    # True when at least one ad of the record has the START kind.
    has_ad_at_beginning = has_ad and any(
        ad['kind'] == youtubei.api.ad.kinds.START
        for ad in item['ads']
    )

    res.append({
        # A missing record has no 'id' to read; indexing `None` would raise
        # TypeError, so the id is reported as None in that case.
        # NOTE(review): confirm whether `placements` can actually yield None.
        'id': item['id'] if has_ad else None,
        'has_ad': has_ad,
        'has_ad_at_beginning': has_ad_at_beginning
    })

df_report_from_json(res)

---

# Results

## Data Preview

Unnamed: 0,id,has_ad,has_ad_at_beginning
0,_uwNDiU04zE,True,True
1,ur560pZKRfg,True,True


## Stats

Unnamed: 0,id,has_ad,has_ad_at_beginning
count,2,2,2
unique,2,1,1
top,_uwNDiU04zE,True,True
freq,1,2,2


---

### TikTok (United States)

In [65]:
# Value passed as the `count` argument of the TikTok trending call below.
count = 10

# API object built from the shared plugin registry and request headers.
tiktok_o = tiktok.api(modules=modules, headers=headers)

#### Trending

In [66]:
# Call the trending wrapper with the shared `count`, then report on the
# materialised results.
items = tiktok_o.video.trending(count=count)
df_report_from_json([*items])

---

# Results

## Data Preview

Unnamed: 0,id,description,time,length,tags,creator.id,creator.title,creator.description,creator.stats.follower,creator.stats.following,creator.stats.like,creator.stats.view,creator.stats.post,stats.like,stats.comment,stats.view,stats.share,video.quality
0,6925559746128907526,The house on the left is Owned by #IceDerulo. ...,2021-02-04 15:49:07,0 days 00:00:17,"[, icederulo]",21609287,Jason Derulo,“Lifestyle” OUT NOW\nBooking Frank@23-manageme...,43500000,21,1914,992700000,651,2200000,12800,19700000,12000,HD
1,6932953172717686022,Reply to @therussiansoldier1 indeed,2021-02-24 13:59:22,0 days 00:00:52,[],6770811947673404422,Spice King,thank you for 10.1M ✨\nofficialspiceking@gmail...,10100000,666,18000,304000000,336,1600000,16200,8700000,2382,HD
2,6925850612991151365,The moment your life flashes before your👀,2021-02-05 10:37:47,0 days 00:00:51,,6813560925275128838,Kris HC,Daily Subpar Content\n🇨🇦\nKris@henrytalents.co...,23900000,1350,14400,964100000,992,8100000,51000,32700000,96200,HD
3,6904810145583287557,TY @tiredmamateacher !!,2020-12-10 17:50:07,0 days 00:00:13,[],6604157541621252101,Moonpie,Moonpie Starbox\n👀👇,4000000,1610,5402,46600000,374,1600000,19600,21500000,184500,HD
4,6909894241112050949,When mom gets mad at you for being a burrito #...,2020-12-24 10:38:57,0 days 00:00:20,"[mom, burrito, public, walmart, immaburrito]",6757815847807566853,AMYYWOAHH,IMMA BURRITO \nDANG IT !!\nbusiness inquires->...,11200000,18,3788,482700000,1019,930100,19100,6300000,3647,HD


## Stats

Unnamed: 0,length,creator.stats.follower,creator.stats.following,creator.stats.like,creator.stats.view,creator.stats.post,stats.like,stats.comment,stats.view,stats.share
count,10,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,0 days 00:00:41.700000,14221970.0,536.4,6828.0,376490000.0,602.3,2796630.0,25271.7,16870000.0,111742.0
std,0 days 00:00:19.020748904,12738620.0,563.796102,6741.991084,352253500.0,564.960382,2409834.0,14305.809543,9620701.0,174217.799179
min,0 days 00:00:13,419700.0,18.0,181.0,4600000.0,31.0,736200.0,5417.0,3200000.0,1924.0
25%,0 days 00:00:23,5525000.0,103.0,2008.5,105725000.0,162.75,1600000.0,16925.0,10175000.0,4802.0
50%,0 days 00:00:51.500000,11350000.0,400.5,4379.5,306150000.0,502.5,1900000.0,21550.0,15850000.0,17900.0
75%,0 days 00:00:57.750000,19250000.0,670.5,12150.5,446750000.0,906.75,2500000.0,30800.0,21050000.0,162425.0
max,0 days 00:00:59,43500000.0,1610.0,18000.0,992700000.0,1841.0,8100000.0,51000.0,32700000.0,544100.0


---

## !! EXAMPLE BREAK: Data wrangling

### Videos

Coming soon