In [21]:
import json
import os
import re
import time
import uuid
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List

import matplotlib.pyplot as plt
import seaborn as sns
import requests
from tqdm.notebook import tqdm

In [22]:
STREAMER_IDS = {
    "northernlion": "14371185",
    "singsing": "21390470",
    "rinbanana": "434464251"
}
STREAMER = "northernlion"

In [23]:
CLIENT_ID = "qeyx02kvx8uffv4std0did44iyoj35"
CLIENT_SECRET = "s9jf674kb2ekkqqyv1nrs8uftnxuum"
STREAMER_ID = STREAMER_IDS[STREAMER] # https://www.twitch.tv/northernlion

In [24]:
@dataclass
class Comment:
    commenter: str
    createdAt: datetime
    contentOffsetSeconds: int
    message: str
    video_id: str
    game: str

In [25]:
def get_access_token():
    app_access_token_response = requests.post(
        "https://id.twitch.tv/oauth2/token",
        params={
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "grant_type": "client_credentials",
        }
    )

    expires_in = app_access_token_response.json()["expires_in"]
    expiration_time_ = time.time() + expires_in
    return app_access_token_response.json()["access_token"], expiration_time_

app_access_token, expiration_time = get_access_token()

In [26]:
base_api_url = "https://api.twitch.tv/helix" 
def twitch_api_get(url, params=None):
    global app_access_token, expiration_time
    if time.time() > expiration_time:
        print("Refreshing token")
        app_access_token, expiration_time = get_access_token()
    full_url = os.path.join(base_api_url, url)
    return requests.get(
        full_url,
        params=params,
        headers={
            "Client-Id": CLIENT_ID,
            "Authorization": f"Bearer {app_access_token}",
        }
    )

In [27]:
videos = twitch_api_get("videos", params={"user_id": STREAMER_ID, "type": "archive"})

In [28]:
# util functions

def get_datetime(date_string):
    return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ"
                             if "." in date_string
                             else "%Y-%m-%dT%H:%M:%SZ")

def get_vod_link_from_comment(comment: Comment, offset=0) -> str:
    return f"https://twitch.tv/videos/{comment.video_id}?t={comment.contentOffsetSeconds + offset}s"

headers = {
    "Client-ID": "kd1unb4b3q4t58fwlpcbzcbnm76a8fp",
    "Accept": "application/vnd.twitchtv.v5+json",
    'X-Device-Id': uuid.uuid4().hex,
}

def gql(query: str):
    response = requests.post("https://gql.twitch.tv/gql", json={"query": query}, headers=headers)
    if response.status_code != 200 or "errors" in response.json():
        print(response.json())
        print(response.request.body)
        raise RuntimeError("GQL request failed")
    return response

In [29]:
def get_video_chapters(video_id: str) -> list:
    has_next_page = True
    cursor = None
    chapters = []
    while has_next_page:
        query = f"""
            query {{
                video(id: "{video_id}") {{
                    moments(momentRequestType: VIDEO_CHAPTER_MARKERS {f', after: "{cursor}"' if cursor else ''}) {{
                        edges {{
                            cursor
                            node {{
                                description
                                positionMilliseconds
                            }}
                        }}
                        pageInfo {{
                            hasNextPage
                        }}
                    }}
                }}
            }}
        """
        response = gql(query).json()['data']
        print(response)
        cursor = response['video']['moments']['edges'][-1]['cursor'] if len(response['video']['moments']['edges']) > 0 else None
        has_next_page = response['video']['moments']['pageInfo']['hasNextPage']
        chapters.extend(e['node'] for e in response['video']['moments']['edges'])
    return chapters

In [30]:
DURATION_RE = re.compile(r"(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?")

result: Dict[str, List[Comment]] = defaultdict(list)
chapters: Dict[str, List] = defaultdict(list)
data_dir = f"data-{STREAMER}"
os.makedirs(data_dir, exist_ok=True)
for video in tqdm(videos.json()["data"]):
    res = []
    has_next_page = True
    cursor = None
    video_id = video["id"]
    chapters[video_id] = get_video_chapters(video_id)
    duration_match = DURATION_RE.match(video["duration"])
    duration = sum(int(x or 0) * 60 ** i for i, x in enumerate(reversed(duration_match.groups())))
    
    with tqdm(total=duration, unit="s", desc=video['created_at']) as video_bar:
        if os.path.isfile(f"{data_dir}/{video['created_at']}.json"):
            with open(os.path.join(data_dir, f"{video['created_at']}.json")) as f:
                objs = json.load(f)
                result[video["created_at"]] = [
                    Comment(comment["commenter"],
                            datetime.fromisoformat(comment["createdAt"]),
                            comment["contentOffsetSeconds"],
                            comment["message"],
                            comment["video_id"],
                            comment["game"] if "game" in comment else None
                    )
                    for comment in objs
                ]
                video_bar.update(video_bar.total)
                continue
        while has_next_page:
            query = f"""
                query {{
                    video(id: "{video_id}") {{
                        comments{f'(after: "{cursor}")' if cursor else ''} {{
                            edges {{
                                cursor
                                node {{
                                    commenter {{
                                        displayName
                                        login
                                    }}
                                    createdAt
                                    contentOffsetSeconds
                                    message {{
                                        fragments {{
                                            text
                                        }}
                                    }}
                                }}
                            }}
                            pageInfo {{
                                hasNextPage
                            }}
                        }}
                    }}
                }}
            """
            response = gql(query)
            comments = response.json()["data"]["video"]["comments"]
            has_next_page = comments["pageInfo"]["hasNextPage"]
            if len(comments["edges"]) == 0:
                print(response.json())
                raise IOError("Bad.")
            for comment in comments["edges"]:
                if len(comment["node"]["message"]["fragments"]) == 0:
                    print(comment)
                    continue
                res.append(Comment(
                    commenter=comment["node"]["commenter"]["displayName"] if comment["node"]["commenter"] else "UNKNOWN",
                    createdAt=get_datetime(comment["node"]["createdAt"]),
                    contentOffsetSeconds=comment["node"]["contentOffsetSeconds"],
                    message=comment["node"]["message"]["fragments"][0]["text"],
                    video_id=video_id,
                    game=max((c for c in chapters[video_id] if c["positionMilliseconds"] // 1000 <= comment["node"]["contentOffsetSeconds"]),
                             key=lambda c: c["positionMilliseconds"], default={"description": "UNKNOWN"})["description"]
                ))
            cursor = comments["edges"][-1]["cursor"]
            video_bar.update(comments["edges"][-1]["node"]["contentOffsetSeconds"] - video_bar.n)
        result[video["created_at"]] = res
        with open(f"{data_dir}/{video['created_at']}.json", "w") as f:
            json.dump(res, f, indent=2, default=lambda o: o.isoformat() if isinstance(o, datetime) else o.__dict__)

        

  0%|          | 0/20 [00:00<?, ?it/s]

{'video': {'moments': {'edges': [], 'pageInfo': {'hasNextPage': False}}}}


2023-09-13T16:31:03Z:   0%|          | 0/161 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Super Auto Pets', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'im a big boy | !prime !docket', 'positionMilliseconds': 12389000}}, {'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 12904000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-12T16:15:21Z:   0%|          | 0/17585 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Super Auto Pets', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'victim mentality | !prime !docket', 'positionMilliseconds': 6953000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-11T17:04:09Z:   0%|          | 0/17890 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [], 'pageInfo': {'hasNextPage': False}}}}


2023-09-07T18:28:54Z:   0%|          | 0/9640 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [], 'pageInfo': {'hasNextPage': False}}}}


2023-09-07T16:10:09Z:   0%|          | 0/3227 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Super Auto Pets', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'FULL LOBBY? | !prime !docket', 'positionMilliseconds': 11141000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-06T16:15:20Z:   0%|          | 0/17436 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 9316000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-05T17:02:44Z:   0%|          | 0/14496 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Armored Core VI: Fires of Rubicon', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 10535000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-04T16:08:50Z:   0%|          | 0/17775 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Armored Core VI: Fires of Rubicon', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'Jackbox Party Packs', 'positionMilliseconds': 9763000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-09-01T16:22:19Z:   0%|          | 0/17217 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Armored Core VI: Fires of Rubicon', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 10563000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-31T16:05:19Z:   0%|          | 0/18344 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'London 2012: The Official Video Game', 'positionMilliseconds': 10592000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-30T16:06:01Z:   0%|          | 0/18471 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [], 'pageInfo': {'hasNextPage': False}}}}


2023-08-29T17:58:34Z:   0%|          | 0/11712 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'Sporcle', 'positionMilliseconds': 11366000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-28T16:16:08Z:   0%|          | 0/17559 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'F1 23', 'positionMilliseconds': 4210000}}, {'cursor': '', 'node': {'description': 'jackbox fridays | !F123 !prime !docket', 'positionMilliseconds': 11578000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-25T16:17:43Z:   0%|          | 0/17665 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'hit me | !snap !docket !prime 🐢', 'positionMilliseconds': 5866000}}, {'cursor': '', 'node': {'description': '#sponsored Marvel Snap', 'positionMilliseconds': 8548000}}, {'cursor': '', 'node': {'description': "he's snapping and sapping | !prime !docket", 'positionMilliseconds': 13109000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-24T16:20:32Z:   0%|          | 0/17014 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'the only self-aware gambler | !snap !docket !prime 🐢', 'positionMilliseconds': 5761000}}, {'cursor': '', 'node': {'description': 'fred durst: i got a chainsaw | !snap !docket !prime 🐢', 'positionMilliseconds': 10134000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-23T16:15:44Z:   0%|          | 0/17186 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'Super Auto Pets', 'positionMilliseconds': 11581000}}, {'cursor': '', 'node': {'description': 'CELEBRATING THE STEAM RELEASE OF MARVEL SNAP (#ad) | !snap !docket !prime 🐢', 'positionMilliseconds': 13629000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-22T16:14:20Z:   0%|          | 0/21879 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'The Texas Chain Saw Massacre', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 14672000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-21T16:06:26Z:   0%|          | 0/18537 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': "escaping to the one place that hasn't been corrupted by capitalism: Space (#ad) | !jumplight !docket !prime 🐢", 'positionMilliseconds': 2907000}}, {'cursor': '', 'node': {'description': 'the password is... | !jumplight !docket !prime 🐢', 'positionMilliseconds': 7197000}}, {'cursor': '', 'node': {'description': 'Jackbox Party Packs', 'positionMilliseconds': 11543000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-18T16:15:46Z:   0%|          | 0/17413 [00:00<?, ?s/s]

{'video': {'moments': {'edges': [{'cursor': '', 'node': {'description': 'Just Chatting', 'positionMilliseconds': 0}}, {'cursor': '', 'node': {'description': 'what do we tell the god of death | !docket !prime 🐢', 'positionMilliseconds': 5913000}}, {'cursor': '', 'node': {'description': 'Super Auto Pets', 'positionMilliseconds': 14242000}}], 'pageInfo': {'hasNextPage': False}}}}


2023-08-17T16:14:21Z:   0%|          | 0/17454 [00:00<?, ?s/s]

In [31]:
# Comments by coolseel

c = Counter()
[(c, get_vod_link_from_comment(c, -10)) for cs in result.values() for c in cs if c.commenter == "coolseel"]

[(Comment(commenter='coolseel', createdAt=datetime.datetime(2023, 9, 12, 16, 18, 3, 376000), contentOffsetSeconds=161, message="I'm happy with my 170 kJ in 30 mins PB but I'll get there eventually Jupijej", video_id='1923683200', game='Super Auto Pets'),
  'https://twitch.tv/videos/1923683200?t=151s'),
 (Comment(commenter='coolseel', createdAt=datetime.datetime(2023, 9, 12, 16, 26, 16, 81000), contentOffsetSeconds=654, message='no', video_id='1923683200', game='Super Auto Pets'),
  'https://twitch.tv/videos/1923683200?t=644s'),
 (Comment(commenter='coolseel', createdAt=datetime.datetime(2023, 9, 12, 16, 35, 42, 844000), contentOffsetSeconds=1221, message='slovenia NONTARGET', video_id='1923683200', game='Super Auto Pets'),
  'https://twitch.tv/videos/1923683200?t=1211s'),
 (Comment(commenter='coolseel', createdAt=datetime.datetime(2023, 9, 12, 16, 37, 16, 77000), contentOffsetSeconds=1314, message='top 1% btw', video_id='1923683200', game='Super Auto Pets'),
  'https://twitch.tv/videos

In [None]:
plt.style.use("seaborn-v0_8-dark")
# Plots, best bits etc.
categories = {
    "any": [""],
    "funny": ["LUL", "ICANT", "KEKW"],
    "sarcastic": ["ICANT"],
    "pogged": ["Pog", "POGCRAZY", "LETSGO"],
    "scary": ["monkaS"],
    "+2": ["+2"],
    "-2": ["-2"],
    "controversial": ["+2", "-2"],
    "end": ["ty ty ty"],
    "horny": ["COCKA"],
    "shocking": ["Cereal"],
    "bat": ["BatChest"],
}

game = ""
category = "any"

keywords = categories[category]
bucket_width = 30
overall_counter = Counter()
for k, v in sorted(result.items(), key=lambda kv: kv[0], reverse=True):
    video_id = v[0].video_id
    cmap = plt.get_cmap('Dark2')
    c = Counter(c.contentOffsetSeconds // bucket_width for c in v if any(k in c.message for k in keywords) and (game in c.game or c.game == "UNKNOWN"))
    print(c)
    overall_counter += {(bit[0], video_id): bit[1] for bit in c.items()}
    continue
    keys = sorted(c.keys())
    x = [k * bucket_width / 60 for k in keys]
    y = [c[k] for k in keys]
    chapters_here = chapters[video_id]
    fig, ax = plt.subplots(1, 1)
    plt.title(k[:len('YYYY-MM-DD')])
    plt.plot(x, y)
    sns.despine()
    ymax = ax.get_ylim()[1]
    for i, chapter in enumerate(chapters_here):
        chapter_start_min = chapter['positionMilliseconds'] / 1000 / 60
        chapter_end_min = chapters_here[i + 1]['positionMilliseconds'] / 1000 / 60 if i + 1 < len(chapters_here) else max(x)
        if chapter_end_min - chapter_start_min < 10:
            continue
        plt.axvspan(chapter_start_min, chapter_end_min, alpha=0.5, color=cmap(i), zorder=-10)
        plt.text(chapter_start_min + 1, ymax - 1, chapter['description'], fontsize=8, verticalalignment='top')
print(f"Top bits:")
for bit in overall_counter.most_common(100):
    print(f"https://twitch.tv/videos/{bit[0][1]}?t={bit[0][0] * bucket_width - 10}s \t {bit[1]} occurrences")

Counter({712: 9, 24: 8, 35: 7, 45: 7, 552: 7, 597: 7, 854: 7, 49: 6, 52: 6, 54: 6, 59: 6, 65: 6, 246: 6, 547: 6, 749: 6, 812: 6, 816: 6, 827: 6, 857: 6, 40: 5, 43: 5, 64: 5, 66: 5, 67: 5, 131: 5, 499: 5, 526: 5, 562: 5, 580: 5, 584: 5, 751: 5, 818: 5, 820: 5, 25: 4, 26: 4, 32: 4, 37: 4, 38: 4, 48: 4, 56: 4, 60: 4, 63: 4, 70: 4, 146: 4, 225: 4, 244: 4, 247: 4, 529: 4, 534: 4, 538: 4, 541: 4, 550: 4, 581: 4, 585: 4, 739: 4, 831: 4, 4: 3, 28: 3, 31: 3, 41: 3, 44: 3, 53: 3, 71: 3, 76: 3, 79: 3, 152: 3, 243: 3, 248: 3, 257: 3, 277: 3, 310: 3, 314: 3, 435: 3, 535: 3, 548: 3, 579: 3, 586: 3, 594: 3, 600: 3, 601: 3, 602: 3, 603: 3, 709: 3, 740: 3, 741: 3, 742: 3, 788: 3, 809: 3, 814: 3, 815: 3, 817: 3, 819: 3, 826: 3, 829: 3, 839: 3, 843: 3, 848: 3, 853: 3, 855: 3, 866: 3, 872: 3, 2: 2, 14: 2, 23: 2, 27: 2, 29: 2, 33: 2, 36: 2, 39: 2, 50: 2, 61: 2, 62: 2, 77: 2, 83: 2, 109: 2, 122: 2, 128: 2, 130: 2, 136: 2, 137: 2, 138: 2, 139: 2, 147: 2, 149: 2, 153: 2, 156: 2, 253: 2, 254: 2, 317: 2, 333: 2

In [None]:
plt.style.library.keys()

dict_keys(['_classic_test_patch', 'seaborn-v0_8-notebook', 'seaborn-v0_8-pastel', 'seaborn-v0_8-bright', 'seaborn-v0_8-paper', 'fivethirtyeight', '_mpl-gallery-nogrid', 'seaborn-v0_8-muted', 'seaborn-v0_8-dark-palette', 'classic', 'seaborn-v0_8-white', 'seaborn-v0_8-colorblind', 'fast', 'seaborn-v0_8-deep', '_mpl-gallery', 'seaborn-v0_8', 'grayscale', 'ggplot', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-poster', 'seaborn-v0_8-dark', 'Solarize_Light2', 'bmh', 'seaborn-v0_8-ticks', 'seaborn-v0_8-talk', 'tableau-colorblind10', 'seaborn-v0_8-whitegrid', 'dark_background'])