In [None]:
import json
import os
from typing import List, Dict

## Group VOD information by ID

In [None]:
# Path to the JSON listing of available VODs.
available_vods_file = "./../../output/Jerma985/available_videos.json"
# Read the whole listing as text, then decode it into a list of per-VOD dicts.
with open(available_vods_file, 'rt') as vod_info_file:
    vods: List[Dict[str, str]] = json.loads(vod_info_file.read())

In [None]:
# Index the VOD listing by id: each value is the VOD's metadata with the
# "id" field itself removed (it is already the key).
id_vods = dict(
    (
        entry["id"],
        {field: value for field, value in entry.items() if field != "id"},
    )
    for entry in vods
)

# Persist the id -> metadata mapping as JSON in the working directory.
with open("available_videos_by_id.json", "wt") as vod_info_by_id_file:
    vod_info_by_id_file.write(json.dumps(id_vods))


## Convert IRC Formatted Chat to TSV
TSV is required as timestamps contain commas.

Also classify messages:
* Is it a command? The `msg` field starts with "!".
* Is it a mention? The `msg` field contains "@" anywhere.

In [None]:
vod_file_path = "../../output/Jerma985/vod_chats/v1785872632.irc"
# Extract video id from vod filename: drop the directory and the extension.
vod_id, _ = os.path.splitext(os.path.basename(vod_file_path))
# Remove only the single leading "v" prefix. `str.lstrip('v')` would strip
# *every* leading "v" character, not just the prefix.
if vod_id.startswith("v"):
    vod_id = vod_id[1:]
try:
    # Look up the metadata for this VOD and pull out its title.
    vod_metadata = id_vods[vod_id]
    vod_title = vod_metadata["title"]
except KeyError as err:
    # Either the id is unknown or its metadata has no "title"; chain the
    # original KeyError so the traceback shows which key was missing.
    raise Exception(f"Failed to get vod title for {vod_id} from available vods.") from err

In [None]:
# Read the IRC-formatted chat file and write it back out as a TSV file named
# after the VOD title. Each input line is split on its first two spaces into
# a timestamp, a user and the message text.
# Both files are opened inside the same `with` statement so the chat file is
# closed even if the TSV file cannot be created.
# NOTE(review): vod_title is used verbatim as a filename; a title containing
# a path separator would make this open() fail -- confirm titles are safe.
with open(vod_file_path, "rt") as vod_chat_file, \
        open(f"{vod_title}.tsv", "wt") as vod_chat_tsv_file:
    # Write header.
    fields = '\t'.join(["timestamp", "user", "msg", "is_command", "is_mention"])
    vod_chat_tsv_file.write(f"{fields}\n")

    # Iterate the file lazily instead of loading every line with readlines().
    for line_number, line in enumerate(vod_chat_file, start=1):
        # Drop the line terminator so it does not end up inside `msg` and
        # split the TSV record across two lines.
        line = line.rstrip("\r\n")
        if not line.strip():
            # Nothing to convert on a blank line.
            continue

        # The keyword is `maxsplit`; limit the split so spaces inside the
        # message text are preserved.
        parts = line.split(" ", maxsplit=2)
        if len(parts) != 3:
            raise ValueError(
                f"Malformed chat line {line_number} in {vod_file_path}: {line!r}"
            )
        timestamp, user, msg = parts

        # Strip timestamp delimiters.
        # Strip user angle brackets.
        timestamp = timestamp.strip("[]")
        user = user.strip("<>")
        # A literal tab inside the message would shift the TSV columns, so
        # replace it with a space.
        msg = msg.replace("\t", " ")
        # Write fields out.
        field_values = '\t'.join([
            timestamp,
            user,
            msg,
            str(msg.startswith("!")),
            str('@' in msg)
        ])
        vod_chat_tsv_file.write(f"{field_values}\n")