# YouTube watch history

Select the JSON file that contains the user's YouTube watch history.

In [None]:
from PyQt5 import QtGui
from PyQt5 import QtGui, QtWidgets

import json
import os
from pathlib import Path 

def open_file():
    """Show a file-open dialog and return the path the user picked.

    Returns:
        The selected file path as a string. Qt returns an empty string
        when the dialog is cancelled, and that value is passed through.
    """
    # A QApplication has to exist before a dialog can be shown. Reuse a
    # running one so the cell can be executed repeatedly, and pass a valid
    # (empty) argv list instead of the builtin `dir` function.
    app = QtWidgets.QApplication.instance() or QtWidgets.QApplication([])

    # getOpenFileName returns a (path, selected_filter) pair.
    # The first filter now really restricts the view to *.json; the second
    # keeps every file selectable, as the original catch-all pattern did.
    fname, _selected_filter = QtWidgets.QFileDialog.getOpenFileName(
        None,
        "Select a file...",
        '.json',
        filter="JSON files (*.json);;All files (*)",
    )
    return fname
    
# Ask the user which exported watch-history file to analyse.
fname = open_file()

# An empty path means nothing was selected (e.g. the dialog was cancelled).
# Fail with an explicit message instead of the unhelpful error that
# open('') would raise; the exception type stays the same.
if not fname:
    raise FileNotFoundError("No watch-history JSON file was selected")

# Parsed content of the selected file; the cells below iterate over it.
with open(fname, encoding='utf-8') as file:
    history_data = json.load(file)


Counting videos per channel and ordering channels in ascending order

In [None]:
# Number of history entries per channel name.
channels = {}

for video in history_data:
    # Entries without a 'subtitles' key are grouped under 'unknown'.
    if 'subtitles' in video:
        channel = video['subtitles'][0]['name']
    else:
        channel = 'unknown'

    channels[channel] = channels.get(channel, 0) + 1

# Print (channel, count) pairs ordered by count, smallest first.
for entry in sorted(channels.items(), key=lambda pair: pair[1]):
    print(entry)
    

Analyzing timestamps (ISO 8601 format): counting watched videos per month and per day, and determining the time span covered by the history.

In [None]:
import dateutil.parser

# Watched-video counts keyed by '%Y-%B' (month) and '%Y-%B-%d' (day).
monthly = {}
daily = {}
tmp = []

for video in history_data:
    watched_at = dateutil.parser.parse(video['time'])

    month_key = watched_at.strftime('%Y-%B')
    day_key = watched_at.strftime('%Y-%B-%d')

    monthly[month_key] = monthly.get(month_key, 0) + 1
    daily[day_key] = daily.get(day_key, 0) + 1

print(monthly)

# Print (day, count) pairs ordered by count, smallest first.
for day_count in sorted(daily.items(), key=lambda pair: pair[1]):
    print(day_count)

# Span in days between the first and the last entry of the file.
# NOTE(review): a positive result assumes the export lists the newest
# entry first - confirm against the actual file.
first_entry_time = dateutil.parser.parse(history_data[0]['time'])
last_entry_time = dateutil.parser.parse(history_data[-1]['time'])
timeframe = first_entry_time - last_entry_time
print(timeframe.days)


Using YouTube API v3 to obtain lengths of watched videos. 

In [None]:
import os
import json

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

scopes = ["https://www.googleapis.com/auth/youtube.readonly"]

# Name and version of the API the client is built for.
api_service_name = "youtube"
api_version = "v3"

# JSON file holding the API key, in the form:
# {"api_key":"<KEY>"}
client_secrets_file = "keys.json"

with open(client_secrets_file, 'r') as key_file:
    secret_json = json.load(key_file)

# API client authenticated with the developer key read above.
youtube = googleapiclient.discovery.build(
    api_service_name,
    api_version,
    developerKey=secret_json['api_key'],
)

# Number of video ids sent with a single videos().list request.
BATCH_SIZE = 50


def _fetch_durations(client, video_ids):
    """Return the 'contentDetails.duration' strings for one batch of ids.

    Args:
        client: API client object exposing `videos().list(...)`.
        video_ids: list of video id strings, joined with commas for the call.

    Returns:
        One duration string per item in the response. The response may
        hold fewer items than ids were requested.
    """
    request = client.videos().list(
        part="contentDetails",
        id=','.join(video_ids),
    )
    response = request.execute()
    return [detail['contentDetails']['duration'] for detail in response['items']]


# Duration strings of all watched videos the API returned details for.
durations = []

# Ids collected for the next request.
ids = []
for video in history_data:
    if 'titleUrl' not in video:
        continue

    # The id is whatever follows '?v=' in the URL; entries whose URL does
    # not have that form are skipped instead of raising IndexError.
    url_parts = video['titleUrl'].split('?v=')
    if len(url_parts) < 2:
        continue

    ids.append(url_parts[1])

    if len(ids) == BATCH_SIZE:
        durations.extend(_fetch_durations(youtube, ids))
        ids = []

# Send the last, partially filled batch as well. Previously these ids
# were never requested, so up to BATCH_SIZE - 1 videos were lost.
if ids:
    durations.extend(_fetch_durations(youtube, ids))
    ids = []

print(len(durations))

Parsing duration strings and summing up the lengths of the videos

In [None]:
import re

# Duration components in the order days, hours, minutes, seconds. Each
# group captures the number together with its unit letter, e.g. '12M'.
pattern = r"P(\d+D)?T?(\d+H)?(\d+M)?(\d+S)?"
prog = re.compile(pattern)


def _component(token):
    """Return the integer in a captured token such as '12M', or 0 if absent."""
    return int(token[:-1]) if token is not None else 0


days = 0
hours = 0
minutes = 0
seconds = 0

for duration in durations:
    match = prog.search(duration)
    if match is None:
        # Not a duration string this pattern understands; ignore it.
        continue

    day_part, hour_part, minute_part, second_part = match.groups()
    # The day component used to be dropped, which under-counted any video
    # lasting 24 hours or more.
    days += _component(day_part)
    hours += _component(hour_part)
    minutes += _component(minute_part)
    seconds += _component(second_part)

# Fold everything into hours.
hours += days * 24 + minutes / 60 + seconds / 3600

# Total watch time expressed in days.
print(hours/24)