#### Import libraries

In [1]:
import pandas as pd
import numpy as np
import requests
import os

#### Functions

In [2]:
def get_trakt_history(api_url, headers, show_or_movie, trakt_user,
                      start_at='2021-01-01', end_at='2021-12-31', limit=1000, timeout=30):
    """Download a user's Trakt watch history as a flat DataFrame.

    Parameters
    ----------
    api_url : str
        Base URL of the Trakt API, including the trailing slash.
    headers : dict
        HTTP headers sent with the request (API key, version, authorization).
    show_or_movie : str
        Value sent as the ``type`` query parameter. Every returned record is
        expected to carry ``show`` and ``episode`` objects, because those are
        the fields flattened below.
    trakt_user : str
        Trakt user whose history is requested.
    start_at, end_at : str
        First and last day (``YYYY-MM-DD``) of the requested window. Both days
        are meant to be included.
    limit : int
        Value sent as the ``limit`` query parameter.
        NOTE(review): only a single request is made, so a history longer than
        ``limit`` would presumably need pagination -- confirm against the
        Trakt API documentation.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per history record with the columns ``trakt_id``,
        ``watched_at``, ``show``, ``season``, ``episode``, ``title``,
        ``imdb_id`` and ``trakt_user``. An empty history yields an empty frame
        with the same columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    history_url = 'users/' + trakt_user + '/history/'
    params = {
        'type': show_or_movie,
        'start_at': start_at + 'T00:00:00.000Z',
        # End of day, so items watched during the ``end_at`` day fall inside the window
        'end_at': end_at + 'T23:59:59.999Z',
        'limit': limit
    }

    response = requests.get(api_url + history_url, headers=headers, params=params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to flatten an error payload
    response.raise_for_status()
    records = response.json()

    columns = ['trakt_id', 'watched_at', 'show', 'season', 'episode', 'title', 'imdb_id']
    rows = [
        [
            record['id'], record['watched_at'], record['show']['title'],
            record['episode']['season'], record['episode']['number'], record['episode']['title'],
            record['show']['ids']['imdb']
        ]
        for record in records
    ]
    # from_records keeps the column layout even when there are no rows at all
    main_df = pd.DataFrame.from_records(rows, columns=columns)
    main_df['trakt_user'] = trakt_user

    return main_df

def get_info(api_url, headers, imdb_id, timeout=30):
    """Fetch the network and genres of one show from Trakt.

    Parameters
    ----------
    api_url : str
        Base URL of the Trakt API, including the trailing slash.
    headers : dict
        HTTP headers sent with the request.
    imdb_id : str
        IMDb id used to address the show in the ``shows/`` endpoint.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``imdb_id``, ``network`` and ``genres`` with one row per genre.
        A show without any genre still yields a single row (``genres`` is
        missing) so that it is not lost when this frame is joined later.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    response = requests.get(
        api_url + 'shows/' + imdb_id, headers=headers, params={'extended': 'full'}, timeout=timeout
    )
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on an error payload
    response.raise_for_status()
    show = response.json()

    # The scalar fields are broadcast against the genre list: one row per genre.
    # An empty list would produce a frame with no rows, hence the placeholder.
    genres = show['genres'] or [None]
    title_info = {
        'imdb_id': imdb_id,
        'network': show['network'],
        'genres': genres
    }

    return pd.DataFrame(title_info)

#### Configuration

In [3]:
# Trakt credentials are read from the environment so that they never end up in
# the notebook or in version control. Set TRAKT_CLIENT_ID, TRAKT_CLIENT_SECRET
# and TRAKT_ACCESS_TOKEN before starting the kernel.
# NOTE(review): the values that used to be hardcoded in this cell were stored in
# plain text with the notebook; they should be revoked and regenerated.
my_client_id = os.environ.get('TRAKT_CLIENT_ID', '')
my_client_secret = os.environ.get('TRAKT_CLIENT_SECRET', '')
my_access_token = os.environ.get('TRAKT_ACCESS_TOKEN', '')
if not (my_client_id and my_access_token):
    # Both values are sent with every request below, so point out the gap early
    print('WARNING: TRAKT_CLIENT_ID and/or TRAKT_ACCESS_TOKEN is not set; requests sent with these headers will carry empty credentials.')

api_url = 'https://api.trakt.tv/' # https://trakt.docs.apiary.io/
headers = {
    'Content-type': 'application/json',
    'trakt-api-key': my_client_id,
    'trakt-api-version': '2',
    'Authorization': 'Bearer ' + my_access_token
}

#### Download watch history from `trakt`

In [6]:
# Fetch every user's history first and concatenate once, instead of growing a
# DataFrame inside the loop (which re-copies all previous rows on each pass).
trakt_users = ['szigony', 'moana']
df = pd.concat(
    [get_trakt_history(api_url, headers, 'shows', tu) for tu in trakt_users],
    ignore_index=True
)
# Keep only the calendar date of each watch event
df['watched_at'] = pd.to_datetime(df['watched_at']).dt.date

In [11]:
# exist_ok=True creates the output folder only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs('data/', exist_ok=True)

# Persist the combined watch history so later cells can reload it from disk
df.to_csv('data/watch_history.csv', index=False, header=True)

#### Additional info about genres and networks

In [9]:
# Look up each show once. Missing IMDb ids are skipped because get_info builds
# its URL by string concatenation and cannot handle them; unique() also keeps
# a stable first-seen order, unlike iterating over a set.
imdb_ids = df['imdb_id'].dropna().unique()
info_frames = [get_info(api_url, headers, imdb_id) for imdb_id in imdb_ids]

# Concatenate once; pd.concat rejects an empty list, so fall back to an empty frame
info_df = pd.concat(info_frames, ignore_index=True) if info_frames else pd.DataFrame()

ConnectionError: HTTPSConnectionPool(host='api.trakt.tv', port=443): Max retries exceeded with url: /shows/tt13617060?extended=full (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000026BCFB071F0>: Failed to establish a new connection: [WinError 10065] Szoftvercsatorna-művelet végrehajtására történt kísérlet egy nem elérhető állomáson'))

In [None]:
# Persist the per-show genre/network table next to the watch history export
genres_csv_path = 'data/genres_and_networks.csv'
info_df.to_csv(genres_csv_path, header=True, index=False)

In [2]:
# Reload both exports from disk so the analysis below does not need the API
history_csv, genres_csv = 'data/watch_history.csv', 'data/genres_and_networks.csv'
df, info_df = pd.read_csv(history_csv), pd.read_csv(genres_csv)

# Inner join on the show id: every history row is repeated once per matching
# row of the info table, and history rows without a match are dropped.
master_df = pd.merge(left=df, right=info_df, on='imdb_id', how='inner')
master_df.head()

Unnamed: 0,trakt_id,watched_at,show,season,episode,title,imdb_id,trakt_user,network,genres
0,8038508615,2021-12-29,Hawkeye,1,5,Ronin,tt10160804,szigony,Disney+,action
1,8038508615,2021-12-29,Hawkeye,1,5,Ronin,tt10160804,szigony,Disney+,adventure
2,8038508615,2021-12-29,Hawkeye,1,5,Ronin,tt10160804,szigony,Disney+,drama
3,8038508615,2021-12-29,Hawkeye,1,5,Ronin,tt10160804,szigony,Disney+,holiday
4,8038508615,2021-12-29,Hawkeye,1,5,Ronin,tt10160804,szigony,Disney+,superhero


In [4]:
# Distinct reality shows watched by 'szigony'
szigony_rows = master_df['trakt_user'] == 'szigony'
szigony_reality = szigony_rows & (master_df['genres'] == 'reality')
set(master_df.loc[szigony_reality, 'show'])

{'Baking Impossible',
 'Best Leftovers Ever!',
 'Blown Away: Christmas',
 'Harry Potter: Hogwarts Tournament of Houses',
 'Jack Whitehall: Travels with My Father',
 'Nailed It!',
 'Nailed It! Mexico',
 'School of Chocolate',
 'The Circle',
 'Too Hot to Handle'}

In [5]:
# Distinct reality shows watched by 'moana'
moana_rows = master_df['trakt_user'] == 'moana'
moana_reality = moana_rows & (master_df['genres'] == 'reality')
set(master_df.loc[moana_reality, 'show'])

{'Bake Squad',
 'Baking Impossible',
 'Best Leftovers Ever!',
 'Bling Empire',
 'Blown Away: Christmas',
 'Cooking With Paris',
 'Dream Home Makeover',
 "Glow Up: Britain's Next Make-Up Star",
 'Harry Potter: Hogwarts Tournament of Houses',
 'Instant Hotel',
 'Jack Whitehall: Travels with My Father',
 'Motel Makeover',
 'Nailed It!',
 'Nailed It! Mexico',
 'School of Chocolate',
 'Selling Sunset',
 'Stay Here',
 'Sugar Rush',
 'The Circle',
 'The Wedding Coach',
 "The World's Most Amazing Vacation Rentals",
 'Too Hot to Handle'}