In [None]:
# --- Standard library / third-party imports -------------------------------
from pprint import pprint
from collections.abc import Generator, Callable
from pathlib import Path
import typing
from typing import Any, TypeAlias
import pandas as pd
import numpy as np
import datetime as dt
import re
from functools import partial, reduce
from tqdm import tqdm
from IPython.display import (
    display, # type: ignore[reportUnknownVariableType]
    Markdown,
)

import importlib

# --- NLP tooling -----------------------------------------------------------
import spacy
from nltk.tokenize import word_tokenize as tokenize_nltk
import nltk
# Side effect at import time: requests the 'punkt_tab' NLTK resource.
# NOTE(review): presumably required by word_tokenize; needs network access the
# first time it runs on a machine — confirm against the installed NLTK version.
nltk.download('punkt_tab')

# --- Project modules -------------------------------------------------------
from config.fastf1 import fastf1
from config import config
from src.data.loader import stream_ndjson, load_submissions_df, load_comments_df
from src.data import preprocessing
# Re-executes the already-imported preprocessing module; presumably so that
# edits to src/data/preprocessing.py are picked up without restarting the kernel.
# Must stay directly after the import above.
importlib.reload(preprocessing)
import src.data.constants as dataset_constants

# NOTE(review): `nltk` is already imported above; this second import is a
# harmless duplicate and could be dropped when the cell is next tidied.
import nltk
from nltk.util import ngrams
from collections import defaultdict, Counter
import math

from src.utils import (
    temporary_pandas_options,
    display_full_dataframe,
    hide_index,
    compose,
)
from src import utils
# Project helper called once at start-up; presumably seeds the random number
# generators used by the notebook — see src.utils for which libraries it covers.
utils.set_random_seeds()

import logging
# Raise the 'fastf1' logger threshold to WARNING so its lower-severity
# records (DEBUG/INFO) are not emitted into the notebook output.
logging.getLogger('fastf1').setLevel(logging.WARNING)



In [4]:
# Streamer handed to the r/formula1 loaders below. To iterate on a truncated
# sample instead of the full dump, swap in: partial(stream_ndjson, limit=500000)
f1_ndjson_streamer = partial(stream_ndjson)

# r/formula1: both raw files are read through the streamer defined above.
f1_submissions_df = load_submissions_df(dataset_constants.RawFile.FORMULA1_SUBMISSIONS, f1_ndjson_streamer)
f1_comments_df = load_comments_df(dataset_constants.RawFile.FORMULA1_COMMENTS, f1_ndjson_streamer)

# r/formula1point5: no streamer is passed, so the loaders use whatever default they define.
f15_submissions_df = load_submissions_df(dataset_constants.RawFile.FORMULA1POINT5_SUBMISSIONS)
f15_comments_df = load_comments_df(dataset_constants.RawFile.FORMULA1POINT5_COMMENTS)

In [None]:
# Number of leading rows shown for each raw frame.
n = 4

# Render a Markdown heading followed by the first `n` rows of every raw frame,
# all inside the display_full_dataframe() context (project helper from src.utils).
with display_full_dataframe():
    for preview_heading, preview_frame in (
        ('### r/formula1 submissions:', f1_submissions_df),
        ('### r/formula1 comments:', f1_comments_df),
        ('### r/formula1point5 submissions:', f15_submissions_df),
        ('### r/formula1point5 comments:', f15_comments_df),
    ):
        display(Markdown(preview_heading), preview_frame.head(n))

In [None]:
# Merge each subreddit's submissions and comments into a single "posts" frame
# via the project helper (see src.data.preprocessing for the resulting schema).
f1_df = preprocessing.concatenate_submissions_and_comments(f1_submissions_df, f1_comments_df)
f15_df = preprocessing.concatenate_submissions_and_comments(f15_submissions_df, f15_comments_df)

# Number of leading rows shown for each combined frame.
n = 3

# Render a Markdown heading followed by the first `n` rows of each combined frame.
with display_full_dataframe():
    for preview_heading, preview_frame in (
        ('### r/formula1 posts:', f1_df),
        ('### r/formula1point5 posts:', f15_df),
    ):
        display(Markdown(preview_heading), preview_frame.head(n))

In [22]:
# Test Data

# Tiny hand-written corpus of already-tokenised sentences: every entry is one
# lower-case sentence split on whitespace into its own list of tokens.
processed_testdata = [
    sentence.split()
    for sentence in (
        'ricciardo to red bull',
        'hamilton to stay mercedes',
        'alonso to aston martin',
        'max verstappen to ferrari',
        'max verstappen stay red bull',
        'max verstappen stay by red bull',
    )
]

# Synthetic "<first name> <last name> to <team>" token lists.
# The table below lists, per driver, each destination team together with how
# many times that exact sentence is repeated; rows are emitted in table order
# (driver by driver, team by team). A fresh list is built for every repetition,
# so no two rows share the same list object.
simple_processed_testdata = [
    [first_name, last_name, 'to', *team.split()]
    for (first_name, last_name), destinations in (
        (('max', 'verstappen'), (
            ('red bull', 11), ('mercedes', 3), ('ferrari', 2), ('mclaren', 3),
        )),
        (('charles', 'leclerc'), (
            ('ferrari', 3), ('mclaren', 3), ('red bull', 3), ('mercedes', 2),
            ('aston martin', 3), ('alpine', 3), ('haas', 2),
        )),
        (('sergio', 'perez'), (
            ('red bull', 4), ('mclaren', 2), ('aston martin', 3), ('ferrari', 2),
            ('alpine', 3), ('haas', 3), ('williams', 2),
        )),
        (('george', 'russell'), (
            ('mercedes', 4), ('red bull', 2), ('ferrari', 3), ('mclaren', 3),
            ('aston martin', 3), ('haas', 3), ('alfa romeo', 1),
        )),
        (('carlos', 'sainz'), (
            ('ferrari', 3), ('mclaren', 3), ('red bull', 3), ('mercedes', 2),
            ('aston martin', 3), ('haas', 3), ('alpine', 2),
        )),
        (('lewis', 'hamilton'), (
            ('mercedes', 4), ('ferrari', 2), ('aston martin', 3), ('mclaren', 3),
            ('alpine', 3), ('haas', 3), ('williams', 1),
        )),
        (('lando', 'norris'), (
            ('mclaren', 4), ('red bull', 3), ('ferrari', 3), ('mercedes', 3),
            ('aston martin', 3), ('haas', 3),
        )),
        (('esteban', 'ocon'), (
            ('alpine', 4), ('red bull', 2), ('ferrari', 3), ('mclaren', 3),
            ('mercedes', 3), ('aston martin', 3), ('haas', 1),
        )),
        (('fernando', 'alonso'), (
            ('aston martin', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('mercedes', 3), ('alpine', 3), ('haas', 1),
        )),
        (('valtteri', 'bottas'), (
            ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('aston martin', 3), ('mercedes', 3), ('haas', 1),
        )),
        (('daniel', 'ricciardo'), (
            ('mclaren', 4), ('red bull', 3), ('ferrari', 3), ('mercedes', 3),
            ('aston martin', 3), ('haas', 3),
        )),
        (('sebastian', 'vettel'), (
            ('aston martin', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('mercedes', 3), ('alpine', 3), ('haas', 1),
        )),
        (('kevin', 'magnussen'), (
            ('haas', 3), ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3),
            ('mercedes', 3), ('red bull', 3), ('aston martin', 1),
        )),
        (('pierre', 'gasly'), (
            ('alpine', 4), ('red bull', 2), ('ferrari', 3), ('mclaren', 3),
            ('mercedes', 3), ('aston martin', 3), ('haas', 1),
        )),
        (('lance', 'stroll'), (
            ('aston martin', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('mercedes', 3), ('alpine', 3), ('haas', 1),
        )),
        (('mick', 'schumacher'), (
            ('haas', 3), ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3),
            ('red bull', 3), ('mercedes', 3), ('aston martin', 1),
        )),
        (('yuki', 'tsunoda'), (
            ('alphatauri', 4), ('red bull', 2), ('ferrari', 3), ('mclaren', 3),
            ('mercedes', 3), ('aston martin', 3), ('haas', 1),
        )),
        (('zhou', 'guanyu'), (
            ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('mercedes', 3), ('aston martin', 3), ('haas', 1),
        )),
        (('alexander', 'albon'), (
            ('williams', 3), ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3),
            ('red bull', 3), ('mercedes', 3), ('aston martin', 1),
        )),
        (('nicholas', 'latifi'), (
            ('williams', 3), ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3),
            ('red bull', 3), ('mercedes', 3), ('aston martin', 1),
        )),
        (('nyck', 'vries'), (
            ('alphatauri', 4), ('red bull', 2), ('ferrari', 3), ('mclaren', 3),
            ('mercedes', 3), ('aston martin', 3), ('haas', 1),
        )),
        (('nico', 'hulkenberg'), (
            ('haas', 3), ('alfa romeo', 3), ('mclaren', 3), ('ferrari', 3),
            ('red bull', 3), ('mercedes', 3), ('aston martin', 1),
        )),
        (('oscar', 'piastri'), (
            ('mclaren', 4), ('ferrari', 3), ('red bull', 3), ('mercedes', 3),
            ('aston martin', 3), ('alpine', 3),
        )),
        (('logan', 'sargeant'), (
            ('williams', 3), ('mclaren', 3), ('ferrari', 3), ('red bull', 3),
            ('mercedes', 3), ('aston martin', 3), ('alpine', 1),
        )),
    )
    for team, repeats in destinations
    for _ in range(repeats)
]

hard_processed_testdata = [
    ['max', 'verstappen', 'joins', 'red', 'bull'],
    ['charles', 'leclerc', 'signs', 'with', 'ferrari'],
    ['sergio', 'perez', 'moves', 'to', 'red', 'bull'],
    ['george', 'russell', 'stays', 'at', 'mercedes'],
    ['carlos', 'sainz', 'extends', 'his', 'deal', 'with', 'ferrari'],
    ['lewis', 'hamilton', 'retires', 'from', 'formula', 'one'],
    ['lando', 'norris', 'leaves', 'mclaren', 'for', 'ferrari'],
    ['esteban', 'ocon', 'promoted', 'to', 'alpine', 'renault'],
    ['fernando', 'alonso', 'joins', 'aston', 'martin'],
    ['valtteri', 'bottas', 'shifts', 'to', 'alfa', 'romeo'],
    ['daniel', 'ricciardo', 'returns', 'to', 'red', 'bull'],
    ['sebastian', 'vettel', 'steps', 'back', 'from', 'racing'],
    ['kevin', 'magnussen', 'remains', 'at', 'haas'],
    ['pierre', 'gasly', 'partners', 'with', 'alphatauri'],
    ['lance', 'stroll', 'stays', 'at', 'aston', 'martin'],
    ['mick', 'schumacher', 'leaves', 'haas', 'for', 'alfa', 'romeo'],
    ['yuki', 'tsunoda', 'signs', 'new', 'contract', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'extends', 'stay', 'at', 'alfa', 'romeo'],
    ['alexander', 'albon', 'linked', 'to', 'red', 'bull', 'return'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'replaces', 'perez', 'at', 'red', 'bull'],
    ['nico', 'hulkenberg', 'joins', 'haas'],
    ['oscar', 'piastri', 'promoted', 'to', 'mclaren'],
    ['liam', 'lawson', 'signs', 'with', 'alphatauri'],
    ['logan', 'sargeant', 'commits', 'to', 'williams'],
    ['max', 'verstappen', 'leaves', 'red', 'bull', 'for', 'ferrari'],
    ['charles', 'leclerc', 'moves', 'to', 'mercedes'],
    ['sergio', 'perez', 'replaces', 'alonso', 'at', 'aston', 'martin'],
    ['george', 'russell', 'partners', 'with', 'hamilton', 'at', 'mercedes'],
    ['carlos', 'sainz', 'remains', 'loyal', 'to', 'ferrari'],
    ['lewis', 'hamilton', 'extends', 'his', 'deal', 'with', 'mercedes'],
    ['lando', 'norris', 'joins', 'red', 'bull'],
    ['esteban', 'ocon', 'stays', 'at', 'alpine'],
    ['fernando', 'alonso', 'retires', 'from', 'formula', 'one'],
    ['valtteri', 'bottas', 'switches', 'to', 'sauber'],
    ['daniel', 'ricciardo', 'linked', 'with', 'mclaren', 'return'],
    ['sebastian', 'vettel', 'signs', 'with', 'aston', 'martin'],
    ['kevin', 'magnussen', 'leaves', 'haas', 'for', 'red', 'bull'],
    ['pierre', 'gasly', 'extends', 'stay', 'with', 'alpine', 'renault'],
    ['lance', 'stroll', 'moves', 'to', 'aston', 'martin'],
    ['mick', 'schumacher', 'replaces', 'hulkenberg', 'at', 'haas'],
    ['yuki', 'tsunoda', 'extends', 'contract', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'moves', 'to', 'mclaren'],
    ['alexander', 'albon', 'joins', 'williams'],
    ['nicholas', 'latifi', 'steps', 'back', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'partners', 'with', 'red', 'bull'],
    ['nico', 'hulkenberg', 'signs', 'with', 'haas'],
    ['oscar', 'piastri', 'replaces', 'norris', 'at', 'mclaren'],
    ['liam', 'lawson', 'commits', 'to', 'alphatauri'],
    ['logan', 'sargeant', 'linked', 'to', 'williams'],
    ['max', 'verstappen', 'signs', 'long-term', 'deal', 'with', 'red', 'bull'],
    ['charles', 'leclerc', 'remains', 'at', 'ferrari'],
    ['sergio', 'perez', 'steps', 'away', 'from', 'formula', 'one'],
    ['george', 'russell', 'linked', 'to', 'ferrari'],
    ['carlos', 'sainz', 'moves', 'to', 'aston', 'martin'],
    ['lewis', 'hamilton', 'joins', 'ferrari'],
    ['lando', 'norris', 'replaces', 'russell', 'at', 'mercedes'],
    ['esteban', 'ocon', 'returns', 'to', 'alpine'],
    ['fernando', 'alonso', 'signs', 'with', 'aston', 'martin'],
    ['valtteri', 'bottas', 'retires', 'from', 'formula', 'one'],
    ['daniel', 'ricciardo', 'replaces', 'verstappen', 'at', 'red', 'bull'],
    ['sebastian', 'vettel', 'joins', 'sauber'],
    ['kevin', 'magnussen', 'linked', 'to', 'ferrari'],
    ['pierre', 'gasly', 'remains', 'with', 'alpine'],
    ['lance', 'stroll', 'stays', 'with', 'aston', 'martin'],
    ['mick', 'schumacher', 'joins', 'alfa', 'romeo'],
    ['yuki', 'tsunoda', 'remains', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'linked', 'to', 'sauber'],
    ['alexander', 'albon', 'extends', 'deal', 'with', 'williams'],
    ['nicholas', 'latifi', 'leaves', 'formula', 'one'],
    ['nyck', 'vries', 'replaces', 'alonso', 'at', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'partners', 'with', 'haas'],
    ['oscar', 'piastri', 'stays', 'at', 'mclaren'],
    ['liam', 'lawson', 'signs', 'with', 'alphatauri'],
    ['logan', 'sargeant', 'promoted', 'to', 'williams'],
    ['max', 'verstappen', 'leaves', 'red', 'bull', 'for', 'mercedes'],
    ['charles', 'leclerc', 'joins', 'red', 'bull'],
    ['sergio', 'perez', 'partners', 'with', 'aston', 'martin'],
    ['george', 'russell', 'replaces', 'hamilton', 'at', 'mercedes'],
    ['carlos', 'sainz', 'signs', 'new', 'contract', 'with', 'ferrari'],
    ['lewis', 'hamilton', 'extends', 'deal', 'with', 'mercedes'],
    ['lando', 'norris', 'switches', 'to', 'ferrari'],
    ['esteban', 'ocon', 'remains', 'at', 'alpine'],
    ['fernando', 'alonso', 'linked', 'to', 'aston', 'martin'],
    ['valtteri', 'bottas', 'signs', 'with', 'sauber'],
    ['daniel', 'ricciardo', 'commits', 'to', 'red', 'bull'],
    ['sebastian', 'vettel', 'retires', 'from', 'formula', 'one'],
    ['kevin', 'magnussen', 'remains', 'with', 'haas'],
    ['pierre', 'gasly', 'moves', 'to', 'ferrari'],
    ['lance', 'stroll', 'partners', 'with', 'alonso', 'at', 'aston', 'martin'],
    ['mick', 'schumacher', 'linked', 'to', 'mclaren'],
    ['yuki', 'tsunoda', 'promoted', 'to', 'red', 'bull'],
    ['zhou', 'guanyu', 'replaces', 'gasly', 'at', 'alpine'],
    ['alexander', 'albon', 'stays', 'at', 'williams'],
    ['nicholas', 'latifi', 'moves', 'to', 'haas'],
    ['nyck', 'vries', 'partners', 'with', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'linked', 'to', 'alfa', 'romeo'],
    ['oscar', 'piastri', 'joins', 'red', 'bull'],
    ['liam', 'lawson', 'replaces', 'alonso', 'at', 'aston', 'martin'],
    ['logan', 'sargeant', 'signs', 'with', 'alphatauri'],
    ['max', 'verstappen', 'switches', 'to', 'alpine'],
    ['charles', 'leclerc', 'returns', 'to', 'ferrari'],
    ['sergio', 'perez', 'leaves', 'formula', 'one'],
    ['george', 'russell', 'remains', 'at', 'mercedes'],
    ['carlos', 'sainz', 'commits', 'to', 'ferrari'],
    ['lewis', 'hamilton', 'partners', 'with', 'verstappen', 'at', 'red', 'bull'],
    ['lando', 'norris', 'joins', 'aston', 'martin'],
    ['esteban', 'ocon', 'linked', 'to', 'alpine'],
    ['fernando', 'alonso', 'retires', 'from', 'formula', 'one'],
    ['valtteri', 'bottas', 'signs', 'with', 'haas'],
    ['daniel', 'ricciardo', 'returns', 'to', 'alpine'],
    ['sebastian', 'vettel', 'moves', 'to', 'alphatauri'],
    ['kevin', 'magnussen', 'stays', 'at', 'haas'],
    ['pierre', 'gasly', 'joins', 'ferrari'],
    ['lance', 'stroll', 'replaces', 'ocon', 'at', 'alpine'],
    ['mick', 'schumacher', 'partners', 'with', 'ferrari'],
    ['yuki', 'tsunoda', 'signs', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'remains', 'at', 'alfa', 'romeo'],
    ['alexander', 'albon', 'linked', 'to', 'red', 'bull'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'replaces', 'vettel', 'at', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'commits', 'to', 'haas'],
    ['oscar', 'piastri', 'partners', 'with', 'norris', 'at', 'mclaren'],
    ['liam', 'lawson', 'linked', 'to', 'alphatauri'],
    ['logan', 'sargeant', 'moves', 'to', 'williams'],
    ['max', 'verstappen', 'remains', 'at', 'red', 'bull'],
    ['charles', 'leclerc', 'extends', 'deal', 'with', 'ferrari'],
    ['sergio', 'perez', 'switches', 'to', 'aston', 'martin'],
    ['george', 'russell', 'stays', 'at', 'mercedes'],
    ['carlos', 'sainz', 'replaces', 'hamilton', 'at', 'mercedes'],
    ['lewis', 'hamilton', 'linked', 'to', 'ferrari'],
    ['lando', 'norris', 'moves', 'to', 'ferrari'],
    ['esteban', 'ocon', 'remains', 'at', 'alpine'],
    ['fernando', 'alonso', 'retires', 'from', 'formula', 'one'],
    ['valtteri', 'bottas', 'partners', 'with', 'sauber'],
    ['daniel', 'ricciardo', 'replaces', 'verstappen', 'at', 'red', 'bull'],
    ['sebastian', 'vettel', 'joins', 'aston', 'martin'],
    ['kevin', 'magnussen', 'stays', 'with', 'haas'],
    ['pierre', 'gasly', 'leaves', 'alpine', 'for', 'red', 'bull'],
    ['lance', 'stroll', 'partners', 'with', 'alonso', 'at', 'aston', 'martin'],
    ['mick', 'schumacher', 'joins', 'haas'],
    ['yuki', 'tsunoda', 'remains', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'linked', 'to', 'sauber'],
    ['alexander', 'albon', 'replaces', 'latifi', 'at', 'williams'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'joins', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'partners', 'with', 'haas'],
    ['oscar', 'piastri', 'remains', 'at', 'mclaren'],
    ['liam', 'lawson', 'joins', 'alphatauri'],
    ['logan', 'sargeant', 'commits', 'to', 'williams'],
    ['max', 'verstappen', 'linked', 'to', 'ferrari'],
    ['charles', 'leclerc', 'remains', 'with', 'ferrari'],
    ['sergio', 'perez', 'leaves', 'red', 'bull'],
    ['george', 'russell', 'partners', 'with', 'hamilton', 'at', 'mercedes'],
    ['carlos', 'sainz', 'signs', 'new', 'contract', 'with', 'ferrari'],
    ['lewis', 'hamilton', 'retires', 'from', 'formula', 'one'],
    ['lando', 'norris', 'linked', 'to', 'ferrari'],
    ['esteban', 'ocon', 'commits', 'to', 'alpine'],
    ['fernando', 'alonso', 'remains', 'at', 'aston', 'martin'],
    ['valtteri', 'bottas', 'joins', 'sauber'],
    ['daniel', 'ricciardo', 'partners', 'with', 'verstappen', 'at', 'red', 'bull'],
    ['sebastian', 'vettel', 'linked', 'to', 'haas'],
    ['kevin', 'magnussen', 'remains', 'at', 'haas'],
    ['pierre', 'gasly', 'moves', 'to', 'alpine'],
    ['lance', 'stroll', 'stays', 'with', 'aston', 'martin'],
    ['mick', 'schumacher', 'leaves', 'haas', 'for', 'alfa', 'romeo'],
    ['yuki', 'tsunoda', 'promoted', 'to', 'red', 'bull'],
    ['zhou', 'guanyu', 'partners', 'with', 'alfa', 'romeo'],
    ['alexander', 'albon', 'linked', 'to', 'alphatauri'],
    ['nicholas', 'latifi', 'leaves', 'formula', 'one'],
    ['nyck', 'vries', 'signs', 'with', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'replaces', 'magnussen', 'at', 'haas'],
    ['oscar', 'piastri', 'joins', 'red', 'bull'],
    ['liam', 'lawson', 'partners', 'with', 'alphatauri'],
    ['logan', 'sargeant', 'moves', 'to', 'williams'],
    ['max', 'verstappen', 'signs', 'with', 'mclaren'],
    ['charles', 'leclerc', 'remains', 'at', 'ferrari'],
    ['sergio', 'perez', 'partners', 'with', 'hamilton', 'at', 'mercedes'],
    ['george', 'russell', 'moves', 'to', 'aston', 'martin'],
    ['carlos', 'sainz', 'extends', 'deal', 'with', 'ferrari'],
    ['lewis', 'hamilton', 'retires', 'from', 'formula', 'one'],
    ['lando', 'norris', 'linked', 'to', 'alphatauri'],
    ['esteban', 'ocon', 'stays', 'with', 'alpine'],
    ['fernando', 'alonso', 'replaces', 'stroll', 'at', 'aston', 'martin'],
    ['valtteri', 'bottas', 'partners', 'with', 'alfa', 'romeo'],
    ['daniel', 'ricciardo', 'returns', 'to', 'red', 'bull'],
    ['sebastian', 'vettel', 'linked', 'to', 'haas'],
    ['kevin', 'magnussen', 'commits', 'to', 'haas'],
    ['pierre', 'gasly', 'moves', 'to', 'ferrari'],
    ['lance', 'stroll', 'remains', 'at', 'aston', 'martin'],
    ['mick', 'schumacher', 'joins', 'alpine'],
    ['yuki', 'tsunoda', 'partners', 'with', 'verstappen', 'at', 'red', 'bull'],
    ['zhou', 'guanyu', 'linked', 'to', 'renault'],
    ['alexander', 'albon', 'switches', 'to', 'alphatauri'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'signs', 'with', 'alpine'],
    ['nico', 'hulkenberg', 'replaces', 'schumacher', 'at', 'haas'],
    ['oscar', 'piastri', 'partners', 'with', 'norris', 'at', 'mclaren'],
    ['liam', 'lawson', 'remains', 'at', 'alphatauri'],
    ['logan', 'sargeant', 'moves', 'to', 'williams'],
    ['max', 'verstappen', 'linked', 'to', 'ferrari'],
    ['charles', 'leclerc', 'stays', 'at', 'ferrari'],
    ['sergio', 'perez', 'commits', 'to', 'red', 'bull'],
    ['george', 'russell', 'signs', 'new', 'deal', 'with', 'mercedes'],
    ['carlos', 'sainz', 'partners', 'with', 'leclerc', 'at', 'ferrari'],
    ['lewis', 'hamilton', 'retires', 'from', 'formula', 'one'],
    ['lando', 'norris', 'joins', 'aston', 'martin'],
    ['esteban', 'ocon', 'moves', 'to', 'mclaren'],
    ['fernando', 'alonso', 'switches', 'to', 'alpine'],
    ['valtteri', 'bottas', 'remains', 'at', 'alfa', 'romeo'],
    ['daniel', 'ricciardo', 'returns', 'to', 'alphatauri'],
    ['sebastian', 'vettel', 'partners', 'with', 'gasly', 'at', 'alpine'],
    ['kevin', 'magnussen', 'stays', 'at', 'haas'],
    ['pierre', 'gasly', 'commits', 'to', 'alpine'],
    ['lance', 'stroll', 'remains', 'with', 'aston', 'martin'],
    ['mick', 'schumacher', 'replaces', 'bottas', 'at', 'alfa', 'romeo'],
    ['yuki', 'tsunoda', 'joins', 'red', 'bull'],
    ['zhou', 'guanyu', 'partners', 'with', 'alfa', 'romeo'],
    ['alexander', 'albon', 'switches', 'to', 'alphatauri'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'joins', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'partners', 'with', 'haas'],
    ['oscar', 'piastri', 'stays', 'at', 'mclaren'],
    ['liam', 'lawson', 'commits', 'to', 'alphatauri'],
    ['logan', 'sargeant', 'remains', 'with', 'williams'],
    ['max', 'verstappen', 'linked', 'to', 'ferrari'],
    ['charles', 'leclerc', 'remains', 'at', 'ferrari'],
    ['sergio', 'perez', 'joins', 'aston', 'martin'],
    ['george', 'russell', 'partners', 'with', 'hamilton', 'at', 'mercedes'],
    ['carlos', 'sainz', 'extends', 'contract', 'with', 'ferrari'],
    ['lewis', 'hamilton', 'retires', 'from', 'formula', 'one'],
    ['lando', 'norris', 'linked', 'to', 'ferrari'],
    ['esteban', 'ocon', 'stays', 'at', 'alpine'],
    ['fernando', 'alonso', 'commits', 'to', 'aston', 'martin'],
    ['valtteri', 'bottas', 'switches', 'to', 'sauber'],
    ['daniel', 'ricciardo', 'partners', 'with', 'verstappen', 'at', 'red', 'bull'],
    ['sebastian', 'vettel', 'joins', 'haas'],
    ['kevin', 'magnussen', 'stays', 'with', 'haas'],
    ['pierre', 'gasly', 'leaves', 'alpine', 'for', 'ferrari'],
    ['lance', 'stroll', 'partners', 'with', 'alonso', 'at', 'aston', 'martin'],
    ['mick', 'schumacher', 'joins', 'haas'],
    ['yuki', 'tsunoda', 'remains', 'with', 'alphatauri'],
    ['zhou', 'guanyu', 'partners', 'with', 'sauber'],
    ['alexander', 'albon', 'replaces', 'latifi', 'at', 'williams'],
    ['nicholas', 'latifi', 'retires', 'from', 'formula', 'one'],
    ['nyck', 'vries', 'signs', 'with', 'aston', 'martin'],
    ['nico', 'hulkenberg', 'partners', 'with', 'haas'],
    ['oscar', 'piastri', 'remains', 'at', 'mclaren'],
    ['liam', 'lawson', 'joins', 'alphatauri'],
    ['logan', 'sargeant', 'moves', 'to', 'williams']
]

In [None]:
# Preprocessing: normalize, lemmatize and tokenize

# NOTE: this overwrites the 'text' column in place, so re-running the cell
# feeds already-normalized text through preprocessing.normalize again.
f1_df['text'] = f1_df['text'].apply(preprocessing.normalize)

# NOTE(review): import kept here to leave the cell self-contained; it belongs
# with the other imports in the first cell.
from nltk.stem import PorterStemmer

nlp = spacy.load('en_core_web_sm')
# NOTE(review): `stemmer` is not referenced by any cell visible below -
# confirm it is unused before removing it.
stemmer = PorterStemmer()

# nlp.pipe streams the texts through the pipeline in batches, which avoids the
# per-call overhead of invoking nlp(text) separately for every document.
# Each document becomes one list of lower-cased lemmas.
all_lemmatized_tokens = [
    [token.lemma_.lower() for token in doc]
    for doc in nlp.pipe(f1_df['text'])
]

print(all_lemmatized_tokens[:5])



In [None]:
# Lists of drivers, teams and action words

# Driver name tokens. Each full name is split into separate entries (given
# name, then surname) because sentences are matched one token at a time.
drivers = [
    name_part
    for full_name in (
        'max verstappen',
        'charles leclerc',
        'sergio perez',
        'george russell',
        'carlos sainz',
        'lewis hamilton',
        'lando norris',
        'esteban ocon',
        'fernando alonso',
        'valtteri bottas',
        'daniel ricciardo',
        'sebastian vettel',
        'kevin magnussen',
        'pierre gasly',
        'lance stroll',
        'mick schumacher',
        'yuki tsunoda',
        'zhou guanyu',
        'alexander albon',
        'nicholas latifi',
        'nyck vries',
        'nico hulkenberg',
        'oscar piastri',
        'logan sargeant',
    )
    for name_part in full_name.split()
]

# Team name tokens. Multi-word names and alternative spellings are grouped on
# one line and split into individual tokens, preserving the listed order.
teams = [
    name_part
    for team_names in (
        'mercedes',
        'ferrari',
        'red bull',
        'alpine renault',
        'mclaren',
        'aston martin',
        'racing point',
        'alphatauri alpha tauri',
        'haas',
        'alfa romeo',
        'williams',
        'kick sauber',
    )
    for name_part in team_names.split()
]

# Words that may directly follow a driver token in a transfer-style sentence:
# the preposition 'to' plus the base and third-person form of each verb.
action_words = ['to'] + [
    verb_form
    for verb_pair in (
        ('go', 'goes'),
        ('leave', 'leaves'),
        ('join', 'joins'),
        ('sign', 'signs'),
        ('extend', 'extends'),
        ('move', 'moves'),
        ('replace', 'replaces'),
        ('return', 'returns'),
        ('stay', 'stays'),
    )
    for verb_form in verb_pair
]



def filter_sentences_by_driver_and_team(tokenized_texts, drivers, teams, action_words):
    """Select sentences that look like driver-transfer statements.

    A sentence is kept when it contains at least one team token anywhere and
    at least one driver token that is immediately followed by an action word.

    Args:
        tokenized_texts: Iterable of sentences, each a list of string tokens.
        drivers: Collection of driver name tokens.
        teams: Collection of team name tokens.
        action_words: Collection of tokens allowed directly after a driver.

    Returns:
        A list with the matching sentences (the same list objects, in input
        order, each at most once).
    """
    # Build the lookup sets once: membership tests against the original lists
    # would be repeated for every token of every sentence.
    driver_set = frozenset(drivers)
    team_set = frozenset(teams)
    action_set = frozenset(action_words)

    filtered_sentences = []

    for sentence in tokenized_texts:
        # Without a team token the sentence can never qualify, so skip the
        # driver/action scan entirely.
        if team_set.isdisjoint(sentence):
            continue

        # Pair every token with its successor and look for driver -> action.
        if any(
            word in driver_set and following in action_set
            for word, following in zip(sentence, sentence[1:])
        ):
            filtered_sentences.append(sentence)

    return filtered_sentences


# Keep only the lemmatized sentences that mention a team token and contain a
# driver token immediately followed by an action word.
filtered_sentences = filter_sentences_by_driver_and_team(all_lemmatized_tokens, drivers, teams, action_words)

# Peek at the first five matches.
print(filtered_sentences[:5])


In [73]:
# N-gram model

def train_ngram_model(data, n=2):
    """Estimate maximum-likelihood next-word probabilities from n-grams.

    Every sentence is wrapped in a single '<s>' start marker and a single
    '</s>' end marker before its n-grams are counted.

    Args:
        data: Iterable of sentences, each a list of tokens.
        n: Order of the n-grams (2 = bigram, 3 = trigram, ...).

    Returns:
        A dict mapping each prefix (tuple of the first n-1 tokens of an
        n-gram) to a dict ``{next_word: relative frequency}``.
    """
    followers_by_context = defaultdict(Counter)

    for tokens in data:
        padded = ['<s>'] + tokens + ['</s>']
        for gram in ngrams(padded, n):
            context = tuple(gram[:-1])
            followers_by_context[context][gram[-1]] += 1

    model = {}
    for context, followers in followers_by_context.items():
        # The number of n-grams seen with this context is the sum of its
        # follower counts.
        context_total = sum(followers.values())
        model[context] = {
            word: hits / context_total for word, hits in followers.items()
        }

    return model



# All three models are trained on the same filtered sentences; only the
# n-gram order differs.

# Train a bigram model
bigram_model = train_ngram_model(filtered_sentences, n=2)

# Train a trigram model
trigram_model = train_ngram_model(filtered_sentences, n=3)

# Train a quadgram model
quadgram_model = train_ngram_model(filtered_sentences, n=4)


In [None]:
# Disabled experiment: the whole cell is a single string literal, so nothing
# inside it is executed and `train_trigram_model_with_smoothing`,
# `trigram_model_s` and `vocabulary` are never defined.
'''
# Create trigram model with Laplace Smoothing

def train_trigram_model_with_smoothing(data, n=3):
    ngram_counts = defaultdict(Counter)
    total_counts = Counter()
    vocabulary = set()

    for sentence in data:
        sentence = ['<s>'] * (n - 1) + sentence + ['</s>']  # Add padding
        n_grams = list(ngrams(sentence, n))
        vocabulary.update(sentence)  # Add tokens to vocabulary
        for gram in n_grams:
            prefix, next_word = tuple(gram[:-1]), gram[-1]
            ngram_counts[prefix][next_word] += 1
            total_counts[prefix] += 1

    # Laplace Smoothing
    vocabulary_size = len(vocabulary)
    trigram_probs = {
        prefix: {word: (count + 1) / (total_counts[prefix] + vocabulary_size)
                 for word, count in words.items()}
        for prefix, words in ngram_counts.items()
    }

    # Ensure all words in the vocabulary have a non-zero probability
    for prefix in ngram_counts.keys():
        for word in vocabulary:
            if word not in trigram_probs[prefix]:
                trigram_probs[prefix][word] = 1 / (total_counts[prefix] + vocabulary_size)

    return trigram_probs, vocabulary



# Train a trigram model with Laplace Smoothing
trigram_model_s, vocabulary = train_trigram_model_with_smoothing(all_lemmatized_tokens, n=3)
'''


In [None]:
# Predict next word

def predict_next_word(model, input_text, n=2):
    tokens = input_text.lower().split()
    prefix = tuple(tokens[-(n-1):])
    if prefix in model:
        return max(model[prefix], key=model[prefix].get)
    else:
        return "<unk>"



# Generate a prediction: with n=3 the lookup prefix is the last two tokens of
# the seed, i.e. ('verstappen', 'to').

input_text = "max verstappen to"
next_word = predict_next_word(trigram_model, input_text, n=3)
print(f"Next word: {next_word}")


In [None]:
# Generate full predictions

def generate_predictions(model, seed_text, n=2, max_length=10):
    """Greedily extend ``seed_text`` one word at a time.

    Args:
        model: Dict mapping prefix tuples to ``{word: probability}`` dicts.
        seed_text: Starting text; it is lower-cased and split on whitespace.
        n: Order of the n-gram model, passed through to predict_next_word.
        max_length: Maximum number of words to append.

    Returns:
        The seed tokens followed by the generated words, joined by spaces.
        Generation ends at the "</s>" marker (not included), after
        ``max_length`` words, or after a single "<unk>" when the model has no
        continuation for the current prefix.
    """
    tokens = seed_text.lower().split()
    for _ in range(max_length):
        next_word = predict_next_word(model, " ".join(tokens), n=n)
        if next_word == "</s>":
            break
        tokens.append(next_word)
        if next_word == "<unk>":
            # The marker now sits in every later prefix, so further lookups
            # would only append more "<unk>" until max_length is reached.
            break
    return " ".join(tokens)



# Generate a prediction: extend the seed with the trigram model (n=3), one
# word at a time.

input_text = "max verstappen to"
prediction = generate_predictions(trigram_model, input_text, n=3)
print(f"Generated prediction: {prediction}")


In [None]:
# Forced to predict next team

def predict_next_team(model, teams, input_text, n=3):
    """Return the most probable next word that is also a team token.

    Args:
        model: Dict mapping prefix tuples to ``{word: probability}`` dicts.
        teams: Collection of team name tokens the prediction is restricted to.
        input_text: Seed text; it is lower-cased and split on whitespace.
        n: Order of the n-gram model; the last ``n - 1`` tokens form the
            lookup prefix.

    Returns:
        The highest-probability continuation found in ``teams``, or "<unk>"
        when the prefix is unknown or none of its continuations is a team.
    """
    tokens = input_text.lower().split()
    context_size = n - 1
    # tokens[-0:] would return the whole list, so an order-1 model needs the
    # empty prefix spelled out explicitly.
    prefix = tuple(tokens[-context_size:]) if context_size > 0 else ()

    team_predictions = {
        word: prob
        for word, prob in model.get(prefix, {}).items()
        if word in teams
    }
    if not team_predictions:
        return "<unk>"
    return max(team_predictions, key=team_predictions.get)



# Generate a prediction: the most probable continuation of
# ('verstappen', 'to') that is also listed in `teams`.

input_text = "max verstappen to"
next_word = predict_next_team(trigram_model, teams, input_text, n=3)
print(f"Next predicted team: {next_word}")


In [None]:
# Predict top n words

def predict_top_n_for_input(model, input_text, n=5, ngram_size=3):
    """Tabulate the ``n`` most probable next words for ``input_text``.

    Args:
        model: Dict mapping prefix tuples to ``{word: probability}`` dicts.
        input_text: Seed text; it is lower-cased and split on whitespace,
            matching the other prediction helpers in this notebook.
        n: Number of candidates to return.
        ngram_size: Order of the n-gram model; the last ``ngram_size - 1``
            tokens form the lookup prefix.

    Returns:
        A DataFrame with columns "Word" and "Probability", sorted by
        descending probability. It is empty (and a notice is printed) when
        the prefix is not in the model.
    """
    tokens = input_text.lower().split()
    context_size = ngram_size - 1
    # tokens[-0:] would return the whole list, so an order-1 model needs the
    # empty prefix spelled out explicitly.
    prefix = tuple(tokens[-context_size:]) if context_size > 0 else ()

    if prefix not in model:
        print("Prefix not found in the model. No predictions available.")
        return pd.DataFrame(columns=["Word", "Probability"])

    # sorted() is stable, so equally probable words keep their model order.
    top_predictions = sorted(
        model[prefix].items(), key=lambda item: item[1], reverse=True
    )[:n]
    return pd.DataFrame(top_predictions, columns=["Word", "Probability"])



# Generate a prediction: the five most probable continuations of the seed's
# last two tokens (default ngram_size=3), as a Word/Probability table.

input_text = "max verstappen to"
top_predictions = predict_top_n_for_input(trigram_model, input_text, n=5)
print(top_predictions)

In [78]:
# Create summary

# Module-level accumulator: predicted team -> set of driver names.
team_summary = {}

def sort_to_team(team, driver):
    """Record ``driver`` under ``team`` in the global ``team_summary``.

    A new set is created the first time a team is seen; adding the same
    driver to a team again has no effect.
    """
    team_summary.setdefault(team, set()).add(driver)

def print_team_summary(summary):
    """Print every team with its drivers, one team per line.

    Args:
        summary: Dict mapping a team name to a collection of driver names.

    Drivers are printed in alphabetical order: they are stored in sets, whose
    iteration order for strings can differ between interpreter runs, and
    sorting keeps the output reproducible and consistent with
    convert_to_dataframe.
    """
    print("Team Summary:")
    for team, members in summary.items():
        print(f"  {team}: {', '.join(sorted(members))}")

def convert_to_dataframe(summary):
    """Turn a team -> drivers mapping into a two-column DataFrame.

    Args:
        summary: Dict mapping a team name to a collection of driver names.

    Returns:
        A DataFrame with one row per team (in dict order) and the columns
        'Team' and 'Drivers', where 'Drivers' holds the alphabetically
        sorted driver names joined by ', '.
    """
    rows = []
    for team_name, members in summary.items():
        rows.append((team_name, ', '.join(sorted(members))))

    return pd.DataFrame(rows, columns=['Team', 'Drivers'])

In [None]:
# Predict a team for every driver with the trigram model and file the driver
# under that team in `team_summary`. Each seed is "<driver name> to", so the
# lookup prefix is the driver's last name token plus 'to'. Drivers without a
# team prediction are filed under "<unk>", the value predict_next_team
# returns in that case.
for driver_name in (
    "max verstappen",
    "charles leclerc",
    "sergio perez",
    "george russell",
    "carlos sainz",
    "lewis hamilton",
    "lando norris",
    "esteban ocon",
    "fernando alonso",
    "valtteri bottas",
    "daniel ricciardo",
    "sebastian vettel",
    "kevin magnussen",
    "pierre gasly",
    "lance stroll",
    "mick schumacher",
    "yuki tsunoda",
    "zhou guanyu",
    "alexander albon",
    "nicholas latifi",
    "nyck de vries",
    "nico hulkenberg",
    "oscar piastri",
    "logan sargeant",
):
    input_text = f"{driver_name} to"
    next_word = predict_next_team(trigram_model, teams, input_text, n=3)
    print(f"Next predicted team for {driver_name}: {next_word}")
    sort_to_team(next_word, driver_name)


In [None]:
# Show the team -> drivers mapping collected by the prediction cell above.
print_team_summary(team_summary)

In [None]:
# Tabulate the summary: one row per predicted team, with its drivers joined
# into a single alphabetically sorted string.
df = convert_to_dataframe(team_summary)
print(df)