In [1]:
import time
import json
from pathlib import Path
import pandas as pd
from pandas.io.formats.style import Styler
from collections.abc import Generator, Callable
import typing
from typing import Any, TypeAlias
import numpy as np
from contextlib import contextmanager
from functools import partial, reduce
import re
import datetime as dt
from tqdm import tqdm
import pickle
from IPython.display import (
    display, # type: ignore[reportUnknownVariableType]
    Markdown,
)
import importlib
import spacy

from config.fastf1 import fastf1
import fastf1.events as fastf1_events
from config import config
importlib.reload(config);
from src.data.loader import stream_ndjson, load_submissions_df, load_comments_df
import src.data.preprocessing as preprocessing
importlib.reload(preprocessing);
import src.data.constants as dataset_constants
import src.utils
importlib.reload(src.utils);
from src.utils import (
    temporary_pandas_options,
    display_full_dataframe,
    hide_index,
    compose,
)
from src import utils
utils.set_random_seeds()

import logging
logging.getLogger('fastf1').setLevel(logging.WARNING)

DEVICE = utils.get_device()

PyTorch version: 2.5.1+cu124
CUDA available: True
CUDA version: 12.4
Selected GPU: NVIDIA GeForce GTX 1080 Ti (device_id=0)


In [2]:
def load_f1_df(limit: int | None = None, in_place: bool = True) -> pd.DataFrame:
    ndjson_streamer = partial(stream_ndjson, limit=limit)

    return preprocessing.concatenate_submissions_and_comments(
        submissions_df=load_submissions_df(dataset_constants.RawFile.FORMULA1_SUBMISSIONS, ndjson_streamer),
        comments_df=load_comments_df(dataset_constants.RawFile.FORMULA1_COMMENTS, ndjson_streamer),
        in_place=in_place,
    )

def load_f15_df(limit: int | None = None, in_place: bool = True) -> pd.DataFrame:
    ndjson_streamer = partial(stream_ndjson, limit=limit)

    return preprocessing.concatenate_submissions_and_comments(
        submissions_df=load_submissions_df(dataset_constants.RawFile.FORMULA1POINT5_SUBMISSIONS, ndjson_streamer),
        comments_df=load_comments_df(dataset_constants.RawFile.FORMULA1POINT5_COMMENTS, ndjson_streamer),
        in_place=in_place,
    )

In [3]:
f1_ndjson_streamer = partial(stream_ndjson, limit=100)
f15_ndjson_streamer = partial(stream_ndjson, limit=100)

f1_submissions_df = load_submissions_df(dataset_constants.RawFile.FORMULA1_SUBMISSIONS, f1_ndjson_streamer)
f1_comments_df = load_comments_df(dataset_constants.RawFile.FORMULA1_COMMENTS, f1_ndjson_streamer)

f15_submissions_df = load_submissions_df(dataset_constants.RawFile.FORMULA1POINT5_SUBMISSIONS, f15_ndjson_streamer)
f15_comments_df = load_comments_df(dataset_constants.RawFile.FORMULA1POINT5_COMMENTS, f15_ndjson_streamer)

f1_df = preprocessing.concatenate_submissions_and_comments(f1_submissions_df, f1_comments_df)
f15_df = preprocessing.concatenate_submissions_and_comments(f15_submissions_df, f15_comments_df)

In [4]:
n = 4

with display_full_dataframe():
    display(Markdown('### r/formula1 submissions:'), f1_submissions_df.head(n))
    display(Markdown('### r/formula1 comments:'), f1_comments_df.head(n))
    display(Markdown('### r/formula1point5 submissions:'), f15_submissions_df.head(n))
    display(Markdown('### r/formula1point5 comments:'), f15_comments_df.head(n))

### r/formula1 submissions:

Unnamed: 0,score,created_utc,title,selftext,id,author,gilded
0,1,2022-06-01 12:00:41,[Discussion] Could professional ESports drivers drive a real F1 car? How realistic are the sims?,[removed],v2fbpg,[deleted],0
1,2,2022-06-01 12:07:50,Questions concerning Alonso's future,[removed],v2fh6w,Doomaster14,0
2,1393,2022-06-01 12:15:14,Verstappen now has as many poles as Leclerc - but six times as many wins | 2022 Monaco Grand Prix stats and facts,,v2fmeh,motorace_addict,0
3,161,2022-06-01 12:23:16,Perez wins as Red Bull delivers race strategy blow to Ferrari - Mika Häkkinen’s thoughts on the Monaco Grand Prix,,v2frea,MrTuxedo1,0


### r/formula1 comments:

Unnamed: 0,score,created_utc,body,id,author,gilded
0,1,2022-06-01 00:00:57,top part of the wing got shaken off in the tunnel.,iaq4tev,CowsWantToKillMe,0
1,0,2022-06-01 00:01:15,That's been the rumour with Mercedes lately cuz in previous seasons Bottas hasn't been the luckiest.,iaq4urr,doc_55lk,0
2,3,2022-06-01 00:01:41,"Ah well, it's looking great already!",iaq4wpz,Organic-Measurement2,0
3,10,2022-06-01 00:01:46,And Ferrari would get them all wrong.,iaq4x1h,not_right,0


### r/formula1point5 submissions:

Unnamed: 0,score,created_utc,title,selftext,id,author,gilded
0,1,2022-06-07 09:21:41,Formula 1 - Hakkinen vs Schumacher - Spa-Francorchamps 2000,,v6qyud,orfeomclaren,0
1,1,2022-06-07 13:26:25,Formula 1 2003 - Rd 2 - Malaysian Grand Prix [Highlights] - Kimi Raikkonen Maiden Win,,v6viae,orfeomclaren,0
2,1,2022-06-09 08:12:22,Formula 1 2003 - Rd 9 - European Grand Prix (Nurburgring) [Highlights],,v8bwj6,orfeomclaren,0
3,1,2022-06-09 11:48:11,Red Bull drivers free to fight each other,,v8f1dk,ms_creativity,0


### r/formula1point5 comments:

Unnamed: 0,score,created_utc,body,id,author,gilded
0,3,2022-06-01 03:50:49,What is your team name please?,iaqwofj,debrek,0
1,2,2022-06-01 05:54:28,"It's lazily named team F1.5 and my name there is the same as my username here (Ignis Vizsla), I'm 34th on the leaderboard there for reference",iar7xgu,IgnisVizsla,0
2,3,2022-06-01 06:20:29,I had removed you as I thought you were inactive since you had a number of teams with an invalid team. I re-added you to the list.,iar9z0m,debrek,0
3,3,2022-06-01 06:49:13,"Yeah that's my fault, I forgot to update my team after the rules changed as I always remembered only after quali and that was too late, I finally changed before Monaco though",iarc3x7,IgnisVizsla,0


In [5]:
n = 3

with display_full_dataframe():
    display(Markdown('### r/formula1 posts:'), f1_df.head(n))
    display(Markdown('### r/formula1point5 posts:'), f15_df.head(n))

### r/formula1 posts:

Unnamed: 0,score,created_utc,id,author,gilded,text
0,1,2022-06-01 12:00:41,v2fbpg,[deleted],0,[Discussion] Could professional ESports drivers drive a real F1 car? How realistic are the sims? [removed]
1,2,2022-06-01 12:07:50,v2fh6w,Doomaster14,0,Questions concerning Alonso's future. [removed]
2,1393,2022-06-01 12:15:14,v2fmeh,motorace_addict,0,Verstappen now has as many poles as Leclerc - but six times as many wins | 2022 Monaco Grand Prix stats and facts.


### r/formula1point5 posts:

Unnamed: 0,score,created_utc,id,author,gilded,text
0,1,2022-06-07 09:21:41,v6qyud,orfeomclaren,0,Formula 1 - Hakkinen vs Schumacher - Spa-Francorchamps 2000.
1,1,2022-06-07 13:26:25,v6viae,orfeomclaren,0,Formula 1 2003 - Rd 2 - Malaysian Grand Prix [Highlights] - Kimi Raikkonen Maiden Win.
2,1,2022-06-09 08:12:22,v8bwj6,orfeomclaren,0,Formula 1 2003 - Rd 9 - European Grand Prix (Nurburgring) [Highlights]


# Baseline: Rule-Based Prediction Extraction

In [6]:
# TODO:

# Fastf1 historical data

In [7]:
full_schedule = fastf1.get_event_schedule(dataset_constants.YEAR)
schedule = typing.cast(
    fastf1_events.EventSchedule,
    full_schedule[
        (full_schedule['EventDate'] >= dataset_constants.START_DATE) &
        (full_schedule['EventDate'] <= dataset_constants.END_DATE) &
        (full_schedule['EventFormat'] == 'conventional') # TODO: Skip sprint weekends for now. Also include sprint weekends later
    ],
)

with display_full_dataframe():
    display(schedule.iloc[-3:])

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
20,19,United States,Austin,FORMULA 1 ARAMCO UNITED STATES GRAND PRIX 2022,2022-10-23,United States Grand Prix,conventional,Practice 1,2022-10-21 14:00:00-05:00,2022-10-21 19:00:00,Practice 2,2022-10-21 17:00:00-05:00,2022-10-21 22:00:00,Practice 3,2022-10-22 14:00:00-05:00,2022-10-22 19:00:00,Qualifying,2022-10-22 17:00:00-05:00,2022-10-22 22:00:00,Race,2022-10-23 14:00:00-05:00,2022-10-23 19:00:00,True
21,20,Mexico,Mexico City,FORMULA 1 HEINEKEN GRAN PREMIO DE LA CIUDAD DE MÉXICO 2022,2022-10-30,Mexico City Grand Prix,conventional,Practice 1,2022-10-28 13:00:00-06:00,2022-10-28 19:00:00,Practice 2,2022-10-28 16:00:00-06:00,2022-10-28 22:00:00,Practice 3,2022-10-29 12:00:00-06:00,2022-10-29 18:00:00,Qualifying,2022-10-29 15:00:00-06:00,2022-10-29 21:00:00,Race,2022-10-30 14:00:00-06:00,2022-10-30 20:00:00,True
23,22,Abu Dhabi,Yas Island,FORMULA 1 ETIHAD AIRWAYS ABU DHABI GRAND PRIX 2022,2022-11-20,Abu Dhabi Grand Prix,conventional,Practice 1,2022-11-18 14:00:00+04:00,2022-11-18 10:00:00,Practice 2,2022-11-18 17:00:00+04:00,2022-11-18 13:00:00,Practice 3,2022-11-19 14:30:00+04:00,2022-11-19 10:30:00,Qualifying,2022-11-19 18:00:00+04:00,2022-11-19 14:00:00,Race,2022-11-20 17:00:00+04:00,2022-11-20 13:00:00,True


In [8]:
posts_df = f1_df
race_weekend = schedule.iloc[-1]
first_post_at = typing.cast(dt.datetime, race_weekend['Session1DateUtc']) - dt.timedelta(days=1)
last_post_at = typing.cast(dt.datetime, race_weekend['Session5DateUtc'])
posts_df = posts_df[
    (posts_df['created_utc'] >= first_post_at) &
    (posts_df['created_utc'] <= last_post_at)
]

def get_top20(race_weekend: fastf1_events.Event) -> pd.DataFrame:
    race_session = race_weekend.get_session('Race')
    race_session.load(laps=False, telemetry=False, weather=False, messages=False)
    top20 = race_session.results[['FullName', 'Position']].astype({'Position': np.uint8})
    return top20

top20s = tuple(
    get_top20(typing.cast(fastf1_events.Event, race_weekend))
    for _, race_weekend in schedule.iterrows()
)
display(hide_index(top20s[-1]))

FullName,Position
Max Verstappen,1
Charles Leclerc,2
Sergio Perez,3
Carlos Sainz,4
George Russell,5
Lando Norris,6
Esteban Ocon,7
Lance Stroll,8
Daniel Ricciardo,9
Sebastian Vettel,10


# Pre-trained models

# Sentiment score:

In [9]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
from scipy.special import softmax

def driver_sentiment(comments, driver_list):
    model_name = "yangheng/deberta-v3-base-absa-v1.1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    results = {driver: {"positive": 0.0, "neutral": 0.0, "negative": 0.0, "count": 0} for driver in driver_list}

    for comment in comments:
        found_drivers = [driver for driver in driver_list if driver in comment]
        
        for aspect in found_drivers:
            inputs = tokenizer(comment, aspect, return_tensors="pt", truncation=True, padding=True).to(device)
            
            with torch.no_grad():
                outputs = model(**inputs)
            
            scores = outputs.logits[0].cpu().numpy()
            probabilities = softmax(scores)

            results[aspect]["positive"] += probabilities[2]
            results[aspect]["neutral"] += probabilities[1]
            results[aspect]["negative"] += probabilities[0]
            results[aspect]["count"] += 1

    for driver, sentiment in results.items():
        if sentiment["count"] > 0:
            sentiment["positive"] /= sentiment["count"]
            sentiment["neutral"] /= sentiment["count"]
            sentiment["negative"] /= sentiment["count"]

    return results


prediction_posts_df = ['Carlos Sainz is loving this upgraded car, good top 3 for the race tomorrow! I disagree with you, Max Verstappen will definitely finish first. I think BOT will finish behind NOR, who will probably finish 7th. That\'s my opinion at least... I predict that the RedBulls with finish 1-2. Nah, the Danish driver from Haas will almost certainly finish in points! Stroll on the podium and Vettel in points. I like cookies!']
driver_list = ['Carlos Sainz', 'Max Verstappen']

F1_names= {
    'max verstappen',
    'charles leclerc',
    'sergio perez',
    'george russell',
    'carlos sainz',
    'lewis hamilton',
    'lando norris',
    'esteban ocon',
    'fernando alonso',
    'valtteri bottas',
    'daniel ricciardo',
    'sebastian vettel',
    'kevin magnussen',
    'pierre gasly',
    'lance stroll',
    'mick schumacher',
    'yuki tsunoda',
    'zhou guanyu',
    'alexander albon',
    'nicholas latifi',
    'nyck de vries',
    'nico hulkenberg',
    'oscar piastri',
    'liam lawson',
    'logan sargeant'
}


results = driver_sentiment(load_f1_df(1000)["text"], F1_names)

for driver, sentiment in results.items():
    if sentiment["count"] > 0:
        print(f"{driver}: [Positive: {sentiment['positive']:.4f}, Neutral: {sentiment['neutral']:.4f}, Negative: {sentiment['negative']:.4f}]")
    else:
        print(f"{driver}: No mentions found.")

Device set to use cuda:0


esteban ocon: No mentions found.
nicholas latifi: No mentions found.
daniel ricciardo: No mentions found.
logan sargeant: No mentions found.
max verstappen: No mentions found.
valtteri bottas: No mentions found.
george russell: No mentions found.
alexander albon: No mentions found.
fernando alonso: No mentions found.
lando norris: No mentions found.
nyck de vries: No mentions found.
pierre gasly: No mentions found.
nico hulkenberg: No mentions found.
lance stroll: No mentions found.
yuki tsunoda: No mentions found.
oscar piastri: No mentions found.
zhou guanyu: No mentions found.
lewis hamilton: No mentions found.
kevin magnussen: No mentions found.
carlos sainz: No mentions found.
sebastian vettel: No mentions found.
sergio perez: No mentions found.
mick schumacher: No mentions found.
charles leclerc: No mentions found.
liam lawson: No mentions found.


In [10]:
def final_scores(results):
    final_scores = []

    for driver, sentiment in results.items():
        if sentiment["count"] > 0:
            sentiment_score = (sentiment["positive"] - sentiment["negative"])
            final_scores.append((driver.title(), sentiment_score))

    # Sort drivers by positive - negative score (descending order)
    final_scores.sort(key=lambda x: x[1], reverse=True)
    
    return final_scores

# Print sorted results
scores = final_scores(results)

print("Drivers ranked by (positive - negative score):")
for driver, score in scores:
    print(f"{driver}: {score:.4f}")


Drivers ranked by (positive - negative score):


In [11]:
def prediction(n_event, final_scores, n_events=5, historical_score_contribution=0.4):
    #func to import historical data for this race
    historical_data = get_top20(n_event)
    historical_scores = {row["DriverFullName"]: 0 for _, row in historical_data.iterrows()}

    for i in range(n_events):
        #func to import historical data for one of the last 5 races
        historical_data = get_top20(n_event - (i+1))

        for _, row in historical_data.iterrows():
            historical_scores[row["DriverFullName"]] += 1 - ((row["Pos"] - 1) / 19) * 2

    for driver, score in historical_scores.items():
        historical_scores[driver] = score / n_events

    final_scores_dict = dict(final_scores)

    final_prediction = []
    for driver, historical_score in historical_scores.items():
        if driver in final_scores_dict:
            score = final_scores_dict[driver]
            
            combined_score = (1 - historical_score_contribution) * score + historical_score_contribution * historical_score
            final_prediction.append((driver, combined_score))
        else:
            final_prediction.append((driver, historical_score))

    final_prediction.sort(key=lambda x: x[1], reverse=True)

    return final_prediction

# final_prediction = prediction(16, scores, n_events=5, historical_score_contribution=0.4)
# pos = 0
# for driver, score in final_prediction:
#     pos += 1
#     print(f"{driver} finishes in position:{pos}      {score:.4f}")

# GLiNER

In [12]:
import os
os.environ["HF_HOME"] = "C:\\cache"
from gliner import GLiNER

gliner_pickle_path = config.DATA_DIR / '.cache' / 'gliner_model.pkl'
gliner_pickle_path.parent.mkdir(parents=True, exist_ok=True)
use_cache = False

if use_cache:
    if not gliner_pickle_path.exists():
        gliner_model = GLiNER.from_pretrained('urchade/gliner_medium-v2.1')

        with open(gliner_pickle_path, 'wb') as file:
            pickle.dump(gliner_model, file)
    else:
        with open(gliner_pickle_path, 'rb') as file:
            gliner_model = pickle.load(file)
else:
    gliner_model = GLiNER.from_pretrained('urchade/gliner_medium-v2.1')

gliner_model.to(DEVICE);

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [13]:
print(type(gliner_model))

<class 'gliner.model.GLiNER'>


In [14]:
nlp = spacy.load('en_core_web_sm')
text = 'Carlos Sainz is loving this upgraded car, good top 3 for the race tomorrow! I disagree with you, verstappening will definitely finish first. I think BOT will finish behind NOR, who will probably finish 7th. That\'s my opinion at least... I predict that the RedBulls with finish 1-2. Nah, the Danish driver from Haas will almost certainly finish in points! Stroll on the podium and Vettel in points. I like cookies!'
debug = False

if debug:
    doc = nlp(text)
    df = pd.DataFrame({'text': tuple(sentence.text for sentence in doc.sents)})
else:
    df = load_f1_df(1000)

In [15]:
with display_full_dataframe():
    display(df.head())

def has_prediction(post_text: str, threshold: float = 0.45) -> bool:
    # doc = nlp(post_text)

    # TODO: does GLiNER's performance improve with more context? if yes, refactor to chunking instead of going over each sentence individually
    # for sentence in doc.sents:
        # TODO: for some reason, if you include only 'position', the predictions are far worse than with 'driver' included
    entities = gliner_model.predict_entities(post_text, ('driver', 'position',), threshold=threshold) # TODO: very low threshold
    position_entities = tuple(entity for entity in entities if entity['label'] == 'position')

    if debug:
        print(position_entities)
        print(tuple(position['text'] for position in position_entities))

    if len(position_entities) != 0:
        return True
    
    return False


if debug:
    predictions_df = df[df['text'].apply(has_prediction)]
    print(len(predictions_df))
    with display_full_dataframe():
        display(predictions_df.head())

Unnamed: 0,score,created_utc,id,author,gilded,text
0,1,2022-06-01 12:00:41,v2fbpg,[deleted],0,[Discussion] Could professional ESports drivers drive a real F1 car? How realistic are the sims? [removed]
1,2,2022-06-01 12:07:50,v2fh6w,Doomaster14,0,Questions concerning Alonso's future. [removed]
2,1393,2022-06-01 12:15:14,v2fmeh,motorace_addict,0,Verstappen now has as many poles as Leclerc - but six times as many wins | 2022 Monaco Grand Prix stats and facts.
3,161,2022-06-01 12:23:16,v2frea,MrTuxedo1,0,Perez wins as Red Bull delivers race strategy blow to Ferrari - Mika Häkkinen’s thoughts on the Monaco Grand Prix.
4,0,2022-06-01 12:25:54,v2ft61,[deleted],0,"The ""new"" qualifying since (I think 2021?) 2021 is way too short for most Tracks IMO. [deleted]"


In [16]:
import dask.dataframe as dd
'''
# Convert the Pandas DataFrame to a Dask DataFrame
dask_df = dd.from_pandas(load_f1_df(10), npartitions=16)  # Adjust the number of partitions as needed

# Apply the function in parallel
dask_df['has_prediction'] = dask_df['text'].map(has_prediction_dask, meta=('text', 'bool'))

# Compute the result and convert back to a Pandas DataFrame
result_df = dask_df[dask_df['has_prediction']].compute()

# Display the filtered DataFrame
with display_full_dataframe():
    display(result_df.head())
'''

"\n# Convert the Pandas DataFrame to a Dask DataFrame\ndask_df = dd.from_pandas(load_f1_df(10), npartitions=16)  # Adjust the number of partitions as needed\n\n# Apply the function in parallel\ndask_df['has_prediction'] = dask_df['text'].map(has_prediction_dask, meta=('text', 'bool'))\n\n# Compute the result and convert back to a Pandas DataFrame\nresult_df = dask_df[dask_df['has_prediction']].compute()\n\n# Display the filtered DataFrame\nwith display_full_dataframe():\n    display(result_df.head())\n"

In [17]:
text = 'Carlos Sainz is loving this upgraded car, good top 3 for the race tomorrow! I disagree with you, verstappening will definitely finish first. I think BOT will finish behind NOR, who will probably finish 7th. That\'s my opinion at least... I predict that the RedBulls with finish 1-2. Nah, the Danish driver from Haas will almost certainly finish in points! Stroll on the podium and Vettel in points. I like cookies!'
debug = False

if debug:
    doc = nlp(text)
    posts_df = pd.DataFrame({'text': tuple(sentence.text for sentence in doc.sents)})
else:
    posts_df = load_f15_df()

# with display_full_dataframe():
#     display(hide_index(df.head()))

# df['text'] = df['text'].apply(preprocessing.correct_spelling_in_text_spacy)
# df = df[df['text'].apply(has_prediction)]

def display_posts_df(n=3):
    global posts_df
    
    with display_full_dataframe():
        display(hide_index(posts_df.head(n)))

def has_prediction_dask(post_text):
    return has_prediction(post_text)


for index, race_weekend in schedule.iterrows():
    #load relevant post
    first_post_at = typing.cast(dt.datetime, race_weekend['Session1DateUtc']) - dt.timedelta(days=1)
    last_post_at = typing.cast(dt.datetime, race_weekend['Session5DateUtc'])
    posts_df = posts_df[
        (posts_df['created_utc'] >= first_post_at) &
        (posts_df['created_utc'] <= last_post_at)
    ]
    # print(len(posts_df))
    # display(posts_df.head())
    display_posts_df()
    start = time.perf_counter_ns()
    posts_df['text'] = posts_df['text'].apply(preprocessing.correct_spelling_in_text_spacy)
    end = time.perf_counter_ns()
    print((end - start) / 10 ** 9, "spell time")

    # only predictions
    start = time.perf_counter_ns()
    
    posts_ddf = dd.from_pandas(posts_df, npartitions=16)
    has_prediction = posts_ddf['text'].map_partitions(has_prediction_dask, meta=('text', 'bool'))
    print(type(has_prediction))
    print(has_prediction.compute)
    print(has_prediction.compute())
    posts_df = posts_ddf[has_prediction].compute()
    end = time.perf_counter_ns()
    print((end - start) / 10 ** 9, "filter time")

    # sentiment score
    start = time.perf_counter_ns()
    sentiment = driver_sentiment(posts_df["text"], driver_list)
    score = final_scores(sentiment)
    print((end - start) / 10 ** 9, "sentiment time")

    # final prediction
    start = time.perf_counter_ns()
    pred = prediction(race_weekend, scores, n_events=5, historical_score_contribution=0.4)
    end = time.perf_counter_ns()
    print((end - start) / 10 ** 9, "final pred time")
    print(pred)

    break


score,created_utc,id,author,gilded,text
1,2022-06-09 11:48:11,v8f1dk,ms_creativity,0,Red Bull drivers free to fight each other.
7,2022-06-09 17:27:38,v8m0rh,[deleted],0,F1.5 Azerbaijan GP Facts and Stats. [deleted]
285,2022-06-09 17:40:12,v8maw7,forza_seb,0,F1.5 Azerbaijan GP facts and stats (Corrected)


8.6614495 spell time
<class 'dask_expr._collection.Series'>
<bound method FrameBase.compute of Dask Series Structure:
npartitions=16
3      bool
5       ...
       ... 
500     ...
501     ...
Dask Name: has_prediction_dask, 3 expressions
Expr=MapPartitions(has_prediction_dask)>


TypeError: 'Series' object is not callable