In [10]:
from collections.abc import Generator, Callable
from pathlib import Path
import typing
from typing import Any, TypeAlias
import pandas as pd
import numpy as np
import datetime as dt
import re
from functools import partial, reduce
from tqdm import tqdm
from IPython.display import (
    display, # type: ignore[reportUnknownVariableType]
    Markdown,
)

from config.fastf1 import fastf1
from config import config
from src.data.loader import stream_ndjson, load_submissions_df, load_comments_df
from src.data.preprocessing import concatenate_submissions_and_comments
import src.data.constants as dataset_constants

from src.utils import (
    temporary_pandas_options,
    display_full_dataframe,
    hide_index,
    compose,
)
from src import utils
# Seed the random number generators up front so later cells are repeatable.
# NOTE(review): which libraries get seeded is decided inside src.utils — confirm there.
utils.set_random_seeds()

import logging
# Raise the 'fastf1' logger threshold to WARNING so its DEBUG/INFO records
# are not emitted into the notebook output.
logging.getLogger('fastf1').setLevel(logging.WARNING)

In [11]:
# r/formula1: read through an NDJSON streamer with `limit=5000` bound in.
# NOTE(review): presumably `limit` caps the number of records read per file —
# confirm against stream_ndjson.
f1_ndjson_streamer = partial(stream_ndjson, limit=5000)

f1_submissions_df = load_submissions_df(
    dataset_constants.RawFile.FORMULA1_SUBMISSIONS,
    f1_ndjson_streamer,
)
f1_comments_df = load_comments_df(
    dataset_constants.RawFile.FORMULA1_COMMENTS,
    f1_ndjson_streamer,
)

# r/formula1point5: no streamer is passed, so the loaders fall back to
# whatever default they define.
f15_submissions_df = load_submissions_df(
    dataset_constants.RawFile.FORMULA1POINT5_SUBMISSIONS,
)
f15_comments_df = load_comments_df(
    dataset_constants.RawFile.FORMULA1POINT5_COMMENTS,
)

In [None]:
# Number of leading rows shown for each raw frame.
n = 4

# (markdown heading, frame) pairs, listed in the order they are rendered.
raw_previews = (
    ('### r/formula1 submissions:', f1_submissions_df),
    ('### r/formula1 comments:', f1_comments_df),
    ('### r/formula1point5 submissions:', f15_submissions_df),
    ('### r/formula1point5 comments:', f15_comments_df),
)

# Render every preview inside the project's full-dataframe display context.
with display_full_dataframe():
    for heading, frame in raw_previews:
        display(Markdown(heading), frame.head(n))

In [None]:
# Combine each subreddit's submissions and comments into a single frame.
f1_df = concatenate_submissions_and_comments(
    f1_submissions_df,
    f1_comments_df,
)
f15_df = concatenate_submissions_and_comments(
    f15_submissions_df,
    f15_comments_df,
)

# Number of leading rows shown for each combined frame.
n = 3

# (markdown heading, frame) pairs, listed in the order they are rendered.
post_previews = (
    ('### r/formula1 posts:', f1_df),
    ('### r/formula1point5 posts:', f15_df),
)

with display_full_dataframe():
    for heading, frame in post_previews:
        display(Markdown(heading), frame.head(n))

In [14]:
# TODO: