In [None]:
import pandas as pd
from sentiment.analyser import apply_sentiment_analysis
from sentiment.parser import read_driver_reaction_file, read_driver_times_file
import os

In [None]:
def read_data_files(year):
    """Load every race of a season into one DataFrame and attach team data.

    Reads ``data/{year}/teams.csv`` and, for each race directory under
    ``data/{year}/races``, the ``driver-times.csv`` and ``driver-reactions.csv``
    files via the project parsers. Each per-race frame is tagged with the race
    directory name in a ``race`` column, the frames are stacked, and the result
    is left-joined with the teams table on ``driver``.

    Args:
        year: Season identifier used to build the data paths (e.g. ``"2024"``).

    Returns:
        A DataFrame whose leading columns are ``race``, ``driver``, ``place``
        and ``reaction``, followed by any further parser columns and then the
        columns of ``teams.csv``. If no race directory exists the frame is
        empty but keeps that schema.

    Raises:
        FileNotFoundError: If ``teams.csv`` or the races directory is missing.
    """
    base_columns = ["race", "driver", "place", "reaction"]
    races_dir = f"data/{year}/races"

    teams = pd.read_csv(f"data/{year}/teams.csv")

    # os.listdir returns entries in arbitrary order and includes stray files
    # (.DS_Store, .ipynb_checkpoints, ...); keep only real race directories and
    # sort them so the row order is reproducible.
    race_names = sorted(
        entry
        for entry in os.listdir(races_dir)
        if not entry.startswith(".") and os.path.isdir(os.path.join(races_dir, entry))
    )

    # Collect the per-race frames and concatenate once, rather than re-copying
    # a growing frame on every iteration.
    frames = []
    for race in race_names:
        race_dir = f"{races_dir}/{race}"
        df_driver_times = read_driver_times_file(f"{race_dir}/driver-times.csv")
        df_driver_reactions = read_driver_reaction_file(
            f"{race_dir}/driver-reactions.csv", df_driver_times
        )
        # assign() returns a new frame, so the parser's object is not mutated.
        frames.append(df_driver_reactions.assign(race=race))

    if frames:
        df = pd.concat(frames, ignore_index=True)
        # Keep the documented columns first (creating any that are absent),
        # then whatever extra columns the parser produced.
        extra_columns = [col for col in df.columns if col not in base_columns]
        df = df.reindex(columns=base_columns + extra_columns)
    else:
        df = pd.DataFrame(columns=base_columns)

    return pd.merge(df, teams, on="driver", how="left")

In [None]:
def run(year="2024"):
    """Load one season's data and run sentiment analysis over it.

    Args:
        year: Season whose ``data/{year}`` folder is read. Defaults to
            ``"2024"``, the value that used to be hard-coded here.

    Returns:
        The frame returned by ``apply_sentiment_analysis`` for that season
        (presumably the loaded frame plus a ``sentiment`` column, which the
        cells below read; confirm against the analyser).
    """
    df = read_data_files(year)
    df = apply_sentiment_analysis(df)
    # Disabled experiment, kept for reference:
    # df["sentiment_normalised"] = df["sentiment"] * df["place"]
    return df

In [None]:
# Build the analysed frame once; every cell below reads this notebook-level `df`.
df = run()

In [None]:
# Show the analysed frame as the cell output.
df

In [None]:
# Which driver is the most positive, and what is their average sentiment?
# Aggregate once, then read both the driver label and the value from the same Series.
driver_mean_sentiment = df.groupby("driver")["sentiment"].mean()
most_positive_driver = driver_mean_sentiment.idxmax()
average_sentiment = driver_mean_sentiment.max()


In [None]:
# Report the most positive driver together with their mean sentiment.
print(f"The most positive driver is {most_positive_driver} with an average sentiment of {average_sentiment}")

In [None]:
# Which driver is the most negative, and what is their average sentiment?
# Aggregate once, then read both the driver label and the value from the same Series.
driver_mean_sentiment = df.groupby("driver")["sentiment"].mean()
most_negative_driver = driver_mean_sentiment.idxmin()
# NOTE: this rebinds `average_sentiment`, which the most-positive cell also sets.
average_sentiment = driver_mean_sentiment.min()

In [None]:
# Report the most negative driver together with their mean sentiment.
# NOTE: `average_sentiment` is also assigned by the most-positive cell, so this
# prints the right value only if the most-negative cell was the last one run.
print(f"The most negative driver is {most_negative_driver} with an average sentiment of {average_sentiment}")

In [None]:
# Average sentiment per driver, most positive first.
# The groupby result is a Series, so the bars come from its index (driver) and
# values; the DataFrame-only `x=`/`y=` arguments had no effect and are dropped.
(
    df.groupby("driver")["sentiment"]
    .mean()
    .sort_values(ascending=False)
    .plot(kind="bar", title="Average sentiment per driver", ylabel="Average sentiment")
)


In [None]:
# Average sentiment per team, most positive first.
# The groupby result is a Series, so the bars come from its index (team) and
# values; the DataFrame-only `x=`/`y=` arguments had no effect and are dropped.
(
    df.groupby("team")["sentiment"]
    .mean()
    .sort_values(ascending=False)
    .plot(kind="bar", title="Average sentiment per team", ylabel="Average sentiment")
)

In [None]:
# Average sentiment per race, drawn as one bar per race.
# as_index=False keeps `race` as a regular column so it can be named as the x axis.
df.groupby("race", as_index=False)["sentiment"].mean().plot.bar(x="race", y="sentiment")

In [None]:
# Scatter of sentiment against finishing place, excluding DNF rows.
# Filter first, then derive both integer columns with assign(): this builds a
# new frame instead of writing into a filtered slice, which avoids pandas'
# SettingWithCopyWarning and leaves the shared `df` untouched.
df_plot = df.loc[df["place"] != "DNF"].assign(
    # Sentiment is scaled by 100 and cast to int for plotting.
    sentiment=lambda frame: (frame["sentiment"] * 100).astype(int),
    place=lambda frame: frame["place"].astype(int),
)
df_plot.plot(kind="scatter", x="place", y="sentiment")