In [None]:
import pandas as pd
import numpy as np
import pickle
from APIcall_v2 import main_api_call
from data_extraction import main_extract_transform_memory

In [None]:
# Call main_api_call (imported from APIcall_v2) and unpack its three return
# values; all three are handed to main_extract_transform_memory in the next cell.
# NOTE(review): presumably this hits an external API — confirm before re-running
# the cell repeatedly.
start_date, end_date, df_memory = main_api_call()


In [None]:
# Turn the values returned by main_api_call into `df`, the frame that the
# later cells normalize, score with the model, and plot.
# NOTE(review): later cells read df['Date'] — confirm the extraction step
# always produces that column.
df = main_extract_transform_memory(start_date, end_date, df_memory)

In [None]:
def normalize_user(row, mean_df, std_df):
    """Return the z-score of ``row``: ``(row - mean_df) / std_df``.

    Parameters
    ----------
    row
        Value(s) to standardize. In this notebook it is one row of the
        feature frame, passed in by ``DataFrame.apply(axis=1)``.
    mean_df
        Mean(s) to subtract from ``row``.
    std_df
        Standard deviation(s) to divide by.

    Returns
    -------
    The standardized value(s), in whatever type the arithmetic on the
    inputs produces.
    """
    centered = row - mean_df
    return centered / std_df

# Calculate the per-column means and standard deviations of the given frame.
def getMeanStd_user(data):
    """Compute per-column mean and standard deviation for z-scoring.

    Only numeric columns are aggregated (``numeric_only=True``), so a frame
    that also carries a non-numeric column (for example a date column) does
    not make ``mean()``/``std()`` raise on pandas versions where
    ``numeric_only`` defaults to ``False``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose numeric columns are summarized. It is not modified.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        ``(mean, std)``, both indexed by numeric column name. Any standard
        deviation that is exactly ``0.0`` is replaced by ``0.01`` so that a
        constant column does not cause a division by zero when normalizing.
    """
    mean = data.mean(numeric_only=True)
    # Non-inplace replace: returns a new Series instead of mutating one in place.
    std = data.std(numeric_only=True).replace(to_replace=0.0, value=0.01)
    return mean, std

# Column statistics are computed from this same frame (a copy of `df`), so
# every row is z-scored against the values in `df` itself.
user_test_means, user_test_std = getMeanStd_user(df.copy())

# Standardize row by row; the statistics are forwarded to normalize_user as
# extra positional arguments. The 'Date' column is then removed if present
# (errors='ignore' makes the drop a no-op when it is absent).
user_normalized = df.apply(
    normalize_user,
    axis=1,
    args=(user_test_means, user_test_std),
).drop(columns=['Date'], errors='ignore')


In [None]:
# Load the pickled model object from disk.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open('../models/logistic_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Class label predicted for every row of the normalized features.
predictions = model.predict(user_normalized)

# Keep only the second predict_proba column as the probability score.
# NOTE(review): presumably this is the injury (positive) class — confirm
# against model.classes_.
class_probabilities = model.predict_proba(user_normalized)
probs = class_probabilities[:, 1]

# Store both outputs on `df` under the column names the plotting cell reads.
df['injury predictions'] = predictions
df['injury probabilities'] = probs

In [None]:
# Plot the predicted injury probability over time: the raw scores first, then
# a rolling mean of the same scores.
import matplotlib.pyplot as plt

ROLLING_WINDOW = 4  # number of consecutive rows averaged in the smoothed chart
TICK_STEP = 10      # label every 10th row on the x-axis


def _plot_injury_probability(dates, scores, title):
    """Draw one injury-probability line chart on a new 12x6 figure.

    Parameters
    ----------
    dates : pandas.Series
        Values for the x-axis; every ``TICK_STEP``-th value becomes a tick label.
    scores : pandas.Series
        Probability scores plotted on the y-axis.
    title : str
        Figure title, used to tell the raw and smoothed charts apart.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the line was drawn on.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(dates, scores)
    ax.set_xlabel('Date')
    ax.set_ylabel('Injury Probability Score')
    ax.set_title(title)
    # NOTE(review): integer tick positions line up with the data only when
    # `dates` holds strings (matplotlib's categorical axis) — confirm the
    # dtype of df['Date'].
    ax.set_xticks(list(range(0, len(dates), TICK_STEP)))
    ax.set_xticklabels(dates[::TICK_STEP], rotation=20, ha='right')
    return ax


# Raw per-row probabilities.
_plot_injury_probability(
    df['Date'],
    df['injury probabilities'],
    'Injury probability over time',
)

# Same series smoothed with a rolling mean; the first ROLLING_WINDOW - 1
# points are NaN and therefore not drawn.
_plot_injury_probability(
    df['Date'],
    df['injury probabilities'].rolling(window=ROLLING_WINDOW).mean(),
    f'Injury probability over time ({ROLLING_WINDOW}-row rolling mean)',
)
plt.show()