In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix

import os
import sklearn
from sklearn.preprocessing import StandardScaler
import pandas as pd
from tabpfn import TabPFNClassifier, TabPFNRegressor
from tabpfn_time_series import TabPFNTimeSeriesPredictor, TabPFNMode
import matplotlib.pyplot as plt
from plots import plot_pred_and_actual_ts


In [None]:
def metrics(y_true=None, y_pred=None):
    """Compute regression error metrics for a forecast.

    Parameters
    ----------
    y_true : array-like, optional
        Observed values. When omitted, the `target` column of the
        notebook-level `df_test` frame is used.
    y_pred : array-like, optional
        Predicted values. When omitted, the `target` column of the
        notebook-level `pred` frame is used.

    Returns
    -------
    tuple
        `(mae, mse, rmse, r2)` where `rmse` is the square root of `mse`.
    """
    # Fall back to the notebook globals so existing `metrics()` calls keep working.
    if y_true is None:
        y_true = df_test.target
    if y_pred is None:
        y_pred = pred.target

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    return mae, mse, rmse, r2


In [None]:
from datasets import load_dataset
from autogluon.timeseries import TimeSeriesDataFrame

from tabpfn_time_series.data_preparation import to_gluonts_univariate, generate_test_X


In [None]:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Sorted so the files are processed in the same order on every run
# (os.listdir returns directory entries in arbitrary order).
files = sorted(os.listdir('timeseries/'))
def categorize_acwr(acwr):
    """Map an ACWR value to a risk class.

    Parameters
    ----------
    acwr : float
        The ACWR value to classify.

    Returns
    -------
    int or None
        0 when `acwr < 0.8`, 1 when `0.8 <= acwr < 1.5`, 2 when
        `acwr >= 1.5`. `None` when the value is NaN (it matches no range).
    """
    if acwr < 0.8:
        return 0
    if acwr < 1.5:
        return 1
    # `>=` so that a value of exactly 1.5 is classified instead of falling
    # through every branch.
    if acwr >= 1.5:
        return 2
    # NaN compares False against every threshold.
    return None
# Build one model-ready frame per player CSV in `timeseries/`.
#
# Each kept frame is indexed by (item_id, timestamp), has the ACWR column
# renamed to `target` and placed last, and is stored in `datass`; the matching
# file stem is stored at the same position in `file_names`.

MIN_ROWS = 80  # frames with this many rows or fewer are skipped

# Optional feature selection. `cos` is created by the correlation cells later
# in the notebook, so it does not exist on a fresh kernel; in that case all
# columns are kept, and re-running this cell afterwards applies the selection.
selected_cols = cos.cols.tolist() if 'cos' in globals() else None

datas = pd.DataFrame()
file_names = []
datass = []
for file in files:
    if not file.endswith('.csv'):
        continue

    player_name = os.path.splitext(file)[0]
    df = pd.read_csv(os.path.join('timeseries', file))
    # Scalar assignment labels every row with the file stem.
    df['player_id'] = player_name
    df = df.drop_duplicates(subset='Date', ignore_index=True)
    df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
    df = df.sort_values(by='Date', ascending=True).reset_index(drop=True)

    df.columns = df.columns.str.upper()
    datas = df.rename(columns={'DATE': 'timestamp'})

    datas = datas.dropna(subset=['ACWR', 'TOTAL_DISTANCE'])

    # Drop rows whose ACWR value cannot be assigned a risk class
    # (categorize_acwr returned None for them).
    risk_class = datas['ACWR'].apply(categorize_acwr)
    datas = datas[risk_class.notna()]

    datas = datas.drop(columns=['CTL28', 'CTL42', 'ATL', 'PLAYER_ID'])
    datas = datas.rename(columns={'ACWR': 'target'})
    datas['item_id'] = 0
    datas = datas.set_index(['item_id', 'timestamp'])

    # Move `target` to the last column.
    datas['target'] = datas.pop('target')

    if selected_cols is not None:
        datas = datas[selected_cols]

    if len(datas) > MIN_ROWS:
        file_names.append(player_name)
        datass.append(datas)

In [None]:
# Hold-out evaluation per player and calendar year: the first 80 % of a year's
# rows are used for fitting, the remaining 20 % are forecast and scored both as
# a regression (MAE/MSE/RMSE/R2) and as a risk classification.
#
# Everything stays at cell top level on purpose: `metrics()` is called without
# arguments and reads the notebook-level `df_test` and `pred`.

EVAL_YEARS = (2020, 2021)
TRAIN_FRACTION = 0.8
RISK_LABELS = [0, 1, 2]  # the class values categorize_acwr can return

ids, maes, mses, rmses, r2s = [], [], [], [], []
accuracys, precisions, recalls, f1s = [], [], [], []

# Make sure the output folder exists in case the plotting helper does not
# create it itself.
os.makedirs('figures', exist_ok=True)

predictor = TabPFNTimeSeriesPredictor(
    tabpfn_mode=TabPFNMode.LOCAL,
)

for player_name, player_frame in zip(file_names, datass):
    row_years = player_frame.index.get_level_values('timestamp').year

    for year in EVAL_YEARS:
        data_year = player_frame[row_years == year]
        train_size = int(len(data_year) * TRAIN_FRACTION)
        df_train, df_test = data_year.iloc[:train_size], data_year.iloc[train_size:]
        if len(df_train) == 0 or len(df_test) == 0:
            continue

        # Test frame handed to the predictor: same rows, target masked out.
        # The column is named `target` at this point, so it has to be
        # assigned by key; attribute assignment would not touch the column.
        df_test_na = df_test.copy()
        df_test_na['target'] = np.nan

        # NOTE(review): the frames are plain DataFrames, so `item_ids` is
        # attached by hand; presumably the predictor reads it as it would on an
        # autogluon TimeSeriesDataFrame. Confirm, and consider wrapping the
        # frames in TimeSeriesDataFrame instead.
        df_train.item_ids = df_train.index.get_level_values('item_id').unique()
        df_test.item_ids = df_test.index.get_level_values('item_id').unique()
        df_test_na.item_ids = df_test_na.index.get_level_values('item_id').unique()

        pred = predictor.predict(df_train, df_test_na)

        plot_pred_and_actual_ts(
            train=df_train,
            test=df_test,
            pred=pred,
            save_path=f'figures/{player_name}_{year}.png'
        )

        # Regression metrics (reads the notebook-level `df_test` and `pred`).
        mae, mse, rmse, r2 = metrics()
        ids.append(f'{player_name}_{year}')
        maes.append(mae)
        mses.append(mse)
        rmses.append(rmse)
        r2s.append(r2)

        # Classification metrics on the risk classes; computed as separate
        # series so neither `df_test` nor `pred` is modified.
        y_true = df_test['target'].apply(categorize_acwr)
        y_pred = pred['target'].apply(categorize_acwr)

        accuracy = accuracy_score(y_true, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average='weighted', zero_division=0
        )
        report = classification_report(y_true, y_pred, zero_division=0)
        conf_matrix = confusion_matrix(y_true, y_pred, labels=RISK_LABELS)

        accuracys.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)

        print(f"🔹 Accuracy（准确率）: {accuracy:.4f}")
        print(f"🔹 Precision（精确率）: {precision:.4f}")
        print(f"🔹 Recall（召回率）: {recall:.4f}")
        print(f"🔹 F1 Score: {f1:.4f}\n")
        print("🔹 Classification Report:\n", report)
        print("🔹 Confusion Matrix:\n", conf_matrix)

In [None]:
# Collect the per-player, per-year scores into one table and write it to disk.
RESULT_PATH = 'result_corr1.csv'

data = {
    'ID': ids,
    'MAE': maes,
    'MSE': mses,
    'RMSE': rmses,
    'R2': r2s,
    'accuracy': accuracys,
    'precision': precisions,
    'recall': recalls,
    'f1': f1s
}

df = pd.DataFrame(data)
df.to_csv(RESULT_PATH)

# Report the path that was actually written.
print(f"Data saved to '{RESULT_PATH}'")

In [None]:
# Inspect `datas` as left behind by the loading loop: the frame built from the
# last CSV it processed, or the initial empty frame if no CSV was read.
datas

In [None]:
# One vector per player frame: the correlation of every column with `target`,
# computed on rows without missing values; undefined correlations become 0.
corrs = []
for player_frame in datass:
    complete_rows = player_frame.dropna()
    target_corr = complete_rows.corr().fillna(0)['target']
    corrs.append(target_corr.values)

In [None]:
# Stack the per-frame correlation vectors row-wise and average them per column.
arr_stack = np.vstack(corrs)
mean_values = arr_stack.mean(axis=0)

In [None]:
# Show the column-wise mean of the stacked correlation vectors in `corrs`.
mean_values

In [None]:
# Pair each column name with its mean correlation against `target`.
# The first frame supplies the column names so this also works when only a
# single player frame was loaded.
reference_columns = datass[0].columns
assert all(frame.columns.equals(reference_columns) for frame in datass), \
    "player frames have differing columns; mean correlations would be misaligned"
cos = pd.DataFrame({'cols': reference_columns, 'mean': mean_values})

In [None]:
# Keep only the columns whose mean correlation exceeds 0.05 in absolute value,
# then list the surviving column names.
keep_mask = cos['mean'].abs() > 0.05
cos = cos.loc[keep_mask]
cos['cols'].tolist()