In [466]:
import pandas as pd
import numpy as np
import sklearn.model_selection 
import sklearn.tree
import sklearn.ensemble
import sklearn.linear_model
import sklearn.metrics
import itertools

import warnings
warnings.simplefilter(action='ignore')

seed = 2024

In [467]:
df_athletes_activities = pd.read_csv('C:/Users/USER/Desktop/EstudosDados/Projetos/Corrida/physical_inactivity_prediction/scripts_prediction_model/final/st2_df_athletes_activities.csv', parse_dates=['activity_date'])
df_athletes_activities

Unnamed: 0,activity_date,athlete_id,total_distance (km),total_time (min),pace (min/km),speed (km/h),week_frequency,week_total_time,week_mean_time,week_max_time,...,PR_pace,PR_speed,PR_days_since_last_act,PR_week_frequency,PR_week_total_time,PR_week_max_time,PR_week_total_dist,PR_week_max_dist,PR_week_best_pace,PR_week_best_speed
0,2020-07-09,1,6.03,39.0,6.47,9.28,1.0,39.0,39.00,39.0,...,6.47,9.28,0,1.0,39.0,39.0,6.03,6.03,6.47,9.28
1,2020-07-12,1,7.57,55.0,7.27,8.26,2.0,94.0,47.00,55.0,...,6.47,9.28,3,2.0,94.0,55.0,13.60,7.57,6.47,9.28
2,2020-07-16,1,3.61,23.0,6.37,9.42,2.0,78.0,39.00,55.0,...,6.37,9.42,4,2.0,94.0,55.0,13.60,7.57,6.37,9.42
3,2020-07-24,1,5.84,41.0,7.02,8.55,1.0,41.0,41.00,41.0,...,6.37,9.42,8,2.0,94.0,55.0,13.60,7.57,6.37,9.42
4,2020-07-25,1,4.28,32.0,7.48,8.02,2.0,73.0,36.50,41.0,...,6.37,9.42,8,2.0,94.0,55.0,13.60,7.57,6.37,9.42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,2024-06-16,7,10.06,45.0,4.47,13.26,3.0,95.0,31.67,45.0,...,4.36,13.77,5,5.0,190.0,70.0,35.81,13.24,4.36,13.77
740,2024-06-18,7,5.20,33.0,6.35,9.27,3.0,108.0,36.00,45.0,...,4.36,13.77,5,5.0,190.0,70.0,35.81,13.24,4.36,13.77
741,2024-06-21,7,12.10,62.0,5.12,11.71,3.0,140.0,46.67,62.0,...,4.36,13.77,5,5.0,190.0,70.0,35.81,13.24,4.36,13.77
742,2024-06-26,7,4.08,22.0,5.39,11.13,2.0,84.0,42.00,62.0,...,4.36,13.77,5,5.0,190.0,70.0,35.81,13.24,4.36,13.77


#### Remoção de variáveis multicolineares para evitar redundância

In [468]:
# Matriz de correlação entre as features, excluindo a data, id do atleta e o target
corr = df_athletes_activities.loc[:, ~df_athletes_activities.columns.isin(['activity_date', 'athlete_id', 'desmotivation'])].corr()

# Transformar a matriz de correlação em um DataFrame (formatado de maneira que seja fácil visualizar pares de features)
df_corr = pd.melt(corr, ignore_index=False, var_name='feature2', value_name='correlation')

# Resetar o índice para que 'feature1' seja uma coluna em vez de índice e renomear a coluna de índice para 'feature1'
df_corr = df_corr.reset_index().rename(columns={'index': 'feature1'})

# Remover linhas onde 'feature1' e 'feature2' são iguais (ou seja, a correlação de uma feature com ela mesma)
df_corr.drop(df_corr[(df_corr['feature1'] == df_corr['feature2'])].index, axis=0, inplace=True)

# Remover duplicatas na coluna de correlação, mantendo apenas a primeira ocorrência
df_corr.drop_duplicates(subset='correlation', inplace=True)

# Resetar o índice mais uma vez após remover duplicatas
df_corr.reset_index(drop=True, inplace=True)

# Selecionando apenas correlações com valor acima de -0.7 ou 0.7 (Forte correlação)
df_strong_corr = df_corr[(np.abs(df_corr['correlation']) > 0.7)]

df_strong_corr

Unnamed: 0,feature1,feature2,correlation
0,total_time (min),total_distance (km),0.904113
8,week_mean_dist,total_distance (km),0.718465
29,week_mean_time,total_time (min),0.705361
49,speed (km/h),pace (min/km),-0.931039
57,week_best_pace,pace (min/km),0.716145
...,...,...,...
330,PR_week_max_dist,PR_week_frequency,0.838928
331,PR_week_best_speed,PR_week_frequency,0.958097
332,PR_week_total_dist,PR_week_total_time,0.988934
333,PR_week_best_pace,PR_week_total_dist,-0.891817


In [469]:
# Cria um conjunto vazio para armazenar features a serem removidas
features_to_remove = set()

# Itera sobre cada linha do DataFrame df_strong_corr (Correlações fortes)
for i, row in df_strong_corr.iterrows():
    # Obtém o nome da primeira feature
    feature1 = row['feature1']

    # Obtém o nome da segunda feature
    feature2 = row['feature2']

    # Obtém as correlações da feature1 com as outras features
    feature1_corrs = df_corr.loc[(df_corr['feature1'] == feature1) | (df_corr['feature2'] == feature1)]

    # Obtém a média absoluta da correlação da feature1 com as outras features, exceto a feature2
    feature1_corrs_abs_mean = round(np.abs(feature1_corrs.loc[~feature1_corrs['feature2'].isin([feature2]), 'correlation']).mean(), 4)

    # Obtém as correlações da feature2 com as outras features
    feature2_corrs = df_corr.loc[(df_corr['feature1'] == feature2) | (df_corr['feature2'] == feature2)]

    # Obtém a média absoluta da correlação da feature2 com as outras features, exceto a feature1
    feature2_corrs_abs_mean = round(np.abs(feature2_corrs.loc[~feature2_corrs['feature1'].isin([feature1]), 'correlation']).mean(), 4)

    print(f'Feature 1: {feature1}, Média absoluta da correlação com outras features: {feature1_corrs_abs_mean}')
    print(f'Feature 2: {feature2}, Média absoluta da correlação com outras features: {feature2_corrs_abs_mean}')

    # Compara as correlações das duas features
    if feature1_corrs_abs_mean > feature2_corrs_abs_mean:
        print(f'Feature com maior correlação com as outras features: {feature1}')
        # Adiciona feature1 ao conjunto se sua correlação for maior
        features_to_remove.add(feature1)
    elif feature1_corrs_abs_mean < feature2_corrs_abs_mean:
        # Adiciona feature2 ao conjunto se sua correlação for maior
        print(f'Feature com maior correlação com as outras features: {feature2}')
        features_to_remove.add(feature2)

    print()

# Converte o conjunto para uma lista
features_to_remove = list(features_to_remove)

# Exibe a lista de features a serem removidas
print(f'Features to remove ({len(features_to_remove)}): {features_to_remove}')

# Remove as features multicolineares selecionadas
df_athletes_activities.drop(features_to_remove, axis = 1, inplace= True)

Feature 1: total_time (min), Média absoluta da correlação com outras features: 0.2904
Feature 2: total_distance (km), Média absoluta da correlação com outras features: 0.4238
Feature com maior correlação com as outras features: total_distance (km)

Feature 1: week_mean_dist, Média absoluta da correlação com outras features: 0.5582
Feature 2: total_distance (km), Média absoluta da correlação com outras features: 0.4315
Feature com maior correlação com as outras features: week_mean_dist

Feature 1: week_mean_time, Média absoluta da correlação com outras features: 0.395
Feature 2: total_time (min), Média absoluta da correlação com outras features: 0.2987
Feature com maior correlação com as outras features: week_mean_time

Feature 1: speed (km/h), Média absoluta da correlação com outras features: 0.415
Feature 2: pace (min/km), Média absoluta da correlação com outras features: 0.2915
Feature com maior correlação com as outras features: speed (km/h)

Feature 1: week_best_pace, Média absolut

#### Função para gerar resultados preditivos e mostrar os parâmetros utilizados

In [470]:
def scores(y_pred_train, y_pred_test, y_train, y_test):
    accuracy_train = sklearn.metrics.accuracy_score(y_train, y_pred_train)
    accuracy_test = sklearn.metrics.accuracy_score(y_test, y_pred_test)

    precision_train = sklearn.metrics.precision_score(y_train, y_pred_train)
    precision_test = sklearn.metrics.precision_score(y_test, y_pred_test)

    recall_train = sklearn.metrics.recall_score(y_train, y_pred_train)
    recall_test = sklearn.metrics.recall_score(y_test, y_pred_test)

    f1_train = sklearn.metrics.f1_score(y_train, y_pred_train)
    f1_test = sklearn.metrics.f1_score(y_test, y_pred_test)

    roc_auc_train = sklearn.metrics.roc_auc_score(y_train, y_pred_train)
    roc_auc_test = sklearn.metrics.roc_auc_score(y_test, y_pred_test)

    metrics =  {
            #'Algorithm': str(model),
            #"Features": str(features),
            'accuracy_train': round(accuracy_train,2), 
            'accuracy_test': round(accuracy_test,2), 
            'precision_train': round(precision_train,2), 
            'precision_test': round(precision_test, 2), 
            'recall_train': round(recall_train,2), 
            'recall_test': round(recall_test,2), 
            'f1_train': round(f1_train,2), 
            'f1_test': round(f1_test,2), 
            'roc_auc_train': round(roc_auc_train,2), 
            'roc_auc_test': round(roc_auc_test, 2)
            }
    return metrics

In [471]:
'''
model = sklearn.ensemble.RandomForestClassifier(random_state= seed, class_weight={0:0.7, 1: 0.93})

_scoring = ['accuracy','precision', 'recall', 'f1', 'roc_auc']

# Configurando a validação cruzada estratificada
cv = sklearn.model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)


tuned_model = sklearn.model_selection.TunedThresholdClassifierCV(model, scoring='f1', cv=cv, n_jobs=-1)

tuned_model.fit(X_train, y_train)
y_pred_test = tuned_model.predict(X_test)

results = sklearn.model_selection.cross_validate(tuned_model, X= X_train, y= y_train, cv=cv, scoring=_scoring, return_train_score=True, n_jobs=-1)

metrics = {

    "Mean Accuracy Train": round(results['train_accuracy'].mean(),2),
    "Mean Accuracy Eval": round(results['test_accuracy'].mean(),2),
    "Accuracy Test": round(sklearn.metrics.accuracy_score(y_test, y_pred_test),2),

    "Mean Precision Train": round(results['train_precision'].mean(),2),
    "Mean Precision Eval": round(results['test_precision'].mean(),2),
    "Precision Test": round(sklearn.metrics.precision_score(y_test, y_pred_test),2),

    "Mean Recall Train": round(results['train_recall'].mean(),2),
    "Mean Recall Eval": round(results['test_recall'].mean(),2),
    "Recall Test": round(sklearn.metrics.recall_score(y_test, y_pred_test),2),

    "Mean F1 Train": round(results['train_f1'].mean(),2),
    "Mean F1 Eval": round(results['test_f1'].mean(),2),
    "F1 Test": round(sklearn.metrics.f1_score(y_test, y_pred_test),2),

    "Mean AUC-ROC Train": round(results['train_roc_auc'].mean(),2),
    "Mean AUC-ROC Eval": round(results['test_roc_auc'].mean(),2),
    "AUC-ROC Test": round(sklearn.metrics.roc_auc_score(y_test, y_pred_test),2)
}   

metrics
'''

'\nmodel = sklearn.ensemble.RandomForestClassifier(random_state= seed, class_weight={0:0.7, 1: 0.93})\n\n_scoring = [\'accuracy\',\'precision\', \'recall\', \'f1\', \'roc_auc\']\n\n# Configurando a validação cruzada estratificada\ncv = sklearn.model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)\n\n\ntuned_model = sklearn.model_selection.TunedThresholdClassifierCV(model, scoring=\'f1\', cv=cv, n_jobs=-1)\n\ntuned_model.fit(X_train, y_train)\ny_pred_test = tuned_model.predict(X_test)\n\nresults = sklearn.model_selection.cross_validate(tuned_model, X= X_train, y= y_train, cv=cv, scoring=_scoring, return_train_score=True, n_jobs=-1)\n\nmetrics = {\n\n    "Mean Accuracy Train": round(results[\'train_accuracy\'].mean(),2),\n    "Mean Accuracy Eval": round(results[\'test_accuracy\'].mean(),2),\n    "Accuracy Test": round(sklearn.metrics.accuracy_score(y_test, y_pred_test),2),\n\n    "Mean Precision Train": round(results[\'train_precision\'].mean(),2),\n    "Mean Preci

In [472]:
X = df_athletes_activities.loc[:, ~df_athletes_activities.columns.isin(['activity_date', 'athlete_id', 'desmotivation'])]
y = df_athletes_activities['desmotivation']

# Separando dados para treino e teste
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size= 0.2, random_state= seed, stratify=y)

print('Dados para treino e teste')
print(f'X_train shape : {X_train.shape}\ny_train shape: {y_train.value_counts()}')
print(f'X_test shape : {X_test.shape}\ny_test shape: {y_test.value_counts()}')

Dados para treino e teste
X_train shape : (595, 8)
y_train shape: desmotivation
0    433
1    162
Name: count, dtype: int64
X_test shape : (149, 8)
y_test shape: desmotivation
0    109
1     40
Name: count, dtype: int64


In [473]:
major_class_weight = round(y_train.value_counts(normalize= True)[0],2)
minor_class_weight = round(y_train.value_counts(normalize= True)[1],2)
print(major_class_weight, minor_class_weight)

algorithms = [sklearn.ensemble.RandomForestClassifier(random_state= seed), sklearn.tree.DecisionTreeClassifier(random_state= seed)]
features = df_athletes_activities.loc[:, ~df_athletes_activities.columns.isin(['activity_date', 'athlete_id', 'desmotivation'])].columns

# Função para gerar todas as combinações possíveis
all_features_comb = []

for r in range(1, len(features) + 1):
    combinations_r = list(itertools.combinations(features, r))
    all_features_comb.extend(combinations_r)

all_features_comb_lists = [list(comb) for comb in all_features_comb]

products = [dict(zip(('algorithm', 'features'), (i,j))) for i,j in itertools.product(algorithms, all_features_comb_lists)]

0.73 0.27


In [474]:
len(products)

510

#### Apenas cross-validation

In [475]:
def classification_function(X_train, y_train, X_test, y_test, algorithm, features):

    scoring = ['precision', 'recall', 'f1', 'roc_auc']

    # Configurando a validação cruzada estratificada para as vezes que for utilizada a validação cruzada
    cv = sklearn.model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

    results = sklearn.model_selection.cross_validate(algorithm, X= X_train[features], y= y_train, cv=cv, scoring=scoring, return_train_score=True, n_jobs=-1, return_estimator=True)
    
    y_pred = algorithm.fit(X_train[features], y_train).predict(X_test[features])

    metrics = {

        "Features": features,
        "Algorithm": algorithm,
        
        "Precision Train": round(results['train_precision'].mean(),2),
        "Precision Eval": round(results['test_precision'].mean(),2),
        "Precision Test": round(sklearn.metrics.precision_score(y_test, y_pred),2),        

        "Recall Train": round(results['train_recall'].mean(),2),
        "Recall Eval": round(results['test_recall'].mean(),2),
        "Recall Test": round(sklearn.metrics.recall_score(y_test, y_pred),2),  

        "F1 Train": round(results['train_f1'].mean(),2),
        "F1 Eval": round(results['test_f1'].mean(),2),
        "F1 Test": round(sklearn.metrics.f1_score(y_test, y_pred),2),  
    
        "AUC-ROC Train": round(results['train_roc_auc'].mean(),2),
        "AUC-ROC Eval": round(results['test_roc_auc'].mean(),2),
        "AUC-ROC Test": round(sklearn.metrics.roc_auc_score(y_test, y_pred),2),  
    
    }    

    return metrics

results = list()
result_count = 1

for product in products:
    results.append(classification_function(X_train, y_train, X_test, y_test, product['algorithm'], product['features']))
    print(f'Result: [{result_count}/{len(products)}] DONE')
    result_count += 1

Result: [1/510] DONE
Result: [2/510] DONE
Result: [3/510] DONE
Result: [4/510] DONE
Result: [5/510] DONE
Result: [6/510] DONE
Result: [7/510] DONE
Result: [8/510] DONE
Result: [9/510] DONE
Result: [10/510] DONE
Result: [11/510] DONE
Result: [12/510] DONE
Result: [13/510] DONE
Result: [14/510] DONE
Result: [15/510] DONE
Result: [16/510] DONE
Result: [17/510] DONE
Result: [18/510] DONE
Result: [19/510] DONE
Result: [20/510] DONE
Result: [21/510] DONE
Result: [22/510] DONE
Result: [23/510] DONE
Result: [24/510] DONE
Result: [25/510] DONE
Result: [26/510] DONE
Result: [27/510] DONE
Result: [28/510] DONE
Result: [29/510] DONE
Result: [30/510] DONE
Result: [31/510] DONE
Result: [32/510] DONE
Result: [33/510] DONE
Result: [34/510] DONE
Result: [35/510] DONE
Result: [36/510] DONE
Result: [37/510] DONE
Result: [38/510] DONE
Result: [39/510] DONE
Result: [40/510] DONE
Result: [41/510] DONE
Result: [42/510] DONE
Result: [43/510] DONE
Result: [44/510] DONE
Result: [45/510] DONE
Result: [46/510] DO

In [477]:
df_results_cv = pd.DataFrame(results)
df_results_cv = df_results_cv.sort_values('F1 Eval', ascending= False).reset_index(drop=True)
df_results_cv

Unnamed: 0,Features,Algorithm,Precision Train,Precision Eval,Precision Test,Recall Train,Recall Eval,Recall Test,F1 Train,F1 Eval,F1 Test,AUC-ROC Train,AUC-ROC Eval,AUC-ROC Test
0,"[week_frequency, days_since_last_act]",DecisionTreeClassifier(random_state=2024),0.72,0.65,0.70,0.68,0.60,0.57,0.69,0.62,0.63,0.90,0.83,0.74
1,"[PR_total_time, PR_week_max_time]","(DecisionTreeClassifier(max_features='sqrt', r...",0.74,0.68,0.64,0.64,0.57,0.57,0.68,0.62,0.61,0.85,0.81,0.73
2,[PR_total_time],"(DecisionTreeClassifier(max_features='sqrt', r...",0.74,0.68,0.64,0.64,0.57,0.57,0.68,0.62,0.61,0.85,0.81,0.73
3,[PR_week_max_time],"(DecisionTreeClassifier(max_features='sqrt', r...",0.74,0.68,0.64,0.64,0.57,0.57,0.68,0.62,0.61,0.85,0.81,0.73
4,"[week_frequency, days_since_last_act, PR_week_...","(DecisionTreeClassifier(max_features='sqrt', r...",0.92,0.62,0.57,0.83,0.59,0.65,0.87,0.61,0.60,0.97,0.82,0.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,"[week_frequency, return_count]",DecisionTreeClassifier(random_state=2024),0.92,0.50,0.72,0.60,0.40,0.45,0.73,0.44,0.55,0.95,0.69,0.69
506,[pace (min/km)],DecisionTreeClassifier(random_state=2024),0.91,0.45,0.34,0.75,0.38,0.25,0.82,0.41,0.29,0.98,0.64,0.54
507,[return_count],DecisionTreeClassifier(random_state=2024),0.86,0.62,0.81,0.43,0.30,0.32,0.57,0.40,0.46,0.86,0.73,0.65
508,[total_time (min)],"(DecisionTreeClassifier(max_features='sqrt', r...",0.63,0.41,0.46,0.41,0.25,0.32,0.49,0.31,0.38,0.82,0.63,0.59


In [None]:
df_results_cv.to_excel('C:/Users/USER/Desktop/EstudosDados/Projetos/Corrida/physical_inactivity_prediction/scripts_prediction_model/final/results_cv.xlsx', index = False)

#### Nested Cross-validation

In [482]:
def classification_function2(X_train, y_train, X_test, y_test, algorithm, features):

    scoring = ['precision', 'recall', 'f1', 'roc_auc']

    # Configurando a validação cruzada estratificada para as vezes que for utilizada a validação cruzada
    cv = sklearn.model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

    tuned_th_classifier = sklearn.model_selection.TunedThresholdClassifierCV(algorithm, scoring= 'f1', thresholds=100, cv = cv, random_state=seed, n_jobs=-1)

    results = sklearn.model_selection.cross_validate(tuned_th_classifier, X= X_train[features], y= y_train, cv=cv, scoring=scoring, return_train_score=True, n_jobs=-1, return_estimator=True)

    y_pred = tuned_th_classifier.fit(X_train[features], y_train).predict(X_test[features])
    
    metrics = {

        "Features": features,
        "Algorithm": algorithm,
        
        "Precision Train": round(results['train_precision'].mean(),2),
        "Precision Eval": round(results['test_precision'].mean(),2),
        "Precision Test": round(sklearn.metrics.precision_score(y_test, y_pred),2),        

        "Recall Train": round(results['train_recall'].mean(),2),
        "Recall Eval": round(results['test_recall'].mean(),2),
        "Recall Test": round(sklearn.metrics.recall_score(y_test, y_pred),2),  

        "F1 Train": round(results['train_f1'].mean(),2),
        "F1 Eval": round(results['test_f1'].mean(),2),
        "F1 Test": round(sklearn.metrics.f1_score(y_test, y_pred),2),  
    
        "AUC-ROC Train": round(results['train_roc_auc'].mean(),2),
        "AUC-ROC Eval": round(results['test_roc_auc'].mean(),2),
        "AUC-ROC Test": round(sklearn.metrics.roc_auc_score(y_test, y_pred),2),  
    
    } 

    return metrics

results = list()
result_count = 1

for product in products:
    results.append(classification_function2(X_train, y_train, X_test, y_test, product['algorithm'], product['features']))
    print(f'Result: [{result_count}/{len(products)}] DONE')
    result_count += 1

Result: [1/510] DONE
Result: [2/510] DONE
Result: [3/510] DONE
Result: [4/510] DONE
Result: [5/510] DONE
Result: [6/510] DONE
Result: [7/510] DONE
Result: [8/510] DONE
Result: [9/510] DONE
Result: [10/510] DONE
Result: [11/510] DONE
Result: [12/510] DONE
Result: [13/510] DONE
Result: [14/510] DONE
Result: [15/510] DONE
Result: [16/510] DONE
Result: [17/510] DONE
Result: [18/510] DONE
Result: [19/510] DONE
Result: [20/510] DONE
Result: [21/510] DONE
Result: [22/510] DONE
Result: [23/510] DONE
Result: [24/510] DONE
Result: [25/510] DONE
Result: [26/510] DONE
Result: [27/510] DONE
Result: [28/510] DONE
Result: [29/510] DONE
Result: [30/510] DONE
Result: [31/510] DONE
Result: [32/510] DONE
Result: [33/510] DONE
Result: [34/510] DONE
Result: [35/510] DONE
Result: [36/510] DONE
Result: [37/510] DONE
Result: [38/510] DONE
Result: [39/510] DONE
Result: [40/510] DONE
Result: [41/510] DONE
Result: [42/510] DONE
Result: [43/510] DONE
Result: [44/510] DONE
Result: [45/510] DONE
Result: [46/510] DO

In [486]:
df_results_cv_th = pd.DataFrame(results)
df_results_cv_th = df_results_cv_th.sort_values('F1 Eval', ascending= False).reset_index(drop=True)
df_results_cv_th

Unnamed: 0,Features,Algorithm,Precision Train,Precision Eval,Precision Test,Recall Train,Recall Eval,Recall Test,F1 Train,F1 Eval,F1 Test,AUC-ROC Train,AUC-ROC Eval,AUC-ROC Test
0,"[PR_total_time, PR_week_max_time]","(DecisionTreeClassifier(max_features='sqrt', r...",0.60,0.58,0.58,0.79,0.76,0.72,0.68,0.66,0.64,0.85,0.81,0.77
1,[PR_total_time],"(DecisionTreeClassifier(max_features='sqrt', r...",0.60,0.58,0.58,0.79,0.76,0.72,0.68,0.66,0.64,0.85,0.81,0.77
2,[PR_week_max_time],DecisionTreeClassifier(random_state=2024),0.62,0.60,0.59,0.78,0.73,0.72,0.69,0.66,0.65,0.85,0.79,0.77
3,"[PR_total_time, PR_week_max_time]",DecisionTreeClassifier(random_state=2024),0.62,0.60,0.59,0.78,0.73,0.72,0.69,0.66,0.65,0.85,0.79,0.77
4,[PR_week_max_time],"(DecisionTreeClassifier(max_features='sqrt', r...",0.60,0.58,0.58,0.79,0.76,0.72,0.68,0.66,0.64,0.85,0.81,0.77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,"[total_time (min), days_since_last_act, return...",DecisionTreeClassifier(random_state=2024),1.00,0.45,0.55,1.00,0.49,0.65,1.00,0.47,0.60,1.00,0.63,0.73
506,"[pace (min/km), week_frequency, days_since_las...",DecisionTreeClassifier(random_state=2024),0.98,0.45,0.51,1.00,0.46,0.52,0.99,0.45,0.52,1.00,0.63,0.67
507,[total_time (min)],DecisionTreeClassifier(random_state=2024),0.47,0.34,0.37,0.86,0.66,0.57,0.60,0.44,0.45,0.82,0.62,0.61
508,"[pace (min/km), days_since_last_act]",DecisionTreeClassifier(random_state=2024),0.97,0.44,0.40,0.98,0.45,0.42,0.98,0.44,0.41,1.00,0.62,0.59


In [501]:
df_results_cv_th.iloc[2,1]

In [484]:
df_results_cv_th.to_excel('C:/Users/USER/Desktop/EstudosDados/Projetos/Corrida/physical_inactivity_prediction/scripts_prediction_model/final/results_cv_th.xlsx', index = False)