In [16]:
import pandas as pd

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, make_scorer
from scipy.stats import uniform, loguniform

import sys
sys.path.append('../src/')
from models.classification_methods import create_test_results_df    

In [17]:
processed_data_path = '../data/processed/'

path_best_UFT = "XGBClassifier_TfidfVectorizer_{target}_top_mentioned_timelines_Texts_{split}_results.csv"
path_best_UT = "XGBClassifier_TfidfVectorizer_{target}_users_Timeline_{split}_results.csv"
path_best_S = "bert_classifier_pablocosta_bertabaporu_base_uncased_{target}_Stance_{split}_results.csv"

path_users = processed_data_path + "r3_{target}_{split}_users_processed.csv"
path_tmt = processed_data_path + "{split}_r3_{target}_top_mentioned_timelines_processed.csv"

test_results_path = '../reports/test_results/'
train_results_path = '../reports/train_results/'
val_results_path = '../reports/val_results/'

In [18]:
def fill_missing_indices(df):
    # Encontre o índice completo esperado
    full_index = pd.RangeIndex(start=df.index.min(), stop=df.index.max() + 1)

    # Identifique os índices faltantes
    missing_index = full_index.difference(df.index)

    # Crie um DataFrame com os índices faltantes e valores NaN
    missing_df = pd.DataFrame(index=missing_index, columns=df.columns)

    # Combine os DataFrames original e faltante
    combined_df = pd.concat([df, missing_df])

    # Ordene o DataFrame pelo índice
    combined_df = combined_df.sort_index()
    
    combined_df.index= combined_df.index.astype('int')

    return combined_df

In [19]:
target_list = [
    'ig',
    'bo', 
    'cl', 
    'co', 
    'gl', 
    'lu'
    ]

In [41]:

for target in target_list:

    train_UFT = pd.read_csv(train_results_path + path_best_UFT.format(split='train', target = target))
    train_UFT.columns = [col + '_UFT' for col in train_UFT.columns]
    train_UFT.index  = pd.read_csv(
        path_tmt.format(split="train", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index


    train_UT = pd.read_csv(train_results_path + path_best_UT.format(split='train', target = target))
    train_UT.columns = [col + '_UT' for col in train_UT.columns]
    train_UT.index  = pd.read_csv(
        path_users.format(split="train", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index
    
    train_S = pd.read_csv(train_results_path + path_best_S.format(split='train', target = target))
    train_S.columns = [col + '_S' for col in train_S.columns]
    train_S.index  = pd.read_csv(
        path_users.format(split="train", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index

    test_UFT = pd.read_csv(test_results_path + path_best_UFT.format(split='test', target = target))
    test_UFT.columns = [col + '_UFT' for col in test_UFT.columns]
    test_UFT.index  = pd.read_csv(
        path_tmt.format(split="test", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index

    test_UT = pd.read_csv(test_results_path + path_best_UT.format(split='test', target = target))
    test_UT.columns = [col + '_UT' for col in test_UT.columns]
    test_UT.index  = pd.read_csv(
        path_users.format(split="test", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index
    
    test_S = pd.read_csv(test_results_path + path_best_S.format(split='test', target = target))
    test_S.columns = [col + '_S' for col in test_S.columns]
    test_S.index  = pd.read_csv(
        path_users.format(split="test", target=target),
        sep = ';', 
        encoding='utf-8-sig',
        index_col = 0
    ).index
    
    
    filled_train_UFT = fill_missing_indices(train_UFT)
    filled_train_UFT.fillna(-1,inplace=True)

    filled_train_UT = fill_missing_indices(train_UT)
    filled_train_UT.fillna(-1,inplace=True)
    
    filled_train_S = fill_missing_indices(train_S)
    filled_train_S.fillna(-1,inplace=True)


    train = pd.concat([filled_train_UFT, filled_train_UT, filled_train_S], axis = 1)
    test = pd.concat([test_UFT, test_UT, test_S], axis = 1)

    if train.isna().sum().sum() > 0:
        raise TypeError("Null data in train")
    if test.isna().sum().sum() > 0:
        raise TypeError("Null data in test")


    if len(train[~ (train.test_UFT == train.test_UT) & ((train.test_UT !=-1) & (train.test_UFT !=-1))]) > 0: 
        raise ValueError("há valores inconsistentes para a label")

    if len(test[~ (test.test_UFT == test.test_UT) & ((test.test_UT !=-1) & (test.test_UFT !=-1))]) > 0: 
        raise ValueError("há valores inconsistentes para a label")

    # sabemos que as labels de UFT, UT e S são iguais tirando os casos onde é -1
    train_label_UFT = train.test_UFT.tolist()
    train_label_UT = train.test_UT.tolist()
    test_label_UFT = test.test_UFT.tolist()
    test_label_UT = test.test_UT.tolist()

    y_train = [train_label_UFT[i] if train_label_UFT[i] != -1 else train_label_UT[i]  for i in range(len(train_label_UFT))]
    y_test = [test_label_UFT[i] if test_label_UFT[i] != -1 else test_label_UT[i]  for i in range(len(test_label_UFT))]


    cols_to_drop = ['test_UFT',"pred_UFT",'test_UT', 'pred_UT', "pred_proba_0_UFT", "pred_proba_0_UT", "pred_proba_0_S", 'pred_S', 'test_S']
    
    X_train_full = train.drop(cols_to_drop,axis = 1)
    X_test_full = test.drop(cols_to_drop,axis = 1)

    for comb in [
        ('Texts', 'Timeline'),
        ('Texts', 'Stance'),
        ('Stance', 'Timeline'),
        ('Stance', 'Timeline', 'Texts')
    ]:

        str_cols = "_".join(comb)
        
        X_train = X_train_full.copy()
        X_test = X_test_full.copy()


        if "Texts" not in comb:
            X_train.drop([col for col in X_train.columns if "UFT" in col], axis =1, inplace=True)
            X_test.drop([col for col in X_test.columns if "UFT" in col], axis =1, inplace=True)
            
        if "Timeline" not in comb:
            X_train.drop([col for col in X_train.columns if "UT" in col], axis =1, inplace=True)
            X_test.drop([col for col in X_test.columns if "UT" in col], axis =1, inplace=True)
            
        if "Stance" not in comb:
            X_train.drop([col for col in X_train.columns if "S" in col], axis =1, inplace=True)
            X_test.drop([col for col in X_test.columns if "S" in col], axis =1, inplace=True)



        # Supondo que seus dados já estejam em X_train, X_test, y_train, y_test

        # Definindo os parâmetros para otimização
        param_dist = {
            'C': loguniform(1e-6, 1e6),  # Regularization parameter
            'penalty': ['l1', 'l2', 'elasticnet', 'none'],  # Regularization technique
            'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],  # Optimization algorithm
            'max_iter': [100, 200, 300, 500, 1000, 2000],  # Maximum number of iterations
            'l1_ratio': uniform(0, 1),  # L1 ratio, only used if penalty is 'elasticnet'
            'tol': loguniform(1e-5, 1e-1),  # Tolerance for stopping criteria
            'fit_intercept': [True, False],  # Whether to add a constant (bias or intercept) to the decision function
            'class_weight': [None, 'balanced'],  # Weights associated with classes
            'intercept_scaling': uniform(0.1, 2)  # Useful only when the solver ‘liblinear’ is used
        }

        # Criar uma instância do modelo de regressão logística
        model = LogisticRegression(random_state=42)

        # Definindo o Stratified K-Fold
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

        # Definindo o RandomizedSearchCV
        random_search = RandomizedSearchCV(
            model,
            param_distributions=param_dist,
            n_iter=500,  # Número de combinações a serem testadas
            scoring="f1_macro",
            cv=cv,  # Usar StratifiedKFold para validação cruzada
            random_state=42,  # Para reprodutibilidade
            n_jobs=-1,  # Utilizar todos os núcleos disponíveis
            verbose=True
        )

        # Ajustar o RandomizedSearchCV aos dados de treino
        random_search.fit(X_train, y_train)

        # Melhor estimador encontrado pelo RandomizedSearchCV
        best_model = random_search.best_estimator_

        # Fazer previsões nos dados de teste
        y_pred = best_model.predict(X_test)
        y_pred_proba = best_model.predict_proba(X_test)
        
        # create df test results
        ## format test and pred
        y_test_formated = [test for test in y_test]
        y_pred_formated = [pred for pred in y_pred]
        
        ## create list of proba of each class
        pred_proba_0 = [float(probas[0]) for probas in y_pred_proba]
        pred_proba_1 = [float(probas[1]) for probas in y_pred_proba]

        ## create df with results
        df_test_results = create_test_results_df(y_test_formated, y_pred_formated, pred_proba_0, pred_proba_1)
        
        
        

        results_path = f"{test_results_path}/Ensemble_{best_model.__class__.__name__}_{target}_{str_cols}_test_results.csv"
        
        print("results in ", results_path)
        
        df_test_results.to_csv(results_path, index = False)

  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_ig_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_ig_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_ig_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_ig_Stance_Timeline_Texts_test_results.csv


  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_bo_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_bo_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_bo_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_bo_Stance_Timeline_Texts_test_results.csv


  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_cl_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_cl_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_cl_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_cl_Stance_Timeline_Texts_test_results.csv


  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_co_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_co_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_co_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_co_Stance_Timeline_Texts_test_results.csv


  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_gl_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_gl_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_gl_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_gl_Stance_Timeline_Texts_test_results.csv


  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])
  combined_df = pd.concat([df, missing_df])


Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_lu_Texts_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_lu_Texts_Stance_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che

results in  ../reports/test_results//Ensemble_LogisticRegression_lu_Stance_Timeline_test_results.csv
Fitting 5 folds for each of 500 candidates, totalling 2500 fits




results in  ../reports/test_results//Ensemble_LogisticRegression_lu_Stance_Timeline_Texts_test_results.csv


1530 fits failed out of a total of 2500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/semcovici/anaconda3/envs/env-stance-pred/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1172, in fit
    solver = _che