In [1]:
import os # type:ignore # isort:skip # fmt:skip # noqa # nopep8
import sys # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from pathlib import Path # type:ignore # isort:skip # fmt:skip # noqa # nopep8

mod = sys.modules[__name__]

code_dir = None
code_dir_name = 'Code'
unwanted_subdir_name = 'Analysis'

if code_dir_name not in str(Path.cwd()).split('/')[-1]:
    for _ in range(5):

        parent_path = str(Path.cwd().parents[_]).split('/')[-1]

        if (code_dir_name in parent_path) and (unwanted_subdir_name not in parent_path):

            code_dir = str(Path.cwd().parents[_])

            if code_dir is not None:
                break
else:
    code_dir = str(Path.cwd())
sys.path.append(code_dir)

# %load_ext autoreload
# %autoreload 2


In [2]:
from setup_module.imports import * # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from setup_module.estimators_get_pipe import * # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from setup_module.plot_metric_fork import functions as plot_metric_functions # type:ignore # isort:skip # fmt:skip # noqa # nopep8


Using MPS


0it [00:00, ?it/s]

Using MPS


<Figure size 640x480 with 0 Axes>

### Set variables

In [3]:
# Variables
method = 'Supervised'
with open(f'{data_dir}{method}_results_save_path.txt', 'r') as f:
    results_save_path = f.read().strip('\n')
with open(f'{data_dir}{method}_done_xy_save_path.txt', 'r') as f:
    done_xy_save_path = f.read().strip('\n')

t = time.time()
n_jobs = -1
n_splits = 10
n_repeats = 3
random_state = 42
refit = True
class_weight = 'balanced'
cv = RepeatedStratifiedKFold(
    n_splits=n_splits, n_repeats=n_repeats, random_state=random_state
)
scoring = 'recall'
scores = [
    'recall', 'accuracy', 'f1', 'roc_auc',
    'explained_variance', 'matthews_corrcoef'
]
scorers = {
    'precision_score': make_scorer(precision_score, zero_division=0),
    'recall_score': make_scorer(recall_score, zero_division=0),
    'accuracy_score': make_scorer(accuracy_score, zero_division=0),
}
analysis_columns = ['Warmth', 'Competence']
text_col = 'Job Description spacy_sentencized'
metrics_dict = {
    f'{scoring.title()} Best Score': np.nan,
    f'{scoring.title()} Best Threshold': np.nan,
    'Train - Mean Cross Validation Score': np.nan,
    f'Train - Mean Cross Validation - {scoring.title()}': np.nan,
    f'Train - Mean Explained Variance - {scoring.title()}': np.nan,
    'Test - Mean Cross Validation Score': np.nan,
    f'Test - Mean Cross Validation - {scoring.title()}': np.nan,
    f'Test - Mean Explained Variance - {scoring.title()}': np.nan,
    'Explained Variance': np.nan,
    'Accuracy': np.nan,
    'Balanced Accuracy': np.nan,
    'Precision': np.nan,
    'Average Precision': np.nan,
    'Recall': np.nan,
    'F1-score': np.nan,
    'Matthews Correlation Coefficient': np.nan,
    'Brier Score': np.nan,
    'Fowlkes–Mallows Index': np.nan,
    'R2 Score': np.nan,
    'ROC': np.nan,
    'AUC': np.nan,
    'Log Loss/Cross Entropy': np.nan,
    'Cohen’s Kappa': np.nan,
    'Geometric Mean': np.nan,
    'Classification Report': np.nan,
    'Imbalanced Classification Report': np.nan,
    'Confusion Matrix': np.nan,
    'Normalized Confusion Matrix': np.nan,
}
skip_fitted_estimators = False
evaluate_estimator_on_concat = False


# Functions

In [4]:
def show_and_close_plots():
    plt.show()
    plt.clf()
    plt.cla()
    plt.close()


In [5]:
def close_plots():
    plt.clf()
    plt.cla()
    plt.close()


In [6]:
def make_df_metrics(
    vectorizers_pipe, classifiers_pipe, transformers_pipe, metrics_list,
    col, vectorizer_name, classifier_name, protocol=None,
    analysis_columns=analysis_columns,
    table_save_path=table_save_path,
    method=method, save_name=None,
    compression=None, path_suffix=None,
):
    if save_name is None:
        save_name = f'{method} Estimators Table'
    if compression is None:
        compression = False
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if isinstance(metrics_list, dict):
        metrics_list = list(metrics_list.keys())

    transformers_tokenizers_list = [
        str(tranformer_dict['tokenizer']).split('.')[-1].split("'>")[0]
        for tranformer_dict in transformers_pipe.values()
    ]
    combined_classifiers_list = list(classifiers_pipe.keys()) + list(transformers_pipe.keys())
    combined_vectorizers_list = list(vectorizers_pipe.keys()) + transformers_tokenizers_list

    print('='*20)
    if os.path.exists(f'{table_save_path}{save_name}.pkl') and os.path.getsize(f'{table_save_path}{save_name}.pkl') > 0:
        print(f'Loading table from {table_save_path}{save_name}.pkl')
        df_metrics = pd.read_pickle(f'{table_save_path}{save_name}.pkl')
        print('Done loading table!')
    else:
        print('Table does not exist, creating new table...')
        if method == 'Transformers':
            index = pd.MultiIndex.from_product(
                [list(map(lambda classifier_name: classifier_name, list(transformers_pipe.keys())))],
                names=['Classifiers'],
            )
            columns = pd.MultiIndex.from_product(
                [
                    analysis_columns,
                    metrics_list,
                ],
                names=['Variable', 'Measures'],
            )
        elif method == 'Supervised':
            index = pd.MultiIndex.from_product(
                [list(map(lambda classifier_name: classifier_name, list(classifiers_pipe.keys())))],
                names=['Classifiers'],
            )
            columns = pd.MultiIndex.from_product(
                [
                    analysis_columns,
                    list(map(lambda vectorizer_name: vectorizer_name, list(vectorizers_pipe.keys()))),
                    metrics_list,
                ],
                names=['Variable', 'Vectorizer', 'Measures'],
            )
        # Make df
        df_metrics = pd.DataFrame(index=index, columns=columns)
        print('Done creating new table!')
    print('='*20)

    return df_metrics


In [7]:
def get_existing_files(
    results_save_path=results_save_path,
    estimator_names_list=None,
):
    if estimator_names_list is None:
        estimator_names_list = []

    print(f'Searching for existing estimators in directory:\n{results_save_path}')

    for estimators_file in tqdm.tqdm(glob.glob(f'{results_save_path}*.*')):
        if f'{method} Estimator - ' in estimators_file:

            col=estimators_file.split(f'{method} Estimator - ')[-1].split(' - ')[0]
            vectorizer_name=estimators_file.split(f'{col} - ')[-1].split(' + ')[0]
            classifier_name=estimators_file.split(f'{vectorizer_name} + ')[-1].split(' (Save_protocol=')[0]

            estimator_names_list.append(f'{col} - {vectorizer_name} + {classifier_name}')

    return (
        list(set(estimator_names_list))
    )


In [8]:
def load_Xy_search_cv_estimator(
    col, vectorizer_name, classifier_name, protocol,
    results_save_path=results_save_path,
    done_xy_save_path=done_xy_save_path, method=method,
    compression=None, saved_files_list=None,
    path_suffix=None, data_dict=None,
):
    if compression is None:
        compression = False
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if path_suffix is None:
        path_suffix = f' - {col} - {vectorizer_name} + {classifier_name} (Save_protocol={protocol}).pkl'
    if data_dict is None:
        data_dict = {}
    if saved_files_list is None:
        saved_files_list = []

    # Load data into dict
    for file_path in glob.glob(f'{done_xy_save_path}{method}*{path_suffix}*'):
        file_name = file_path.split(f'{done_xy_save_path}{method} ')[-1].split(path_suffix)[0]
        print(f'Loading {file_name} from {file_path}')
        if 'df_' in file_name:
            data_dict[file_name] = pd.read_pickle(file_path)
        else:
            with open(file_path, 'rb') as f:
                data_dict[file_name] = joblib.load(f)
        saved_files_list.append(file_name)
    # Load estimator
    print('Loading Estimator.')
    with open(
        f'{results_save_path}{method} Estimator{path_suffix}', 'rb'
    ) as f:
        data_dict['Estimator'] = joblib.load(f)
    saved_files_list.append('Estimator')

    # # Assign data to variables
    estimator = data_dict['Estimator']
    grid_search = data_dict['Grid Search']
    searchcv = data_dict['SearchCV']
    df_cv_results = data_dict['df_cv_results']
    # Train data
    df_train_data = data_dict['df_train_data']
    X_train = df_train_data['X_train'].values
    y_train = df_train_data['y_train'].values
    y_train_pred = df_train_data['y_train_pred'].values
    # Test data
    df_test_data = data_dict['df_test_data']
    X_test = df_test_data['X_test'].values
    y_test = df_test_data['y_test'].values
    y_test_pred = df_test_data['y_test_pred'].values
    y_test_pred_prob = df_test_data['y_test_pred_prob'].values
    # Val data
    df_val_data = data_dict['df_val_data']
    X_val = df_val_data['X_val'].values
    y_val = df_val_data['y_val'].values
    y_val_pred = df_val_data['y_val_pred'].values
    y_val_pred_prob = df_val_data['y_val_pred_prob'].values
    # Feature importances
    if 'df_feature_importances' in data_dict.keys():
        saved_files_list.append('df_feature_importances')
        df_feature_importances = data_dict['df_feature_importances']
    else:
        df_feature_importances = None

    # Check data
    check_consistent_length(X_train, y_train, y_train_pred)
    check_consistent_length(X_test, y_test, y_test_pred, y_test_pred_prob)
    check_is_fitted(estimator)

    # Get class weights
    (
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights, test_class_weights_ratio, test_class_weights_dict,
        val_class_weights, val_class_weights_ratio, val_class_weights_dict,
    ) = get_class_weights(
        X_train, y_train,
        X_test, y_test,
        X_val, y_val,
    )
    # Print info
    print_Xy(
        X_train, y_train,
        X_test, y_test,
        X_val, y_val,
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights, test_class_weights_ratio, test_class_weights_dict,
        val_class_weights, val_class_weights_ratio, val_class_weights_dict,
    )

    assert set(data_dict.keys()) == set(saved_files_list), f'Not all files were loaded! Missing: {set(data_dict.keys()) ^ set(saved_files_list)}'
    print(f'Done loading Xy, CV data, and estimator!\n{list(data_dict.keys())}')
    print('='*20)

    return (
        grid_search, searchcv,
        X_train, y_train, y_train_pred,
        X_test, y_test, y_test_pred, y_test_pred_prob,
        X_val, y_val, y_val_pred, y_val_pred_prob,
        df_feature_importances, df_cv_results, estimator,
    )


In [9]:
def get_class_weights(
    X_train, y_train,
    X_test, y_test,
    X_val, y_val,
):
    # Get train class weights
    train_class_weights = compute_class_weight(class_weight = class_weight, classes = np.unique(y_train), y = y_train)
    train_class_weights_ratio = train_class_weights[0]/train_class_weights[1]
    train_class_weights_dict = dict(zip(np.unique(y_train), train_class_weights))

    # Get train class weights
    test_class_weights = compute_class_weight(class_weight = class_weight, classes = np.unique(y_train), y = y_test)
    test_class_weights_ratio = test_class_weights[0]/test_class_weights[1]
    test_class_weights_dict = dict(zip(np.unique(y_test), test_class_weights))

    # Get val class weights
    val_class_weights = compute_class_weight(class_weight = class_weight, classes = np.unique(y_train), y = y_val)
    val_class_weights_ratio = val_class_weights[0]/val_class_weights[1]
    val_class_weights_dict = dict(zip(np.unique(y_val), val_class_weights))

    return (
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights, test_class_weights_ratio, test_class_weights_dict,
        val_class_weights, val_class_weights_ratio, val_class_weights_dict,
    )


In [10]:
def print_Xy(
    X_train, y_train,
    X_test, y_test,
    X_val, y_val,
    train_class_weights, train_class_weights_ratio, train_class_weights_dict,
    test_class_weights, test_class_weights_ratio, test_class_weights_dict,
    val_class_weights, val_class_weights_ratio, val_class_weights_dict,
):
    # Check for consistent length
    check_consistent_length(X_train, y_train)
    check_consistent_length(X_test, y_test)
    check_consistent_length(X_val, y_val)

    print('Done splitting data into training and testing sets.')
    print('='*20)
    print(f'Training set shape: {y_train.shape}')
    print('-'*10)
    print(f'Training set example:\n{X_train[0]}')
    print('~'*10)
    print(f'Testing set shape: {y_test.shape}')
    print('-'*10)
    print(f'Testing set example:\n{X_test[0]}')
    print('~'*10)
    print(f'Validation set shape: {y_val.shape}')
    print('-'*10)
    print(f'Validation set example:\n{X_val[0]}')
    print('~'*10)
    print(f'Training data class weights:\nRatio = {train_class_weights_ratio:.2f} (0 = {train_class_weights[0]:.2f}, 1 = {train_class_weights[1]:.2f})')
    print('-'*10)
    print(f'Testing data class weights:\nRatio = {test_class_weights_ratio:.2f} (0 = {test_class_weights[0]:.2f}, 1 = {test_class_weights[1]:.2f})')
    print('-'*10)
    print(f'Validation data class weights:\nRatio = {val_class_weights_ratio:.2f} (0 = {val_class_weights[0]:.2f}, 1 = {val_class_weights[1]:.2f})')
    print('='*20)


In [11]:
def compute_metrics_with_estimator(
    estimator, X, y_labels, col, vectorizer_name, classifier_name,
    cv=cv, return_train_score=None, results_save_path=results_save_path, print_enabled=None
):
    if return_train_score is None:
        return_train_score = True
    if print_enabled is None:
        print_enabled = True

    # Using estimator
    # Cross Validation
    if print_enabled:
        print('-'*20)
        print('Computing metrics using estimator.')
        print('-'*20)
        print('Cross Validating without scoring.')
    cv_score_noscoring = cross_validate(
        estimator,
        X,
        y_labels,
        cv=cv,
        scoring=None,
        return_train_score=return_train_score,
    )

    # Cross Validation with scoring
    if print_enabled:
        print('-'*20)
        print(f'Cross Validating with {scores} scoring.')
    cv_score_recall = cross_validate(
        estimator,
        X,
        y_labels,
        cv=cv,
        scoring=scores,
        return_train_score=return_train_score,
    )

    # Get mean and std of cross validation scores
    if print_enabled:
        print('-'*20)
        print('Getting mean and std of cross validation scores.')
    cv_train_scores = cv_score_noscoring['train_score'].mean()
    cv_test_scores = cv_score_noscoring['test_score'].mean()
    cv_train_recall = cv_score_recall['train_recall'].mean()
    cv_test_recall = cv_score_recall['test_recall'].mean()
    cv_train_explained_variance_recall = cv_score_recall['train_explained_variance'].mean()
    cv_test_explained_variance_recall = cv_score_recall['test_explained_variance'].mean()

    # Save cross validation scores to dataframe
    if print_enabled:
        print('-'*20)
        print('Saving cross validation scores to dataframe.')
    df_cv_score_noscoring = pd.DataFrame(cv_score_noscoring)
    df_cv_score_noscoring.to_pickle(f'{results_save_path}df_cv_score_noscoring - {col}_{vectorizer_name}_{classifier_name}.pkl')
    df_cv_score_recall = pd.DataFrame(cv_score_recall)
    df_cv_score_recall.to_pickle(f'{results_save_path}df_cv_score_recall - {col}_{vectorizer_name}_{classifier_name}.pkl')

    return (
        df_cv_score_recall,
        cv_train_scores, cv_test_scores,
        cv_train_recall, cv_test_recall,
        cv_train_explained_variance_recall, cv_test_explained_variance_recall
    )


In [12]:
def plot_metrics_with_estimator(
    estimator, X, y_labels, col, vectorizer_name, classifier_name, random_state=random_state, n_jobs=n_jobs, cv=cv,
    params=None, axis=None, alpha=None, verbose=None, print_enabled=None
):
    if axis is None:
        axis = 1
    if alpha is None:
        alpha = 0.1
    if verbose is None:
        verbose=1
    if print_enabled is None:
        print_enabled = True

    # Make param names and values
    if params is None:
        params = {
            param_name: classifier_params
            for classifier_name, classifier_and_params in classifiers_pipe.items()
            if estimator[-1].__class__.__name__ == classifier_name
            for param_name_, classifier_params in classifier_and_params[-1].items()
            for param_name in [param_name_.split(f'{classifier_name}__')[-1]]
            if param_name != 'random_state' and all(isinstance(n, (list, int, float)) for n in classifier_params) and not all(isinstance(n, (bool)) for n in classifier_params)
        }


    # Learning Curves
    print('Plotting Learning Curve.')
    print('-'*20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator=estimator,
        X=X,
        y=y_labels,
        cv=cv,
        n_jobs=n_jobs,
        random_state=random_state,
        shuffle=True,
        scoring=scoring,
        verbose=verbose,
        # train_sizes=np.linspace(0.1, 1.0, 10),
    )
    train_scores_mean = np.mean(train_scores, axis=axis)
    train_scores_std = np.std(train_scores, axis=axis)
    test_scores_mean = np.mean(test_scores, axis=axis)
    test_scores_std = np.std(test_scores, axis=axis)

    close_plots()
    plt.figure()
    plt.title(
        f'{col} - Learning Curves for {scoring.title()} - {vectorizer_name} + {classifier_name}'
        )
    plt.xlabel('Training examples')
    plt.ylabel('Score')
    plt.grid()
    plt.fill_between(
        train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=alpha, color='r'
    )
    plt.fill_between(
        train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=alpha, color='g'
    )
    plt.plot(
        train_sizes, train_scores_mean, 'o-', color='r', label='Training score'
    )
    plt.plot(
        train_sizes, test_scores_mean, 'o-', color='g', label='Cross-validation score'
    )
    plt.legend(loc='best')
    fig = plt.gcf()
    fig.tight_layout()

    # Save figure
    for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
        save_path = f'{plot_save_path}{method} {col} - Learning Curve - {vectorizer_name} + {classifier_name}.{image_save_format}'
        print(f'Saving Learning Curve at {save_path}.')
        fig.savefig(save_path, format=image_save_format)
    if print_enabled: show_and_close_plots()

    # Validation Curve
    for param_name, param_range in params.items():
        param_title = ' '.join(param_name.split('_')).title()
        print(f'Plotting Validation Curve for {param_title}.')
        print('-'*20)
        train_scores, test_scores = validation_curve(
            estimator=estimator[-1],
            X=X,
            y=y_labels,
            param_name=param_name,
            param_range=param_range,
            cv=cv,
            n_jobs=n_jobs,
            scoring=scorers['recall_score'],
            verbose=1,
        )
        train_scores_mean = np.mean(train_scores, axis=axis)
        train_scores_std = np.std(train_scores, axis=axis)
        test_scores_mean = np.mean(test_scores, axis=axis)
        test_scores_std = np.std(test_scores, axis=axis)

        # Ploting
        plt.figure()
        plt.title(
            f'{col} - Validation Curve for {scoring.title()} - {col} - {vectorizer_name} + {classifier_name}'
        )
        plt.xlabel(param_name)
        plt.ylabel('Score')
        plt.grid()
        plt.fill_between(
            param_range, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=alpha, color='r'
        )
        plt.fill_between(
            param_range, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=alpha, color='g'
        )
        plt.semilogx(
            param_range, train_scores_mean, label='Training score', color='r'
        )
        plt.semilogx(
            param_range, test_scores_mean, label='Cross-validation score', color='g'
        )
        plt.plot(
            param_range, train_scores_mean, 'o-', color='r', label='Training score'
        )
        plt.plot(
            param_range, test_scores_mean, 'o-', color='g', label='Cross-validation score'
        )
        plt.legend(loc='best')
        fig = plt.gcf()
        fig.tight_layout()

        # Save figure
        for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
            save_path = f'{plot_save_path}{method} {col} - Validation Curve for {scoring.title()} - {vectorizer_name} + {classifier_name}.{image_save_format}'
            print(f'Saving Validation Curve at {save_path}')
            fig.savefig(
                save_path, format=image_save_format
            )
        if print_enabled: show_and_close_plots()


In [13]:
def compute_metrics_with_y_pred(
    y_labels, y_pred,
    pos_label=None, labels=None, zero_division=None, alpha=None, print_enabled=None
):
    if pos_label is None:
        pos_label = 1
    if labels is None:
        labels = np.unique(y_pred)
    if zero_division is None:
        zero_division = 0
    if alpha is None:
        alpha = 0.1
    if print_enabled is None:
        print_enabled = True

    if print_enabled:
        print('-'*20)
        print('Computing metrics using y_pred.')
    # Using y_pred
    explained_variance = metrics.explained_variance_score(y_labels, y_pred)
    accuracy = metrics.accuracy_score(y_labels, y_pred)
    balanced_accuracy = metrics.balanced_accuracy_score(y_labels, y_pred)
    precision = metrics.precision_score(y_labels, y_pred, pos_label=pos_label, labels=labels, zero_division=zero_division)
    recall = metrics.recall_score(y_labels, y_pred, pos_label=pos_label, labels=labels, zero_division=zero_division)
    f1 = metrics.f1_score(y_labels, y_pred, pos_label=pos_label,labels=labels, zero_division=zero_division)
    mcc = metrics.matthews_corrcoef(y_labels, y_pred)
    brier = metrics.brier_score_loss(y_labels, y_pred)
    fm = metrics.fowlkes_mallows_score(y_labels, y_pred)
    r2 = metrics.r2_score(y_labels, y_pred)
    kappa = metrics.cohen_kappa_score(y_labels, y_pred, labels=labels)
    gmean_iba = imblearn.metrics.make_index_balanced_accuracy(alpha=alpha, squared=True)(geometric_mean_score)
    gmean = gmean_iba(y_labels, y_pred)
    report = metrics.classification_report(y_labels, y_pred, labels=labels, zero_division=zero_division)
    imblearn_report = classification_report_imbalanced(y_labels, y_pred, labels=labels, zero_division=zero_division)
    cm = metrics.confusion_matrix(y_labels, y_pred, labels=labels)
    cm_normalized = metrics.confusion_matrix(y_labels, y_pred, normalize='true', labels=labels)

    return (
        explained_variance, accuracy, balanced_accuracy, precision,
        recall, f1, mcc, brier, fm, r2, kappa, gmean, report, imblearn_report, cm, cm_normalized
    )


In [14]:
def plot_metrics_with_y_pred(
    y_labels, y_pred, col, vectorizer_name, classifier_name,
    pos_label=None, labels=None, print_enabled=None
):
    if pos_label is None:
        pos_label = 1
    if labels is None:
        labels = np.unique(y_pred)
    if print_enabled is None:
        print_enabled = True

    # Displays
    close_plots()
    cm_curve = metrics.ConfusionMatrixDisplay.from_predictions(
        y_labels, y_pred, display_labels=labels, cmap=plt.cm.Grays, colorbar=True
    )
    cm_normalized_curve = metrics.ConfusionMatrixDisplay.from_predictions(
        y_labels, y_pred, normalize='true', display_labels=labels, cmap=plt.cm.Grays, colorbar=True
    )
    roc_curve = metrics.RocCurveDisplay.from_predictions(
        y_labels, y_pred, pos_label=pos_label, color='black'
    )
    pr_curve = metrics.PrecisionRecallDisplay.from_predictions(
        y_labels, y_pred, pos_label=pos_label, color='black'
    )
    calibration_curve = CalibrationDisplay.from_predictions(
        y_labels, y_pred, pos_label=pos_label, color='black'
    )
    if print_enabled: show_and_close_plots()

    # Plots
    plots_dict = {
        'Confusion Matrix': cm_curve,
        'Normalized Confusion Matrix': cm_normalized_curve,
        'ROC Curve': roc_curve,
        'Precision-Recall Curve': pr_curve,
        'Calibration Curve': calibration_curve,
    }

    print('=' * 20)
    close_plots()
    print('Plotting metrics with y_pred:')
    print('='*20)

    for plot_name, plot_ in tqdm.tqdm(plots_dict.items()):
        close_plots()
        print(f'Plotting {plot_name}:')
        fig, ax = plt.subplots()
        ax.set_title(
            f'{col} - {plot_name} - {vectorizer_name} + {classifier_name}'
            )
        if plot_name == 'ROC Curve':
            ax.plot([0, 1], [0, 1], 'r--', lw=1)
        try:
            plot_.plot(ax=ax, cmap=plt.cm.Grays)
        except Exception:
            plot_.plot(ax=ax, color='black')
        print('=' * 20)
        fig = plt.gcf()
        fig.tight_layout()

        # Save Plots
        for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
            save_path = f'{plot_save_path}{method} {col} - {plot_name} - {vectorizer_name} + {classifier_name}.{image_save_format}'
            print(f'Saving {plot_name} at {save_path}')
            try:
                fig.savefig(
                    save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
                )
            except Exception:
                print(f'Failed to save {plot_name}!')
                print('=' * 20)
            else:
                print(f'Saved {plot_name}!')
                print('=' * 20)
        if print_enabled: show_and_close_plots()

    # with contextlib.suppress(AttributeError):
    # Visualisation with plot_metric
    bc = plot_metric_functions.BinaryClassification(y_labels, y_pred, labels=[0, 1], matplotlib_style='grayscale', seaborn_style='whitegrid')

    # Figures
    close_plots()
    fig = plt.figure(figsize=(15, 10))
    fig.suptitle(f'{col} - {vectorizer_name} + {classifier_name}')
    plt.subplot2grid((2, 6), (1, 1), colspan=2)
    bc.plot_confusion_matrix(colorbar=True, cmap=plt.cm.Grays)
    plt.subplot2grid((2, 6), (1, 3), colspan=2)
    bc.plot_confusion_matrix(normalize=True, colorbar=True, cmap=plt.cm.Grays)
    plt.subplot2grid(shape=(2, 6), loc=(0, 0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2, 6), (0, 2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2, 6), (0, 4), colspan=2)
    bc.plot_class_distribution()
    bc.print_report()
    fig = plt.gcf()
    fig.tight_layout()

    # Save Plots
    for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
        save_path = f'{plot_save_path}{method} {col} - plot_metric Curves - {vectorizer_name} + {classifier_name}.{image_save_format}'
        print(f'Saving plot_metric Curves at {save_path}')
        fig.savefig(
            save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
        )
    if print_enabled: show_and_close_plots()

    # Heatmap
    print('Plotting Heatmap:')
    close_plots()
    classifications_dict = defaultdict(int)
    for _y_labels, _y_pred in zip(y_labels, y_pred):
        if _y_labels != _y_pred:
            classifications_dict[(_y_labels, _y_pred)] += 1

    dicts_to_plot = [
        {
            f'True {col} value': _y_labels,
            f'Predicted {col} value': _y_pred,
            'Number of Classifications': _count,
        }
        for (_y_labels, _y_pred), _count in classifications_dict.items()
    ]
    df_to_plot = pd.DataFrame(dicts_to_plot)
    df_wide = df_to_plot.pivot_table(
        index=f'True {col} value',
        columns=f'Predicted {col} value',
        values='Number of Classifications'
    )
    plt.figure(figsize=(9,7))
    sns.set(style='ticks', font_scale=1.2)
    sns.heatmap(df_wide, linewidths=1, cmap=plt.cm.Grays, annot=True)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.title(f'{col} Heatmap - {vectorizer_name} + {classifier_name}')
    fig = plt.gcf()
    fig.tight_layout()

    # Save Heatmap
    for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
        save_path = f'{plot_save_path}{method} {col} - Heatmap - {vectorizer_name} + {classifier_name}.{image_save_format}'
        print(f'Saving Heatmap at {save_path}')
        fig.savefig(
            save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
        )
    print('Saved Heatmap!')
    if print_enabled: show_and_close_plots()


In [None]:
def plot_metrics_with_y_pred_and_estimator(
    estimator, X, y_labels, y_pred, col, vectorizer_name, classifier_name,
    pos_label=None, labels=None, print_enabled=None
):
    # Make pipe
    pipe = make_pipeline(estimator.steps[0][1], TruncatedSVD(n_components=2))

    # Apply fit_transform on texts
    X_viz = pipe.fit_transform(X)

    # Plot
    close_plots()
    altair_plt = plot_text(
        X_viz,
        X,
        color_array=y_pred,
        color_words=[
            stemmer.stem(word.lower())
            for word in warmth_competence_words[col]
        ],
    )

    # Save Text Clusters
    title = f'{col} - Text Clusters - {vectorizer_name} + {classifier_name}'
    altair_plt.title = title
    save_path = f'{plot_save_path}{method} {title}'
    print(f'Saving Text Clusters at {save_path}')
    altair_plt.save(f'{save_path}.html')
    Html2Image(output_path=plot_save_path, figsize=(2644, 1604)).screenshot(
        html_file=f'{save_path}.html', save_as=f'{save_path.split(plot_save_path)[-1]}.png'
    )
    print('Saved Text Clusters!')


In [15]:
def compute_metrics_with_y_pred_prob(
    y_labels, y_pred_prob,
    pos_label=None,
    print_enabled=None
):
    if pos_label is None:
        pos_label = 1
    if print_enabled is None:
        print_enabled = True

    if print_enabled:
        print('-'*20)
        print('Computing metrics using y_pred_prob.')
    average_precision = metrics.average_precision_score(y_labels, y_pred_prob)
    roc_auc = metrics.roc_auc_score(y_labels, y_pred_prob)
    fpr, tpr, threshold = metrics.roc_curve(y_labels, y_pred_prob, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    loss = metrics.log_loss(y_labels, y_pred_prob)
    precision_pr, recall_pr, threshold_pr = metrics.precision_recall_curve(y_labels, y_pred_prob, pos_label=1)

    return (
        average_precision, roc_auc, auc,
        fpr, tpr, threshold, loss,
        precision_pr, recall_pr, threshold_pr
    )


In [16]:
def compute_metrics_all(
    estimator, X, y_labels, y_pred, y_pred_prob,
    col, vectorizer_name, classifier_name, cv=cv, n_jobs=n_jobs,
    with_estimator=None, with_y_pred=None, with_y_pred_prob=None,
    pos_label=None, verbose=None, print_enabled=None
):
    if pos_label is None:
        pos_label = 1
    if verbose is None:
        verbose = 1
    if with_estimator is None:
        with_estimator = True
    if with_y_pred is None:
        with_y_pred = True
    if with_y_pred_prob is None:
        with_y_pred_prob = True
    if print_enabled is None:
        print_enabled = True

    # Get metrics
    print('='*20)
    # Using estimator
    if with_estimator:
        (
            df_cv_score_recall,
            cv_train_scores, cv_test_scores,
            cv_train_recall, cv_test_recall,
            cv_train_explained_variance_recall, cv_test_explained_variance_recall
        ) = compute_metrics_with_estimator(
             estimator, X, y_labels, col, vectorizer_name, classifier_name, print_enabled=print_enabled
        )
    # Using y_pred
    if with_y_pred:
        (
            explained_variance, accuracy, balanced_accuracy, precision,
            recall, f1, mcc, brier, fm, r2, kappa, gmean, report, imblearn_report, cm, cm_normalized
        ) = compute_metrics_with_y_pred(
            y_labels, y_pred, print_enabled=print_enabled
        )
    # Using y_pred_prob
    if with_y_pred_prob:
        (
            average_precision, roc_auc, auc,
            fpr, tpr, threshold, loss,
            precision_pr, recall_pr, threshold_pr
        ) = compute_metrics_with_y_pred_prob(
            y_labels, y_pred_prob, print_enabled=print_enabled
        )

    #Place metrics into dict
    if print_enabled:
        print('-'*20)
        print('Appending metrics to dict.')
    metrics_dict = {
        f'{scoring.title()} Best Score': float(best_train_score),
        f'{scoring.title()} Best Threshold': threshold,
        'Train - Mean Cross Validation Score': float(cv_train_scores),
        f'Train - Mean Cross Validation - {scoring.title()}': float(cv_train_recall),
        f'Train - Mean Explained Variance - {scoring.title()}': float(cv_train_explained_variance_recall),
        'Test - Mean Cross Validation Score': float(cv_test_scores),
        f'Test - Mean Cross Validation - {scoring.title()}': float(cv_test_recall),
        f'Test - Mean Explained Variance - {scoring.title()}': float(cv_test_explained_variance_recall),
        'Explained Variance': float(explained_variance),
        'Accuracy': float(accuracy),
        'Balanced Accuracy': float(balanced_accuracy),
        'Precision': float(precision),
        'Average Precision': float(average_precision),
        'Recall': float(recall),
        'F1-score': float(f1),
        'Matthews Correlation Coefficient': float(mcc),
        'Brier Score': float(brier),
        'Fowlkes–Mallows Index': float(fm),
        'R2 Score': float(r2),
        'ROC': float(roc_auc),
        'AUC': float(auc),
        'Log Loss/Cross Entropy': float(loss),
        'Cohen’s Kappa': float(kappa),
        'Geometric Mean': float(gmean),
        'Classification Report': report,
        'Imbalanced Classification Report': str(imblearn_report),
        'Confusion Matrix': str(cm),
        'Normalized Confusion Matrix': str(cm_normalized),
    }
    if print_enabled: print('Done appending metrics to dict.')

    return (
        metrics_dict, df_cv_score_recall,
        cv_train_scores, cv_test_scores,
        cv_train_recall, cv_test_recall,
        cv_train_explained_variance_recall, cv_test_explained_variance_recall,
    )


In [17]:
def plot_metrics(
    estimator, X, y_labels, y_pred, y_pred_prob,
    col, vectorizer_name, classifier_name,
    with_estimator=None, with_y_pred=None, with_y_pred_prob=None, print_enabled=None
):
    if with_estimator is None:
        with_estimator = True
    if with_y_pred is None:
        with_y_pred = True
    if with_y_pred_prob is None:
        with_y_pred_prob = True
    if print_enabled is None:
        print_enabled = True

    # Plotting
    print('~'*20)
    print('Plotting metrics.')
    print('~'*20)
    # Using estimator
    if with_estimator:
        plot_metrics_with_estimator(
             estimator, X, y_labels, col, vectorizer_name, classifier_name, print_enabled=print_enabled
        )
    # Using y_pred
    if with_y_pred:
        plot_metrics_with_y_pred(
            y_labels, y_pred, col, vectorizer_name, classifier_name, print_enabled=print_enabled
        )
    # Using y_pred and estimator
    if with_y_pred and with_estimator:
        plot_metrics_with_y_pred_and_estimator(
             estimator, X, y_labels, y_pred, col, vectorizer_name, classifier_name, print_enabled=print_enabled
        )
    print('='*20)


In [18]:
def examine_predictions(
    X_test, y_test, y_test_pred, col
):
    # Examine predictions
    print('~'*20)
    print(f'Examining predictions for {col}')
    print('Incorrectly Classified Reviews:')
    for _y_test, _y_test_pred, _X_test in random.sample(list(zip(y_test, y_test_pred, X_test)), 5):
        if _y_test != _y_test_pred:
            print('-'*20)
            print(f'TRUE LABEL: {_y_test}')
            print(f'PREDICTED LABEL: {_y_test_pred}')
            print(f'REVIEW TEXT: {_X_test[:100]}')
            print('-'*20)
    print('~'*20)


In [19]:
def evaluation(
    estimator, X, y_labels, y_pred, y_pred_prob,
    best_train_score, df_metrics,
    col, vectorizer_name, classifier_name, scorig=scoring, plot_enabled=None, print_enabled=None
):
    if plot_enabled is None:
        plot_enabled = True
    if print_enabled is None:
        print_enabled = True

    # Get metrics dict
    (
        metrics_dict, df_cv_score_recall,
        cv_train_scores, cv_test_scores,
        cv_train_recall, cv_test_recall,
        cv_train_explained_variance_recall, cv_test_explained_variance_recall,
    ) = compute_metrics_all(
        estimator, X, y_labels, y_pred, y_pred_prob,
        col, vectorizer_name, classifier_name, print_enabled=print_enabled
    )

    # Print metrics
    print('=' * 20)
    print('~' * 20)
    print(f' Testing Metrics for {col} - {vectorizer_name} + {classifier_name}')
    print('~' * 20)
    print(f'Classification Report:\n {metrics_dict["Classification Report"]}')
    print('-' * 20)
    for metric_name, metric_value in metrics_dict.items():
        if 'Threshold' not in metric_name:
            with contextlib.suppress(TypeError, ValueError):
                metric_value = float(metric_value)
            if isinstance(metric_value, (int, float)):
                df_metrics.loc[
                    (classifier_name), (col, vectorizer_name, metric_name)
                ] = metric_value
                print(f'{metric_name}: {round(metric_value, 2)}')
            else:
                print(f'{metric_name}:\n{metric_value}')
                df_metrics.loc[
                    (classifier_name), (col, vectorizer_name, metric_name)
                ] = str(metric_value)
            print('-' * 20)

    print('=' * 20)

    if plot_enabled:
        # Plot Metrics
        plot_metrics(
            estimator, X, y_labels, y_pred, y_pred_prob,
            col, vectorizer_name, classifier_name,
        )

    return df_metrics, metrics_dict, df_cv_score_recall


In [20]:
def prob_confirmatory_tests(y_pred, y_pred_prob):

    # Confirmatory Regression
    print('+'*20)
    print('Confirmatory Tests validating the linear relationship between y_pred and y_pred_prob')
    print('-'*20)
    print('T-Test y_pred_prob ~ y_pred:')
    levene = scipy.stats.levene(y_pred_prob, y_pred)
    equal_var_levene = levene.pvalue < 0.05
    print(scipy.stats.ttest_ind(y_pred_prob, y_pred, equal_var=equal_var_levene))


    print('\n')
    print('-'*20)
    print('Logit y_pred ~ y_pred_prob:')
    try:
        logit_model = sm.Logit(endog=y_pred, exog=y_pred_prob)
        logit_results = logit_model.fit()
        std_coef = logit_results.params[0] / np.std(y_pred_prob)
        std_err = logit_results.bse[0]
        log_likelihood = logit_results.llf
        print(logit_results.summary())
        print('-'*20)
        print(f'Std Coef: {std_coef}')
        print(f'Std Err: {std_err}')
        print(f'Log Likelihood: {log_likelihood}')
    except Exception as e:
        print(type(e).__name__)

    print('-'*20)
    print('\n')
    print('-'*20)
    print('OLS y_pred_prob ~ y_pred:')
    try:
        ols_model = sm.OLS(endog=y_pred_prob, exog=y_pred)
        ols_results = ols_model.fit()
        std_coef = ols_results.params[0] / np.std(y_pred)
        std_err = ols_results.bse[0]
        print(ols_results.summary())
        print('-'*20)
        print(f'Std Coef: {std_coef}')
        print(f'Std Err: {std_err}')
    except Exception as e:
        print(type(e).__name__)

    print('-'*20)
    print('+'*20)
    print('\n')


In [21]:
def save_fitted_estimator(
    estimator,
    col, vectorizer_name, classifier_name,
    protocol=None,
    results_save_path=results_save_path,
    method=method, done_xy_save_path=done_xy_save_path,
    path_suffix=None, data_dict=None,
    compression=None,
):
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if path_suffix is None:
        path_suffix = f' - {col} - {vectorizer_name} + {classifier_name} (Save_protocol={protocol}).pkl'
    if data_dict is None:
        data_dict = {}
    if compression is None:
        compression = False

    # Save fitted estimator
    print('~'*20)
    print(f'Saving fitted estimator {classifier_name} at {results_save_path}')
    with open(
        f'{results_save_path}{method} Fitted Estimator{path_suffix}', 'wb'
    ) as f:
        joblib.dump(estimator, f, compress=compression, protocol=protocol)
    print('~'*20)


In [22]:
# Function to normalize unusual classifiers after fitting
def normalize_after_fitting(estimator, X_train, y_train, X_test, y_test, grid_search, searchcv, vectorizer_name, classifier_name):
    # Classifiers to normalize = ['GaussianNB', 'DecisionTreeClassifier', 'RandomForestClassifier', 'ExtraTreesClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'Perceptron', 'Sequential']

    # Get feature importance if classifier provides them and use as X
    if any(hasattr(estimator, feature_attr) for feature_attr in ['feature_importances_', 'coef_']):
        feature_selector = SelectFromModel(estimator, prefit=True)
        X_train = feature_selector.transform(X_train)
        X_test = X_test[:, feature_selector.get_support()]
        df_feature_importances = pd.DataFrame(
            {
                'features': X_test.values,
                'feature_importances': estimator.feature_importances_
            }
        )
        df_feature_importances = df_feature_importances.sort_values('feature_importances', ascending=False)
        print(df_feature_importances.head(20))
        print(f'Best estimator has feature_importances of shape:\n{estimator}')
    else:
        df_feature_importances = None

    # For perceptron: calibrate classifier to get prediction probabilities
    if (not hasattr(searchcv, 'predict_proba') and not hasattr(searchcv, '_predict_proba_lr') and hasattr(searchcv, 'decision_function')) or classifier_name == 'Perceptron' or estimator.__class__.__name__ == 'Perceptron':
        path_suffix = f' - {col} - {vectorizer_name} + {classifier_name} (Save_protocol={protocol}).pkl'
        data_dict = {
            'Estimator': estimator,
            'Grid Search': grid_search,
            'SearchCV': searchcv,
        }
        for file_name, file_ in data_dict.items():
            with open(
                f'{results_save_path}{method} {file_name}{path_suffix}', 'wb'
            ) as f:
                joblib.dump(file_, f, compress=False, protocol=pickle.HIGHEST_PROTOCOL)
            print(f'Saved {method} {file_name}{path_suffix} to {results_save_path}')

        searchcv = CalibratedClassifierCV(
            searchcv, cv=cv, method='sigmoid'
        ).fit(X_train, y_train)

    # For Sequential classifier: compile for binary classification, optimize with adam and score on recall
    if classifier_name == 'Sequential':
        searchcv.compile(
            loss='binary_crossentropy', optimizer='adamw', metrics=list(scoring)
        ).fit(X_train, y_train)

    return (
        estimator, X_train, y_train, X_test, y_test, searchcv, df_feature_importances
    )


In [23]:
def save_table(
    df_metrics,
    col, vectorizer_name, classifier_name, protocol,
    table_save_path=table_save_path,
    method=method, save_name=None,
    compression=None,
    path_suffix=None,
):
    if save_name is None:
        save_name = f'{method} Estimators Table'
    if compression is None:
        compression = False
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if path_suffix is None:
        path_suffix = f' - {col} - {vectorizer_name} + {classifier_name} (Save_protocol={protocol}).pkl'

    # Save metrics df
    save_path = f'{table_save_path}{save_name}'
    print(f'Saving fitted estimator and table at {save_path}')
    df_metrics.to_csv(f'{save_path}.csv')
    df_metrics.to_pickle(f'{save_path}.pkl')
    df_metrics.to_excel(f'{save_path}.xlsx')
    df_metrics.style.to_latex(f'{save_path}.tex', hrules=True)
    df_metrics.to_markdown(f'{save_path}.md')
    df_metrics.to_html(f'{save_path}.html')

    print('Done saving fitted estimator and table!')


In [24]:
def get_completed_estimators(
    results_save_path=results_save_path, method=method, classifiers_pipe=classifiers_pipe,
    estimators_list=None, used_classifiers=None,
):
    if estimators_list is None:
        estimators_list = []
    if used_classifiers is None:
        used_classifiers = []

    for estimator_path in glob.glob(f'{results_save_path}{method} Estimator - *.pkl'):
        classifier_name = estimator_path.split(f'{results_save_path}{method} ')[1].split(' + ')[1].split(' (Save_protocol=')[0]
        used_classifiers.append(classifier_name)
        with open(estimator_path, 'rb') as f:
            estimators_list.append(joblib.load(f))

    return estimators_list


In [25]:
def comparison_plots(
    estimators_list, X_test, y_test, col,
    curves_dict=None, cmap=plt.cm.Grays
):

    curves_dict = {
        'ROC Curve': metrics.RocCurveDisplay,
        'Precision Recall Curve': metrics.PrecisionRecallDisplay,
        # 'Calibration Curve': CalibrationDisplay,
        # 'Validation Curve': ValidationCurveDisplay,
        # 'Learning Curve': LearningCurveDisplay,
    }

    assert len(estimators_list) != 0

    for curve_name, curve_package in curves_dict.items():
        print('-' * 20)
        print(f'{col} - {str(curve_name)}')
        fig, ax = plt.subplots()
        ax.set_title(f'{col} - {str(curve_name)}')
        for estimator in estimators_list:
            try:
                curve = curve_package.from_estimator(
                    estimator, X_test, y_test, pos_label=1, ax=ax, cmap=cmap,
                    name=f'{estimator.steps[0][0]} + {estimator.steps[1][0]} + {estimator.steps[-1][0]}'
                )
            except AttributeError:
                curve = curve_package.from_estimator(
                    estimator, X_test, y_test, pos_label=1, ax=ax, color='black',
                    name=f'{estimator.steps[0][0]} + {estimator.steps[1][0]} + {estimator.steps[-1][0]}'
                )
        show_and_close_plots()

        # Save Plots
        for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
            save_path = f'{plot_save_path}{method} {col} - All {str(curve_name)}s.{image_save_format}'
            print(f'Saving {curve_name} at {save_path}')
            curve.figure_.savefig(
                save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
            )


# Evaluating

In [26]:
%%time
print('#'*40)
print('Starting!')
print('#'*40)

method = 'Supervised'
analysis_columns = ['Warmth', 'Competence']
text_col = 'Job Description spacy_sentencized'

# Get existing estimators
estimator_names_list = get_existing_files()
done_estimators = glob.glob(f'{done_xy_save_path}*')
done_files = [
    'df_train_data', 'df_test_data', 'df_val_data', 'df_cv_results', 'Grid Search', 'SearchCV'
]

# Identify cols, vectorizers and classifiers
for estimators_file in tqdm.tqdm(glob.glob(f'{results_save_path}{method} Estimator - *.pkl')):
    assert f'{method} Estimator - ' in estimators_file, f'Estimators file name {estimators_file} does not contain {method} Estimator - '
    estimate_file_name = estimators_file.split(f'{method} Estimator - ')[-1]

    # Skip fitted estimators
    fitted_estimators = [fitted_estimators_file.split(f'{method} Fitted Estimator - ')[-1] for fitted_estimators_file in tqdm.tqdm(glob.glob(f'{results_save_path}{method} Fitted Estimator - *.pkl'))]
    if estimate_file_name in fitted_estimators and skip_fitted_estimators is True:
        print(f'Estimator {estimate_file_name} already fitted! Skipping...')
        continue

    # Specify col, vectorizer and classifier
    col = estimate_file_name.split(' - ')[0]
    vectorizer_name = estimators_file.split(f'{col} - ')[-1].split(' + ')[0]
    classifier_name = estimators_file.split(f'{vectorizer_name} + ')[-1].split(' (Save_protocol=')[0]
    protocol = int(estimators_file.split(f'{vectorizer_name} + ')[-1].split(' (Save_protocol=')[-1].split(').pkl')[0])

    # Load Table DF
    df_metrics = make_df_metrics(
        vectorizers_pipe=vectorizers_pipe, classifiers_pipe=classifiers_pipe, transformers_pipe=transformers_pipe,
        metrics_list=metrics_dict,
        col=col, vectorizer_name=vectorizer_name, classifier_name=classifier_name, protocol=protocol
    )

    print('~'*20)
    print(f'Loading data for {col} - {vectorizer_name} + {classifier_name}')
    print('~'*20)
    # Load X, y, search_cv, estimator
    try:
        (
            grid_search, searchcv,
            X_train, y_train, y_train_pred,
            X_test, y_test, y_test_pred, y_test_pred_prob,
            X_val, y_val, y_val_pred, y_val_pred_prob,
            df_feature_importances, df_cv_results, estimator,
        ) = load_Xy_search_cv_estimator(
            col, vectorizer_name, classifier_name, protocol
        )
    except:
        continue

    if searchcv.__class__.__name__ != 'CalibratedClassifierCV':
        print('-'*20)
        print(f'{"="*30} EVALUATING DATASET OF LENGTH {len(X_train)+len(X_test)+len(X_val)} ON {col.upper()} {"="*30}')
        print('='*20)
        print(
            f'GridSearch - Best mean train score: M = {float(best_mean_train_score:=searchcv.cv_results_["mean_train_score"][best_index:=searchcv.best_index_]):.2f}, SD = {int(best_std_train_score:=searchcv.cv_results_["std_train_score"][best_index]):.2f}\n'
        )
        print(
            f'GridSearch - Best mean test score: M = {float(best_mean_test_score:=searchcv.cv_results_["mean_test_score"][best_index]):.2f}, SD = {int(best_std_test_score:=searchcv.cv_results_["std_test_score"][best_index]):.2f}\n'
        )
        print(
            f'Best train score for {scoring}: {float(best_train_score:=searchcv.best_score_):.2f}\n'
        )
        print(
            f'Best test score for {scoring}: {float(best_test_score:=searchcv.score(X_test, y_test)):.2f}\n'
        )
        print(
            f'Number of splits: {int(n_splits:=searchcv.n_splits_)}\n'
        )
        print(
            f'Best estimator:\n{searchcv.best_estimator_}\n'
        )
        print(
            f'Best estimator and parameters:\n{searchcv.best_params_}\n'
        )
        print(
            f'Testing Classification Report:\n{(train_report:=metrics.classification_report(y_test, y_test_pred, labels=np.unique(y_test_pred), zero_division=0))}\n'
        )
        # Examine predictions
        examine_predictions(
            X_test, y_test, y_test_pred, col
        )
        print('='*20)
        # Train and Test Confusion Matrix
        print('='*20)
        print('Train and Test Confusion Matrix:\n')
        close_plots()
        fig, axs = plt.subplots(1, 2)
        fig.suptitle(f'{col} - Train and Test Confusion Matrix - {vectorizer_name} + {classifier_name}')
        for ax in axs:
            ax.set_aspect('equal')
        train_cm = metrics.ConfusionMatrixDisplay.from_estimator(
            estimator, X_train, y_train, normalize='true', ax=axs[0], cmap=plt.cm.Grays, colorbar=False
        )
        train_cm.ax_.set_title('training Data')
        test_cm = metrics.ConfusionMatrixDisplay.from_estimator(
            estimator, X_test, y_test, normalize='true', ax=axs[1], cmap=plt.cm.Grays, colorbar=False
        )
        test_cm.ax_.set_title('Testing Data')
        plt.tight_layout()
        for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
            save_path = f'{plot_save_path}{method} {col} - Train and Test Confusion Matrix - {vectorizer_name} + {classifier_name}.{image_save_format}'
            print(f'Train and Test Confusion Matrix plot at {save_path}')
            fig.savefig(
                save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
            )
        show_and_close_plots()
        print('='*20)
        # Train and Test ROC Curve
        print('='*20)
        print('Train and Test Scores in K Folds Cross Validation:')
        close_plots()
        fig = plt.figure(figsize=(10, 5))
        plt.title(f'K-folds Cross Validation Train vs. Test Scores for {col} - {vectorizer_name} + {classifier_name}')
        plt.plot(searchcv.cv_results_["mean_train_score"], label='Train Scores')
        plt.plot(searchcv.cv_results_["mean_test_score"], label='Test Scores')
        plt.legend(loc='best')
        plt.xlabel('Cross Validation Steps Over K Number of Folds')
        plt.ylabel('Recall Score')
        fig.text(0.1, 0.01, '*Number of folds used (K) = 10', ha='center', va='center', fontsize=10)
        for image_save_format in tqdm.tqdm(['eps', 'png', 'svg']):
            save_path = f'{plot_save_path}{method} {col} - Train and Test Scores in K Folds Cross Validation - {vectorizer_name} + {classifier_name}.{image_save_format}'
            print(f'K Folds plot at {save_path}')
            fig.savefig(
                save_path, format=image_save_format, dpi=3000, bbox_inches='tight'
            )
        show_and_close_plots()
        print('='*20)

        if evaluate_estimator_on_concat:
            # Fit estimator
            print('~'*20)
            print('Fitting best params to estimator')
            X = np.concatenate((X_train, X_val), axis=0)
            y_labels = np.concatenate((y_train, y_val), axis=0)
            estimator = estimator.set_params(**searchcv.best_params_)
            estimator.fit(X, y)

            # Normalize Xy for unusual classifiers after fitting
            (
                estimator, X, y_labels, X_test, y_test, searchcv, df_feature_importances
            ) = normalize_after_fitting(
                estimator, X, y_labels, X_test, y_test, grid_search, searchcv, vectorizer_name, classifier_name
            )

            # Set prediction probability attribute
            if hasattr(estimator, 'predict_proba'):
                searchcv_predict_attr = estimator.predict_proba
            elif hasattr(estimator, '_predict_proba_lr'):
                searchcv_predict_attr = estimator._predict_proba_lr

            # Get predictions and probabilities
            y_pred = estimator.predict(X_test)
            y_pred_prob = searchcv_predict_attr(X_test)[:, 1]
        else:
            X = X_test
            y_labels = y_test
            y_pred = y_test_pred
            y_pred_prob = y_test_pred_prob

        print('Saving fitted estimator')
        save_fitted_estimator(
            estimator, col, vectorizer_name, classifier_name, protocol,
        )
        print('Fitted estimator saved')
        print('~'*20)

        # Evaluate Model
        df_metrics, metrics_dict, df_cv_score_recall = evaluation(
            estimator, X, y_labels, y_pred, y_pred_prob,
            best_train_score, df_metrics,
            col, vectorizer_name, classifier_name, plot_enabled=True, print_enabled=True
        )

        # Confirmatory Regression
        prob_confirmatory_tests(y_pred, y_pred_prob)

        # Save Vectorizer, Selector, and Classifier
        save_table(df_metrics, col, vectorizer_name, classifier_name, protocol)
        print(df_metrics)

    # # Compare Estimators
    # print('='*20)
    # print(f'Comparing Estimators for {col}')
    # comparison_plots(get_completed_estimators(), X_test, y_test, col)
    # print('='*20)

print('#'*40)
print('DONE!')
print('#'*40)


########################################
Starting!
########################################
Searching for existing estimators in directory:
/Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/


100%|██████████| 304/304 [00:00<00:00, 1201761.00it/s]
100%|██████████| 70/70 [00:00<00:00, 1208235.72it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + PassiveAggressiveClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl


  1%|          | 1/88 [00:35<51:23, 35.45s/it]

Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties 

100%|██████████| 70/70 [00:00<00:00, 863533.18it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Aut

  2%|▏         | 2/88 [02:57<2:20:34, 98.08s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 975419.53it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Su




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and han

  3%|▎         | 3/88 [03:12<1:25:21, 60.25s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.54      0.63       292
           1       0.59      0.79      0.67       242

    accuracy                           0.65       534
   macro avg       0.67      0.67      0.65       534
weighted avg       0.68      0.65      0.65       534

--------------------
Recall Best Score: 0.81
--------------------
Train - Mean Cross Validation Score: 0.65
--------------------
Train - Mean Cross Validation - Recall: 0.9
--------------------
Train - Mean Explained Variance - Recall: -0.14
--------------------
Test - Mean Cross Validation Score: 0.64
--------------------
Test - Mean Cross Validation - Recall: 0.9
--------------------
Test - Mean Explained Variance - Recall: -0.14
--------------------
Explained Variance: -0.3
--------------------
Accuracy: 0.65
--------------------


100%|██████████| 70/70 [00:00<00:00, 1146880.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Su




Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international envi

  5%|▍         | 4/88 [03:15<52:25, 37.45s/it]  

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.50      0.53       292
           1       0.47      0.53      0.50       242

    accuracy                           0.51       534
   macro avg       0.51      0.51      0.51       534
weighted avg       0.52      0.51      0.51       534

--------------------
Recall Best Score: 0.51
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.53
--------------------
Train - Mean Explained Variance - Recall: -0.95
--------------------
Test - Mean Cross Validation Score: 0.48
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.08
--------------------
Explained Variance: -0.95
--------------------
Accuracy: 0.51
------------------

100%|██████████| 70/70 [00:00<00:00, 1249367.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resu

  6%|▌         | 5/88 [03:29<40:22, 29.19s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.65      0.69       292
           1       0.63      0.70      0.66       242

    accuracy                           0.67       534
   macro avg       0.67      0.68      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.68
--------------------
Train - Mean Explained Variance - Recall: -0.33
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.68
--------------------
Test - Mean Explained Variance - Recall: -0.35
--------------------
Explained Variance: -0.3
--------------------
Accuracy: 0.67
-----------------

100%|██████████| 70/70 [00:00<00:00, 229376.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl





Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supe

  7%|▋         | 6/88 [03:32<27:35, 20.19s/it]

Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

100%|██████████| 70/70 [00:00<00:00, 384798.53it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification mode




Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and respo

  8%|▊         | 7/88 [03:36<20:13, 14.98s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.54      0.66       383
           1       0.40      0.75      0.52       154

    accuracy                           0.60       537
   macro avg       0.62      0.64      0.59       537
weighted avg       0.71      0.60      0.62       537

--------------------
Recall Best Score: 0.67
--------------------
Train - Mean Cross Validation Score: 0.63
--------------------
Train - Mean Cross Validation - Recall: 0.71
--------------------
Train - Mean Explained Variance - Recall: -0.6
--------------------
Test - Mean Cross Validation Score: 0.6
--------------------
Test - Mean Cross Validation - Recall: 0.63
--------------------
Test - Mean Explained Variance - Recall: -0.77
--------------------
Explained Variance: -0.64
--------------------
Accuracy: 0.6
--------------------

100%|██████████| 70/70 [00:00<00:00, 1208235.72it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + MultinomialNB (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_trai

  9%|▉         | 8/88 [03:39<15:03, 11.29s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.71      0.76       427
           1       0.44      0.59      0.51       166

    accuracy                           0.68       593
   macro avg       0.63      0.65      0.63       593
weighted avg       0.71      0.68      0.69       593

--------------------
Recall Best Score: 0.54
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.57
--------------------
Train - Mean Explained Variance - Recall: -0.61
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.57
--------------------
Test - Mean Explained Variance - Recall: -0.63
--------------------
Explained Variance: -0.56
--------------------
Accuracy: 0.68
--------------------
Bal

100%|██████████| 70/70 [00:00<00:00, 1124909.12it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + PassiveAggressiveClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl





Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl


 10%|█         | 9/88 [03:57<17:23, 13.21s/it]

Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile de

100%|██████████| 70/70 [00:00<00:00, 1091454.57it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl





Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy

 11%|█▏        | 10/88 [15:26<4:48:37, 222.01s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1223338.67it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + XGBClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + XGBClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` f




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading Estimator.
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need

 12%|█▎        | 11/88 [15:33<3:20:23, 156.15s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.69      0.70       292
           1       0.63      0.64      0.64       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.6
--------------------
Train - Mean Cross Validation Score: 0.82
--------------------
Train - Mean Cross Validation - Recall: 0.78
--------------------
Train - Mean Explained Variance - Recall: 0.29
--------------------
Test - Mean Cross Validation Score: 0.63
--------------------
Test - Mean Cross Validation - Recall: 0.58
--------------------
Test - Mean Explained Variance - Recall: -0.5
--------------------
Explained Variance: -0.33
--------------------
Accuracy: 0.67
--------------------
Ba

100%|██████████| 70/70 [00:00<00:00, 270102.37it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + MLPClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_r

 14%|█▎        | 12/88 [15:40<2:20:02, 110.56s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           1       0.29      1.00      0.45       154

   micro avg       0.29      1.00      0.45       154
   macro avg       0.29      1.00      0.45       154
weighted avg       0.29      1.00      0.45       154

--------------------
Recall Best Score: 1.0
--------------------
Train - Mean Cross Validation Score: 0.29
--------------------
Train - Mean Cross Validation - Recall: 1.0
--------------------
Train - Mean Explained Variance - Recall: 0.0
--------------------
Test - Mean Cross Validation Score: 0.29
--------------------
Test - Mean Cross Validation - Recall: 1.0
--------------------
Test - Mean Explained Variance - Recall: -0.0
--------------------
Explained Variance: -0.0
--------------------
Accuracy: 0.29
--------------------
Balanced Accuracy: 0.5
--------------------
Precision: 0.29
----

100%|██████████| 70/70 [00:00<00:00, 1129235.69it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/S

 15%|█▍        | 13/88 [15:46<1:38:48, 79.05s/it] 

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.57      0.64       292
           1       0.59      0.76      0.66       242

    accuracy                           0.65       534
   macro avg       0.66      0.66      0.65       534
weighted avg       0.67      0.65      0.65       534

--------------------
Recall Best Score: 0.74
--------------------
Train - Mean Cross Validation Score: 0.65
--------------------
Train - Mean Cross Validation - Recall: 0.75
--------------------
Train - Mean Explained Variance - Recall: -0.33
--------------------
Test - Mean Cross Validation Score: 0.64
--------------------
Test - Mean Cross Validation - Recall: 0.74
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.34
--------------------
Accuracy: 0.65
--------------

100%|██████████| 70/70 [00:00<00:00, 1129235.69it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resul




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideal

100%|██████████| 70/70 [00:00<00:00, 947100.90it/s]


Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/S

 17%|█▋        | 15/88 [16:29<58:41, 48.24s/it]  

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.72      0.70       292
           1       0.64      0.59      0.61       242

    accuracy                           0.66       534
   macro avg       0.66      0.66      0.66       534
weighted avg       0.66      0.66      0.66       534

--------------------
Recall Best Score: 0.62
--------------------
Train - Mean Cross Validation Score: 0.69
--------------------
Train - Mean Cross Validation - Recall: 0.59
--------------------
Train - Mean Explained Variance - Recall: -0.22
--------------------
Test - Mean Cross Validation Score: 0.65
--------------------
Test - Mean Cross Validation - Recall: 0.54
--------------------
Test - Mean Explained Variance - Recall: -0.38
--------------------
Explained Variance: -0.36
--------------------
Accuracy: 0.66
----------------

100%|██████████| 70/70 [00:00<00:00, 492619.60it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + VotingClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences betwee

 18%|█▊        | 16/88 [21:42<2:33:34, 127.97s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 333259.11it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Superv




Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/

 19%|█▉        | 17/88 [21:51<1:49:08, 92.23s/it] 

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           1       0.45      1.00      0.62       242

   micro avg       0.45      1.00      0.62       242
   macro avg       0.45      1.00      0.62       242
weighted avg       0.45      1.00      0.62       242

--------------------
Recall Best Score: 1.0
--------------------
Train - Mean Cross Validation Score: 0.45
--------------------
Train - Mean Cross Validation - Recall: 1.0
--------------------
Train - Mean Explained Variance - Recall: 0.0
--------------------
Test - Mean Cross Validation Score: 0.45
--------------------
Test - Mean Cross Validation - Recall: 1.0
--------------------
Test - Mean Explained Variance - Recall: -0.0
--------------------
Explained Variance: 0.0
--------------------
Accuracy: 0.45
--------------------
Balanced Accuracy: 0.5
--------------------
Precision: 0.45
-

100%|██████████| 70/70 [00:00<00:00, 333637.82it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + XGBClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + XGBClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving 

 20%|██        | 18/88 [21:59<1:18:01, 66.88s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.54      0.66       383
           1       0.39      0.72      0.50       154

    accuracy                           0.59       537
   macro avg       0.61      0.63      0.58       537
weighted avg       0.70      0.59      0.61       537

--------------------
Recall Best Score: 0.67
--------------------
Train - Mean Cross Validation Score: 0.75
--------------------
Train - Mean Cross Validation - Recall: 0.71
--------------------
Train - Mean Explained Variance - Recall: -0.2
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.49
--------------------
Test - Mean Explained Variance - Recall: -0.78
--------------------
Explained Variance: -0.69
--------------------
Accuracy: 0.59
--------------------
Bala

100%|██████████| 70/70 [00:00<00:00, 427367.22it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + StackingClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resu

 22%|██▏       | 19/88 [22:14<58:50, 51.17s/it]  

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 920380.19it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + Perceptron
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resul




Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + Perceptron (Save_protocol=5).pkl


 23%|██▎       | 20/88 [22:20<42:37, 37.61s/it]

Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['df_val_data', 'df_train_data', 'df_test_data', 'df_cv

100%|██████████| 70/70 [00:00<00:00, 760625.08it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl





Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Sear

 24%|██▍       | 21/88 [22:29<32:24, 29.02s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 51427.79it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + LinearSVC
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + LinearSVC (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data -

 25%|██▌       | 22/88 [22:40<26:06, 23.73s/it]

Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and responsibilities: Handling incoming phone calls and emails from the website users;Acting as an intermediary between the customers and accommodations;Managing reservations, special requests, and complaints and finding solutions to website users inquiries.
~~~~~~~~~~
Training data class weights:
Rati

100%|██████████| 70/70 [00:00<00:00, 316380.69it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised

 26%|██▌       | 23/88 [22:48<20:39, 19.08s/it]

Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and responsibilities: Handling incoming phone calls and emails from the website users;Acting as an intermediary between the customers and accommodations;Managing reservations, special requests, and complaints and finding solutions to website users inquiries.
~~~~~~~~~~
Training data class weights:
Rati

100%|██████████| 70/70 [00:00<00:00, 357179.17it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search

 27%|██▋       | 24/88 [23:24<25:38, 24.03s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1075462.56it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + VotingClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated

 28%|██▊       | 25/88 [25:11<51:29, 49.05s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1044844.41it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + SGDClassifier (Save_protocol=5).pkl





Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Searc

 30%|██▉       | 26/88 [25:16<36:48, 35.62s/it]

Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['df_val_data', 'SearchCV', 'df_train_data', 'Grid Search', 'df_cv_results

100%|██████████| 70/70 [00:00<00:00, 1112126.06it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classificat




Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['df_val_data', 'Grid Search', 'df_cv_results', 'df_tra

 31%|███       | 27/88 [25:19<26:24, 25.98s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.77      0.70       292
           1       0.63      0.48      0.54       242

    accuracy                           0.63       534
   macro avg       0.63      0.62      0.62       534
weighted avg       0.63      0.63      0.63       534

--------------------
Recall Best Score: 0.65
--------------------
Train - Mean Cross Validation Score: 0.65
--------------------
Train - Mean Cross Validation - Recall: 0.7
--------------------
Train - Mean Explained Variance - Recall: -0.38
--------------------
Test - Mean Cross Validation Score: 0.63
--------------------
Test - Mean Cross Validation - Recall: 0.68
--------------------
Test - Mean Explained Variance - Recall: -0.44
--------------------
Explained Variance: -0.42
--------------------
Accuracy: 0.63
--------------

100%|██████████| 70/70 [00:00<00:00, 1048576.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + SGDClassifier (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + SGDClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results 

 32%|███▏      | 28/88 [26:01<30:38, 30.65s/it]

Done splitting data into training and testing sets.
Training set shape: (4446,)
----------
Training set example:
This internship is for you if:
~~~~~~~~~~
Testing set shape: (593,)
----------
Testing set example:
General switchboard number +44 (0)207 801 3380.
~~~~~~~~~~
Validation set shape: (889,)
----------
Validation set example:
Effective ability to prioritize tasks and deliver on deadlines, with high performance standards and a commitment to excellence.
~~~~~~~~~~
Training data class weights:
Ratio = 0.37 (0 = 0.68, 1 = 1.86)
----------
Testing data class weights:
Ratio = 0.39 (0 = 0.69, 1 = 1.79)
----------
Validation data class weights:
Ratio = 0.40 (0 = 0.70, 1 = 1.76)
Done loading Xy, CV data, and estimator!
['df_test_data', 'Grid Search', 'df_val_data', 'df_train_data', 'SearchCV', 'df_cv_results', 'Estimator']


100%|██████████| 70/70 [00:00<00:00, 714358.35it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + PassiveAggressiveClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_C




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The 

100%|██████████| 70/70 [00:00<00:00, 1059932.42it/s]


Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Sea

 34%|███▍      | 30/88 [26:25<19:40, 20.35s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.51      0.58       383
           1       0.26      0.42      0.32       154

    accuracy                           0.48       537
   macro avg       0.47      0.47      0.45       537
weighted avg       0.56      0.48      0.51       537

--------------------
Recall Best Score: 0.48
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.49
--------------------
Train - Mean Explained Variance - Recall: -1.21
--------------------
Test - Mean Cross Validation Score: 0.48
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.26
--------------------
Explained Variance: -1.36
--------------------
Accuracy: 0.48
--------------------
B

100%|██████████| 70/70 [00:00<00:00, 1188669.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + LogisticRegression (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + LogisticRegression (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Su

 35%|███▌      | 31/88 [26:41<18:18, 19.28s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - FeatureUnion + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.46      0.61       415
           1       0.30      0.82      0.44       119

    accuracy                           0.54       534
   macro avg       0.60      0.64      0.53       534
weighted avg       0.77      0.54      0.57       534

--------------------
Recall Best Score: 0.83
--------------------
Train - Mean Cross Validation Score: 0.5
--------------------
Train - Mean Cross Validation - Recall: 0.88
--------------------
Train - Mean Explained Variance - Recall: -0.73
--------------------
Test - Mean Cross Validation Score: 0.49
--------------------
Test - Mean Cross Validation - Recall: 0.87
--------------------
Test - Mean Explained Variance - Recall: -0.73
--------------------
Explained Variance: -0.81
--------------------
Accuracy: 0.54
--------------------
Ba

100%|██████████| 70/70 [00:00<00:00, 551882.11it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + Perceptron
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + Perceptron (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results 

100%|██████████| 70/70 [00:00<00:00, 1169726.22it/s]


Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/class

 38%|███▊      | 33/88 [26:52<11:02, 12.05s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.55      0.67       383
           1       0.40      0.76      0.53       154

    accuracy                           0.61       537
   macro avg       0.63      0.65      0.60       537
weighted avg       0.72      0.61      0.63       537

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.69
--------------------
Train - Mean Cross Validation - Recall: 0.75
--------------------
Train - Mean Explained Variance - Recall: -0.37
--------------------
Test - Mean Cross Validation Score: 0.6
--------------------
Test - Mean Cross Validation - Recall: 0.61
--------------------
Test - Mean Explained Variance - Recall: -0.77
--------------------
Explained Variance: -0.6
--------------------
Accuracy: 0.61
--------------

100%|██████████| 70/70 [00:00<00:00, 503604.25it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + StackingClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Su

 39%|███▊      | 34/88 [27:07<11:51, 13.18s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.55      0.66       383
           1       0.40      0.75      0.52       154

    accuracy                           0.60       537
   macro avg       0.62      0.65      0.59       537
weighted avg       0.72      0.60      0.62       537

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.72
--------------------
Train - Mean Explained Variance - Recall: -0.46
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.6
--------------------
Test - Mean Explained Variance - Recall: -0.75
--------------------
Explained Variance: -0.63
--------------------
Accuracy: 0.6
--------------------


100%|██████████| 70/70 [00:00<00:00, 1169726.22it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/

 40%|███▉      | 35/88 [34:01<1:57:44, 133.28s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.54      0.66       383
           1       0.40      0.77      0.52       154

    accuracy                           0.60       537
   macro avg       0.62      0.65      0.59       537
weighted avg       0.72      0.60      0.62       537

--------------------
Recall Best Score: 0.72
--------------------
Train - Mean Cross Validation Score: 0.63
--------------------
Train - Mean Cross Validation - Recall: 0.77
--------------------
Train - Mean Explained Variance - Recall: -0.51
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.72
--------------------
Test - Mean Explained Variance - Recall: -0.61
--------------------
Explained Variance: -0.61
--------------------
Accuracy: 0.6
--------------------


100%|██████████| 70/70 [00:00<00:00, 998643.81it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl





Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classifica

 41%|████      | 36/88 [34:05<1:21:58, 94.58s/it] 

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.65      0.68       292
           1       0.62      0.68      0.65       242

    accuracy                           0.66       534
   macro avg       0.66      0.66      0.66       534
weighted avg       0.67      0.66      0.66       534

--------------------
Recall Best Score: 0.67
--------------------
Train - Mean Cross Validation Score: 0.73
--------------------
Train - Mean Cross Validation - Recall: 0.66
--------------------
Train - Mean Explained Variance - Recall: -0.09
--------------------
Test - Mean Cross Validation Score: 0.63
--------------------
Test - Mean Cross Validation - Recall: 0.57
--------------------
Test - Mean Explained Variance - Recall: -0.46
--------------------
Explained Variance: -0.35
--------------------
Accuracy: 0.66
-------

100%|██████████| 70/70 [00:00<00:00, 1249367.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Searc

 42%|████▏     | 37/88 [34:17<59:22, 69.85s/it]  

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1293397.71it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + LinearSVC
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl


 43%|████▎     | 38/88 [34:24<42:17, 50.74s/it]

Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + LinearSVC (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/

100%|██████████| 70/70 [00:00<00:00, 1208235.72it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resu

 44%|████▍     | 39/88 [34:33<31:16, 38.29s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.63      0.67       292
           1       0.61      0.71      0.66       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.72
--------------------
Train - Mean Cross Validation Score: 0.74
--------------------
Train - Mean Cross Validation - Recall: 0.78
--------------------
Train - Mean Explained Variance - Recall: -0.04
--------------------
Test - Mean Cross Validation Score: 0.67
--------------------
Test - Mean Cross Validation - Recall: 0.72
--------------------
Test - Mean Explained Variance - Recall: -0.32
--------------------
Explained Variance: -0.32
--------------------
Accuracy: 0.67
--------------

100%|██████████| 70/70 [00:00<00:00, 1299120.71it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + PassiveAggressiveClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equi




Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl


 45%|████▌     | 40/88 [35:07<29:36, 37.01s/it]

Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + PassiveAggressiveClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class 

100%|██████████| 70/70 [00:00<00:00, 1129235.69it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supe




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, R

 47%|████▋     | 41/88 [36:14<35:59, 45.94s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1124909.12it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + MultinomialNB (Save_protocol=5).pkl





Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + MultinomialNB (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + MultinomialNB (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + MultinomialNB (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervis

 48%|████▊     | 42/88 [36:18<25:45, 33.61s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.76      0.70       319
           1       0.65      0.52      0.58       274

    accuracy                           0.65       593
   macro avg       0.65      0.64      0.64       593
weighted avg       0.65      0.65      0.64       593

--------------------
Recall Best Score: 0.57
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.57
--------------------
Train - Mean Explained Variance - Recall: -0.31
--------------------
Test - Mean Cross Validation Score: 0.65
--------------------
Test - Mean Cross Validation - Recall: 0.55
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.39
--------------------
Accuracy: 0.65
--------------------
Ba

100%|██████████| 70/70 [00:00<00:00, 667275.64it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supe




Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative a

 49%|████▉     | 43/88 [36:23<18:38, 24.86s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 402193.53it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + Perceptron
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + Perceptron (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervi

 50%|█████     | 44/88 [36:27<13:37, 18.58s/it]

Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['SearchCV', 'df_cv_results', 'Grid Search', 'df_val_data', 'df_train_data

100%|██████████| 70/70 [00:00<00:00, 1059932.42it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervis




Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['SearchCV', 'df_train_data', 'df_val_data', 'df_test_d

 51%|█████     | 45/88 [36:29<09:49, 13.71s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1249367.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl





Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised 

 52%|█████▏    | 46/88 [46:20<2:10:53, 186.99s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.66      0.69       292
           1       0.63      0.69      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.7
--------------------
Train - Mean Cross Validation - Recall: 0.72
--------------------
Train - Mean Explained Variance - Recall: -0.19
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.68
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.31
--------------------
Accuracy: 0.67
------------------

100%|██████████| 70/70 [00:00<00:00, 1112126.06it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/S

 53%|█████▎    | 47/88 [52:45<2:48:11, 246.12s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.65      0.68       292
           1       0.62      0.69      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.69
--------------------
Train - Mean Cross Validation - Recall: 0.72
--------------------
Train - Mean Explained Variance - Recall: -0.24
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.68
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.33
--------------------
Accuracy: 0.67
---------------

100%|██████████| 70/70 [00:00<00:00, 1133595.68it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/S

 55%|█████▍    | 48/88 [52:47<1:55:24, 173.10s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.36      0.50       292
           1       0.54      0.93      0.69       242

    accuracy                           0.61       534
   macro avg       0.70      0.64      0.59       534
weighted avg       0.71      0.61      0.59       534

--------------------
Recall Best Score: 0.91
--------------------
Train - Mean Cross Validation Score: 0.61
--------------------
Train - Mean Cross Validation - Recall: 0.93
--------------------
Train - Mean Explained Variance - Recall: -0.15
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.93
--------------------
Test - Mean Explained Variance - Recall: -0.14
--------------------
Explained Variance: -0.15
--------------------
Accuracy: 0.61
---------------

100%|██████████| 70/70 [00:00<00:00, 1208235.72it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/clas




Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes stat

 56%|█████▌    | 49/88 [52:51<1:19:34, 122.43s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.64      0.68       292
           1       0.62      0.71      0.66       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.68
--------------------
Train - Mean Cross Validation - Recall: 0.73
--------------------
Train - Mean Explained Variance - Recall: -0.28
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.72
--------------------
Test - Mean Explained Variance - Recall: -0.32
--------------------
Explained Variance: -0.31
--------------------
Accuracy: 0.67
-----------

100%|██████████| 70/70 [00:00<00:00, 1151377.57it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/

 57%|█████▋    | 50/88 [52:56<55:11, 87.15s/it]   

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.53      0.66       383
           1       0.40      0.78      0.53       154

    accuracy                           0.60       537
   macro avg       0.63      0.66      0.59       537
weighted avg       0.73      0.60      0.62       537

--------------------
Recall Best Score: 0.74
--------------------
Train - Mean Cross Validation Score: 0.61
--------------------
Train - Mean Cross Validation - Recall: 0.77
--------------------
Train - Mean Explained Variance - Recall: -0.55
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.74
--------------------
Test - Mean Explained Variance - Recall: -0.59
--------------------
Explained Variance: -0.58
--------------------
Accuracy: 0.6
----------------

100%|██████████| 70/70 [00:00<00:00, 1404790.81it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + StackingClassifier (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Se

 58%|█████▊    | 51/88 [53:14<40:58, 66.44s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.65      0.68       292
           1       0.62      0.69      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.69
--------------------
Train - Mean Cross Validation Score: 0.66
--------------------
Train - Mean Cross Validation - Recall: 0.74
--------------------
Train - Mean Explained Variance - Recall: -0.32
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.74
--------------------
Test - Mean Explained Variance - Recall: -0.33
--------------------
Explained Variance: -0.33
--------------------
Accuracy: 0.67
------------------

100%|██████████| 70/70 [00:00<00:00, 1129235.69it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + DummyClassifier (Save_protocol=5).pkl





Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised

 59%|█████▉    | 52/88 [53:19<28:49, 48.04s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - FeatureUnion + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.51      0.58       383
           1       0.26      0.42      0.32       154

    accuracy                           0.48       537
   macro avg       0.47      0.47      0.45       537
weighted avg       0.56      0.48      0.51       537

--------------------
Recall Best Score: 0.48
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.49
--------------------
Train - Mean Explained Variance - Recall: -1.21
--------------------
Test - Mean Cross Validation Score: 0.48
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.26
--------------------
Explained Variance: -1.36
--------------------
Accuracy: 0.48
--------------------
Bala

100%|██████████| 70/70 [00:00<00:00, 1203283.93it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_C




Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre 

 60%|██████    | 53/88 [53:24<20:23, 34.95s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.67      0.69       292
           1       0.62      0.67      0.64       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.67
--------------------
Train - Mean Cross Validation Score: 0.79
--------------------
Train - Mean Cross Validation - Recall: 0.77
--------------------
Train - Mean Explained Variance - Recall: 0.14
--------------------
Test - Mean Cross Validation Score: 0.64
--------------------
Test - Mean Cross Validation - Recall: 0.63
--------------------
Test - Mean Explained Variance - Recall: -0.43
--------------------
Explained Variance: -0.34
--------------------
Accuracy: 0.67
--------

100%|██████████| 70/70 [00:00<00:00, 1299120.71it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + GradientBoostingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification model

 61%|██████▏   | 54/88 [53:29<14:46, 26.09s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + GradientBoostingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.58      0.69       415
           1       0.32      0.71      0.44       119

    accuracy                           0.60       534
   macro avg       0.60      0.64      0.57       534
weighted avg       0.75      0.60      0.64       534

--------------------
Recall Best Score: 0.67
--------------------
Train - Mean Cross Validation Score: 0.7
--------------------
Train - Mean Cross Validation - Recall: 0.69
--------------------
Train - Mean Explained Variance - Recall: -0.55
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.02
--------------------
Explained Variance: -0.88
--------------------
Accuracy: 0.6
-------------

100%|██████████| 70/70 [00:00<00:00, 1271001.21it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Res




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + VotingClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences betwee

 62%|██████▎   | 55/88 [54:17<17:50, 32.44s/it]

Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

100%|██████████| 70/70 [00:00<00:00, 1160479.37it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resul


 64%|██████▎   | 56/88 [54:20<12:36, 23.63s/it]

Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and responsibilities: Handling incoming phone calls and emails from the website users;Acting as an intermediary between the customers and accommodations;Managing reservations, special requests, and complaints and finding solutions to website users inquiries.
~~~~~~~~~~
Training data 

100%|██████████| 70/70 [00:00<00:00, 1188669.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + VotingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Superv




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + VotingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating 

 65%|██████▍   | 57/88 [55:21<18:06, 35.04s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1075462.56it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + LogisticRegression (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/S

 66%|██████▌   | 58/88 [55:24<12:36, 25.23s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.44      0.56       292
           1       0.56      0.85      0.67       242

    accuracy                           0.63       534
   macro avg       0.67      0.64      0.62       534
weighted avg       0.68      0.63      0.61       534

--------------------
Recall Best Score: 0.86
--------------------
Train - Mean Cross Validation Score: 0.61
--------------------
Train - Mean Cross Validation - Recall: 0.93
--------------------
Train - Mean Explained Variance - Recall: -0.15
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.93
--------------------
Test - Mean Explained Variance - Recall: -0.14
--------------------
Explained Variance: -0.29
--------------------
Accuracy: 0.63
---------------

100%|██████████| 70/70 [00:00<00:00, 1293397.71it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/S

 67%|██████▋   | 59/88 [59:11<41:29, 85.86s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.66      0.69       292
           1       0.62      0.68      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.73
--------------------
Train - Mean Cross Validation - Recall: 0.74
--------------------
Train - Mean Explained Variance - Recall: -0.09
--------------------
Test - Mean Cross Validation Score: 0.65
--------------------
Test - Mean Cross Validation - Recall: 0.67
--------------------
Test - Mean Explained Variance - Recall: -0.38
--------------------
Explained Variance: -0.32
--------------------
Accuracy: 0.67
----------------

100%|██████████| 70/70 [00:00<00:00, 985239.19it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification model

 68%|██████▊   | 60/88 [59:15<28:38, 61.38s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.64      0.68       292
           1       0.62      0.71      0.66       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.71
--------------------
Train - Mean Explained Variance - Recall: -0.32
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.7
--------------------
Test - Mean Explained Variance - Recall: -0.35
--------------------
Explained Variance: -0.31
--------------------
Accuracy: 0.67
------------

100%|██████████| 70/70 [00:00<00:00, 452390.26it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + LinearSVC
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/S




Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl


 69%|██████▉   | 61/88 [59:26<20:44, 46.10s/it]

Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and responsibili

100%|██████████| 70/70 [00:00<00:00, 873813.33it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Res




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - FeatureUnion + DummyClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and hand

 70%|███████   | 62/88 [59:30<14:35, 33.67s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.50      0.53       292
           1       0.47      0.53      0.50       242

    accuracy                           0.51       534
   macro avg       0.51      0.51      0.51       534
weighted avg       0.52      0.51      0.51       534

--------------------
Recall Best Score: 0.51
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.53
--------------------
Train - Mean Explained Variance - Recall: -0.95
--------------------
Test - Mean Cross Validation Score: 0.48
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.08
--------------------
Explained Variance: -0.95
--------------------
Accuracy: 0.51
--------------------


100%|██████████| 70/70 [00:00<00:00, 1249367.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl





Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + RandomForestClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/

 72%|███████▏  | 63/88 [59:35<10:26, 25.05s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + RandomForestClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.50      0.63       383
           1       0.39      0.80      0.53       154

    accuracy                           0.59       537
   macro avg       0.63      0.65      0.58       537
weighted avg       0.73      0.59      0.60       537

--------------------
Recall Best Score: 0.76
--------------------
Train - Mean Cross Validation Score: 0.61
--------------------
Train - Mean Cross Validation - Recall: 0.79
--------------------
Train - Mean Explained Variance - Recall: -0.55
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.77
--------------------
Test - Mean Explained Variance - Recall: -0.57
--------------------
Explained Variance: -0.59
--------------------
Accuracy: 0.59
---------------

100%|██████████| 70/70 [00:00<00:00, 1165084.44it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + SGDClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supe




Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + SGDClassifier (Save_protocol=5).pkl


 73%|███████▎  | 64/88 [59:39<07:28, 18.67s/it]

Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['df_test_data', 'df_cv_results', 'df_train_data', 'Gri

100%|██████████| 70/70 [00:00<00:00, 1075462.56it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - FeatureUnion + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - FeatureUnion + XGBClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving

 74%|███████▍  | 65/88 [59:48<05:59, 15.62s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - FeatureUnion + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.71      0.70       292
           1       0.64      0.64      0.64       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.6
--------------------
Train - Mean Cross Validation Score: 0.84
--------------------
Train - Mean Cross Validation - Recall: 0.79
--------------------
Train - Mean Explained Variance - Recall: 0.36
--------------------
Test - Mean Cross Validation Score: 0.62
--------------------
Test - Mean Cross Validation - Recall: 0.57
--------------------
Test - Mean Explained Variance - Recall: -0.52
--------------------
Explained Variance: -0.31
--------------------
Accuracy: 0.67
--------------------
Bala

100%|██████████| 70/70 [00:00<00:00, 1146880.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + StackingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Res




Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + StackingClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of mult

 75%|███████▌  | 66/88 [1:00:14<06:56, 18.93s/it]

Done saving fitted estimator and table!
Variable                          Warmth                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

100%|██████████| 70/70 [00:00<00:00, 1146880.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification 




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development pr

 76%|███████▌  | 67/88 [1:00:18<05:04, 14.50s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.50      0.64       383
           1       0.40      0.84      0.54       154

    accuracy                           0.59       537
   macro avg       0.64      0.67      0.59       537
weighted avg       0.75      0.59      0.61       537

--------------------
Recall Best Score: 0.78
--------------------
Train - Mean Cross Validation Score: 0.61
--------------------
Train - Mean Cross Validation - Recall: 0.81
--------------------
Train - Mean Explained Variance - Recall: -0.54
--------------------
Test - Mean Cross Validation Score: 0.6
--------------------
Test - Mean Cross Validation - Recall: 0.79
--------------------
Test - Mean Explained Variance - Recall: -0.57
--------------------
Explained Variance: -0.51
--------------------
Accuracy: 0.59
----------------

100%|██████████| 70/70 [00:00<00:00, 887012.93it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/cl




Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - TfidfVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statis

 77%|███████▋  | 68/88 [1:00:21<03:37, 10.86s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - TfidfVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.67      0.69       292
           1       0.63      0.68      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.68      0.67      0.67       534

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.76
--------------------
Train - Mean Explained Variance - Recall: -0.28
--------------------
Test - Mean Cross Validation Score: 0.64
--------------------
Test - Mean Cross Validation - Recall: 0.73
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.31
--------------------
Accuracy: 0.67
------------

100%|██████████| 70/70 [00:00<00:00, 1059932.42it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classifi




Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + KNeighborsClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical 

 78%|███████▊  | 69/88 [1:00:24<02:41,  8.52s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.70      0.68       292
           1       0.62      0.58      0.60       242

    accuracy                           0.65       534
   macro avg       0.64      0.64      0.64       534
weighted avg       0.64      0.65      0.64       534

--------------------
Recall Best Score: 0.61
--------------------
Train - Mean Cross Validation Score: 0.7
--------------------
Train - Mean Cross Validation - Recall: 0.62
--------------------
Train - Mean Explained Variance - Recall: -0.2
--------------------
Test - Mean Cross Validation Score: 0.67
--------------------
Test - Mean Cross Validation - Recall: 0.58
--------------------
Test - Mean Explained Variance - Recall: -0.33
--------------------
Explained Variance: -0.43
--------------------
Accuracy: 0.65
---------------

100%|██████████| 70/70 [00:00<00:00, 1233618.82it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + XGBClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automa

 80%|███████▉  | 70/88 [1:00:36<02:54,  9.68s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - FeatureUnion + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.56      0.66       383
           1       0.39      0.69      0.49       154

    accuracy                           0.60       537
   macro avg       0.60      0.62      0.58       537
weighted avg       0.69      0.60      0.61       537

--------------------
Recall Best Score: 0.6
--------------------
Train - Mean Cross Validation Score: 0.77
--------------------
Train - Mean Cross Validation - Recall: 0.67
--------------------
Train - Mean Explained Variance - Recall: -0.16
--------------------
Test - Mean Cross Validation Score: 0.63
--------------------
Test - Mean Cross Validation - Recall: 0.44
--------------------
Test - Mean Explained Variance - Recall: -0.83
--------------------
Explained Variance: -0.73
--------------------
Accuracy: 0.6
--------------------
Balanced

100%|██████████| 70/70 [00:00<00:00, 1048576.00it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + XGBClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + XGBClassifier (Save_protocol=5).pkl





  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + XGBClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + XGBClassifier (Save_protocol=5).pkl


 81%|████████  | 71/88 [1:00:36<01:56,  6.86s/it]

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + XGBClassifier (Save_protocol=5).pkl


100%|██████████| 70/70 [00:00<00:00, 500172.54it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl





Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Sear

 82%|████████▏ | 72/88 [1:00:46<02:01,  7.58s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           1       0.22      1.00      0.36       119

   micro avg       0.22      1.00      0.36       119
   macro avg       0.22      1.00      0.36       119
weighted avg       0.22      1.00      0.36       119

--------------------
Recall Best Score: 1.0
--------------------
Train - Mean Cross Validation Score: 0.22
--------------------
Train - Mean Cross Validation - Recall: 1.0
--------------------
Train - Mean Explained Variance - Recall: -0.0
--------------------
Test - Mean Cross Validation Score: 0.22
--------------------
Test - Mean Cross Validation - Recall: 1.0
--------------------
Test - Mean Explained Variance - Recall: 0.0
--------------------
Explained Variance: 0.0
--------------------
Accuracy: 0.22
--------------------
Balanced Accuracy: 0.5
--------------------
Precision: 0.22
-----

100%|██████████| 70/70 [00:00<00:00, 1169726.22it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + KNeighborsClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Searc

 83%|████████▎ | 73/88 [1:00:53<01:51,  7.46s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - FeatureUnion + KNeighborsClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.72      0.77       383
           1       0.47      0.61      0.53       154

    accuracy                           0.69       537
   macro avg       0.64      0.67      0.65       537
weighted avg       0.72      0.69      0.70       537

--------------------
Recall Best Score: 0.59
--------------------
Train - Mean Cross Validation Score: 0.64
--------------------
Train - Mean Cross Validation - Recall: 0.69
--------------------
Train - Mean Explained Variance - Recall: -0.57
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.64
--------------------
Test - Mean Explained Variance - Recall: -0.68
--------------------
Explained Variance: -0.48
--------------------
Accuracy: 0.69
--------------------

100%|██████████| 70/70 [00:00<00:00, 518730.18it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification




Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + BaggingClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and internatio

 84%|████████▍ | 74/88 [1:03:57<14:05, 60.37s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + BaggingClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.65      0.68       292
           1       0.62      0.69      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.7
--------------------
Train - Mean Cross Validation Score: 0.68
--------------------
Train - Mean Cross Validation - Recall: 0.71
--------------------
Train - Mean Explained Variance - Recall: -0.29
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.69
--------------------
Test - Mean Explained Variance - Recall: -0.36
--------------------
Explained Variance: -0.33
--------------------
Accuracy: 0.67
-----------------

100%|██████████| 70/70 [00:00<00:00, 489335.47it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised R




Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + DummyClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly 

 85%|████████▌ | 75/88 [1:04:00<09:21, 43.18s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + DummyClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.51      0.58       383
           1       0.26      0.42      0.32       154

    accuracy                           0.48       537
   macro avg       0.47      0.47      0.45       537
weighted avg       0.56      0.48      0.51       537

--------------------
Recall Best Score: 0.48
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.49
--------------------
Train - Mean Explained Variance - Recall: -1.21
--------------------
Test - Mean Cross Validation Score: 0.48
--------------------
Test - Mean Cross Validation - Recall: 0.51
--------------------
Test - Mean Explained Variance - Recall: -1.26
--------------------
Explained Variance: -1.36
--------------------
Accuracy: 0.48
--------------------
B

100%|██████████| 70/70 [00:00<00:00, 1276527.30it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + MLPClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + MLPClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Searc

 86%|████████▋ | 76/88 [1:04:05<06:22, 31.86s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + MLPClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           1       0.45      1.00      0.62       242

   micro avg       0.45      1.00      0.62       242
   macro avg       0.45      1.00      0.62       242
weighted avg       0.45      1.00      0.62       242

--------------------
Recall Best Score: 1.0
--------------------
Train - Mean Cross Validation Score: 0.45
--------------------
Train - Mean Cross Validation - Recall: 1.0
--------------------
Train - Mean Explained Variance - Recall: 0.0
--------------------
Test - Mean Cross Validation Score: 0.45
--------------------
Test - Mean Cross Validation - Recall: 1.0
--------------------
Test - Mean Explained Variance - Recall: -0.0
--------------------
Explained Variance: 0.0
--------------------
Accuracy: 0.45
--------------------
Balanced Accuracy: 0.5
--------------------
Precision: 0.45
-

100%|██████████| 70/70 [00:00<00:00, 1293397.71it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + XGBClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + XGBClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resu




  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + XGBClassifier (Save_protocol=5).pkl
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between s

 88%|████████▊ | 77/88 [1:04:14<04:35, 25.02s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + XGBClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.59      0.69       383
           1       0.40      0.68      0.50       154

    accuracy                           0.62       537
   macro avg       0.61      0.63      0.60       537
weighted avg       0.70      0.62      0.63       537

--------------------
Recall Best Score: 0.61
--------------------
Train - Mean Cross Validation Score: 0.74
--------------------
Train - Mean Cross Validation - Recall: 0.72
--------------------
Train - Mean Explained Variance - Recall: -0.23
--------------------
Test - Mean Cross Validation Score: 0.63
--------------------
Test - Mean Cross Validation - Recall: 0.58
--------------------
Test - Mean Explained Variance - Recall: -0.73
--------------------
Explained Variance: -0.69
--------------------
Accuracy: 0.62
--------------------
Bal

100%|██████████| 70/70 [00:00<00:00, 1012418.21it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervis




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and quali

 89%|████████▊ | 78/88 [1:08:03<14:20, 86.03s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.52      0.65       383
           1       0.40      0.80      0.53       154

    accuracy                           0.60       537
   macro avg       0.63      0.66      0.59       537
weighted avg       0.73      0.60      0.62       537

--------------------
Recall Best Score: 0.74
--------------------
Train - Mean Cross Validation Score: 0.68
--------------------
Train - Mean Cross Validation - Recall: 0.8
--------------------
Train - Mean Explained Variance - Recall: -0.33
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.67
--------------------
Test - Mean Explained Variance - Recall: -0.7
--------------------
Explained Variance: -0.56
--------------------
Accuracy: 0.6
--------------------
B

100%|██████████| 70/70 [00:00<00:00, 309705.99it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Sup

 90%|████████▉ | 79/88 [1:08:07<09:13, 61.55s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.43      0.57       383
           1       0.38      0.86      0.52       154

    accuracy                           0.55       537
   macro avg       0.63      0.64      0.55       537
weighted avg       0.74      0.55      0.56       537

--------------------
Recall Best Score: 0.86
--------------------
Train - Mean Cross Validation Score: 0.51
--------------------
Train - Mean Cross Validation - Recall: 0.93
--------------------
Train - Mean Explained Variance - Recall: -0.4
--------------------
Test - Mean Cross Validation Score: 0.5
--------------------
Test - Mean Cross Validation - Recall: 0.92
--------------------
Test - Mean Explained Variance - Recall: -0.41
--------------------
Explained Variance: -0.52
--------------------
Accuracy: 0.55
--------------------


100%|██████████| 70/70 [00:00<00:00, 1188669.15it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - TfidfVectorizer + LinearSVC
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl





Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - TfidfVectorizer + LinearSVC (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_va

 91%|█████████ | 80/88 [1:08:18<06:10, 46.33s/it]

Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (3997,)
----------
Training set example:
The job includes statistical programming and handling/processing of big data sets, for which you will need to have extensive experience in coding.
~~~~~~~~~~
Testing set shape: (534,)
----------
Testing set example:
1-3 or more years advisory/consulting/industry project experience in a high calibre and international environment, ideally working with industry, energy, manufacturing, or agricultural clients
~~~~~~~~~~
Validation set shape: (799,)
----------
Validation set example:
Developing your team and motivating them to achieve their goals.
~~~~~~~~~~
Training data class weights:
Ratio = 0.83 (0 = 0.92, 1 = 1.10)
----------
Testing data class weights:
Ratio = 0.83 (0 = 0.91, 1 = 1.10)
----------
Validation data class weights:
Ratio = 0.91 (0 = 0.95, 1 = 1.05)
Done loading Xy, CV data, and estimator!
['df_cv_results', 'df_train_data', 'df_test_data', 'Sea

100%|██████████| 70/70 [00:00<00:00, 968981.12it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + MultinomialNB (Save_protocol=5).pkl





Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Competence - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/

 92%|█████████▏| 81/88 [1:08:21<03:52, 33.25s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.75      0.70       319
           1       0.65      0.53      0.58       274

    accuracy                           0.65       593
   macro avg       0.65      0.64      0.64       593
weighted avg       0.65      0.65      0.64       593

--------------------
Recall Best Score: 0.56
--------------------
Train - Mean Cross Validation Score: 0.66
--------------------
Train - Mean Cross Validation - Recall: 0.57
--------------------
Train - Mean Explained Variance - Recall: -0.33
--------------------
Test - Mean Cross Validation Score: 0.65
--------------------
Test - Mean Cross Validation - Recall: 0.54
--------------------
Test - Mean Explained Variance - Recall: -0.38
--------------------
Explained Variance: -0.38
--------------------
Accuracy: 0.65
--------------------

100%|██████████| 70/70 [00:00<00:00, 1223338.67it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification 




Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development pr

 93%|█████████▎| 82/88 [1:08:24<02:25, 24.31s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.50      0.64       383
           1       0.40      0.84      0.54       154

    accuracy                           0.59       537
   macro avg       0.64      0.67      0.59       537
weighted avg       0.75      0.59      0.61       537

--------------------
Recall Best Score: 0.77
--------------------
Train - Mean Cross Validation Score: 0.6
--------------------
Train - Mean Cross Validation - Recall: 0.82
--------------------
Train - Mean Explained Variance - Recall: -0.52
--------------------
Test - Mean Cross Validation Score: 0.58
--------------------
Test - Mean Cross Validation - Recall: 0.81
--------------------
Test - Mean Explained Variance - Recall: -0.54
--------------------
Explained Variance: -0.51
--------------------
Accuracy: 0.59
----------------

100%|██████████| 70/70 [00:00<00:00, 334779.11it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Competence - CountVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Competence - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Competence - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl





Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Competence - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Competence - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Competence - CountVectorizer + DecisionTreeClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/

 94%|█████████▍| 83/88 [1:08:27<01:28, 17.75s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Competence - CountVectorizer + DecisionTreeClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.66      0.69       292
           1       0.62      0.68      0.65       242

    accuracy                           0.67       534
   macro avg       0.67      0.67      0.67       534
weighted avg       0.67      0.67      0.67       534

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.7
--------------------
Train - Mean Explained Variance - Recall: -0.32
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.69
--------------------
Test - Mean Explained Variance - Recall: -0.35
--------------------
Explained Variance: -0.32
--------------------
Accuracy: 0.67
------------

100%|██████████| 70/70 [00:00<00:00, 325500.31it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Resu




Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + MultinomialNB (Save_protocol=5).pkl
Loading Estimator.
Done splitting data into training and testing sets.
Training set shape: (4446,)
----------
Training set example:
This internship is for you if:
~~~~~~~~~~
Testing set shape: (593,)
----------
Testing set example:
General switchboard number +44 (0)207 801 3380.
~~~~~~~~~~
Validation set shape: (889,)
----------
Validation set example:
Effective ability to prioritize tasks and deliver on deadlines, with high performance standards and a commitm

 95%|█████████▌| 84/88 [1:08:30<00:53, 13.36s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.73      0.78       427
           1       0.47      0.61      0.53       166

    accuracy                           0.70       593
   macro avg       0.65      0.67      0.65       593
weighted avg       0.73      0.70      0.71       593

--------------------
Recall Best Score: 0.56
--------------------
Train - Mean Cross Validation Score: 0.68
--------------------
Train - Mean Cross Validation - Recall: 0.57
--------------------
Train - Mean Explained Variance - Recall: -0.56
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.55
--------------------
Test - Mean Explained Variance - Recall: -0.57
--------------------
Explained Variance: -0.46
--------------------
Accuracy: 0.7
--------------------
Bala

100%|██████████| 70/70 [00:00<00:00, 1151377.57it/s]


Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - FeatureUnion + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - FeatureUnion + MultinomialNB (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - FeatureUnion + MultinomialNB (Save_protocol=5).pkl
Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supe

 97%|█████████▋| 85/88 [1:08:36<00:33, 11.24s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - FeatureUnion + MultinomialNB
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.68      0.74       427
           1       0.43      0.63      0.51       166

    accuracy                           0.66       593
   macro avg       0.63      0.65      0.63       593
weighted avg       0.71      0.66      0.68       593

--------------------
Recall Best Score: 0.58
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.58
--------------------
Train - Mean Explained Variance - Recall: -0.58
--------------------
Test - Mean Cross Validation Score: 0.66
--------------------
Test - Mean Cross Validation - Recall: 0.54
--------------------
Test - Mean Explained Variance - Recall: -0.65
--------------------
Explained Variance: -0.59
--------------------
Accuracy: 0.66
--------------------
Balanc

100%|██████████| 70/70 [00:00<00:00, 1112126.06it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + LogisticRegression (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + LogisticRegression (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Su

 98%|█████████▊| 86/88 [1:08:39<00:17,  8.89s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - CountVectorizer + LogisticRegression
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.45      0.59       383
           1       0.38      0.84      0.52       154

    accuracy                           0.56       537
   macro avg       0.63      0.64      0.56       537
weighted avg       0.73      0.56      0.57       537

--------------------
Recall Best Score: 0.8
--------------------
Train - Mean Cross Validation Score: 0.57
--------------------
Train - Mean Cross Validation - Recall: 0.83
--------------------
Train - Mean Explained Variance - Recall: -0.57
--------------------
Test - Mean Cross Validation Score: 0.56
--------------------
Test - Mean Cross Validation - Recall: 0.81
--------------------
Test - Mean Explained Variance - Recall: -0.58
--------------------
Explained Variance: -0.56
--------------------
Accuracy: 0.56
--------------------

100%|██████████| 70/70 [00:00<00:00, 1044844.41it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - TfidfVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_data - Warmth - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - TfidfVectorizer + AdaBoostClassifier (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Su

 99%|█████████▉| 87/88 [1:12:51<01:21, 81.59s/it]

~~~~~~~~~~~~~~~~~~~~
 Testing Metrics for Warmth - TfidfVectorizer + AdaBoostClassifier
~~~~~~~~~~~~~~~~~~~~
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.54      0.66       383
           1       0.40      0.77      0.53       154

    accuracy                           0.61       537
   macro avg       0.63      0.66      0.59       537
weighted avg       0.72      0.61      0.62       537

--------------------
Recall Best Score: 0.71
--------------------
Train - Mean Cross Validation Score: 0.67
--------------------
Train - Mean Cross Validation - Recall: 0.76
--------------------
Train - Mean Explained Variance - Recall: -0.43
--------------------
Test - Mean Cross Validation Score: 0.61
--------------------
Test - Mean Cross Validation - Recall: 0.7
--------------------
Test - Mean Explained Variance - Recall: -0.63
--------------------
Explained Variance: -0.59
--------------------
Accuracy: 0.61
--------------------

100%|██████████| 70/70 [00:00<00:00, 375929.94it/s]

Loading table from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Supervised Estimators Table.pkl
Done loading table!
~~~~~~~~~~~~~~~~~~~~
Loading data for Warmth - CountVectorizer + Perceptron
~~~~~~~~~~~~~~~~~~~~
Loading df_train_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_train_data - Warmth - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading SearchCV from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised SearchCV - Warmth - CountVectorizer + Perceptron (Save_protocol=5).pkl





Loading df_cv_results from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_cv_results - Warmth - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_test_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_test_data - Warmth - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading Grid Search from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised Grid Search - Warmth - CountVectorizer + Perceptron (Save_protocol=5).pkl
Loading df_val_data from /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/classification models/Supervised Results/Search+Xy/Supervised df_val_d

100%|██████████| 88/88 [1:12:55<00:00, 49.72s/it]

Done splitting data into training and testing sets.
Training set shape: (4023,)
----------
Training set example:
Analysis (technical, quantitative and qualitative) of multiple sources of information (commercial Intelligence, OSINT, community, **ISACs sharing) to provide timely, actionable intelligence and reporting.
~~~~~~~~~~
Testing set shape: (537,)
----------
Testing set example:
Experience with agile development practices, particularly owning and running specific agile events such as backlog refinement and sprint reviews Knowledge of multi channel supply chain processes, preferable in a retail context.
~~~~~~~~~~
Validation set shape: (805,)
----------
Validation set example:
Duties and responsibilities: Handling incoming phone calls and emails from the website users;Acting as an intermediary between the customers and accommodations;Managing reservations, special requests, and complaints and finding solutions to website users inquiries.
~~~~~~~~~~
Training data class weights:
Rati


