In [167]:
import os # isort:skip # fmt:skip # noqa # nopep8
import sys # isort:skip # fmt:skip # noqa # nopep8
from pathlib import Path # isort:skip # fmt:skip # noqa # nopep8

mod = sys.modules[__name__]

code_dir = None
code_dir_name = 'Code'
unwanted_subdir_name = 'Analysis'

for _ in range(5):

    parent_path = str(Path.cwd().parents[_]).split('/')[-1]

    if (code_dir_name in parent_path) and (unwanted_subdir_name not in parent_path):

        code_dir = str(Path.cwd().parents[_])

        if code_dir is not None:
            break

sys.path.append(code_dir)
# %load_ext autoreload
# %autoreload 2


In [168]:
from setup_module.imports import *  # isort:skip # fmt:skip # noqa # nopep8


In [169]:
# Transformer variables
method = 'BERT'
results_save_path = f'{models_save_path}{method} Results/'
t = time.time()
n_jobs = -1
n_splits = 10
n_repeats = 3
random_state = 42
refit = True
class_weight = 'balanced'
cv = RepeatedStratifiedKFold(
    n_splits=n_splits, n_repeats=n_repeats, random_state=random_state
)
scoring = 'recall'
scores = [
    'recall', 'accuracy', 'f1', 'roc_auc',
    'explained_variance', 'matthews_corrcoef'
]
scorers = {
    'precision_score': make_scorer(precision_score),
    'recall_score': make_scorer(recall_score),
    'accuracy_score': make_scorer(accuracy_score),
}
analysis_columns = ['Warmth', 'Competence']
text_col = 'Job Description spacy_sentencized'
metrics_dict = {
    'Mean Cross Validation Train Score': np.nan,
    f'Mean Cross Validation Train - {scoring.title()}': np.nan,
    f'Mean Explained Train Variance - {scoring.title()}': np.nan,
    'Mean Cross Validation Test Score': np.nan,
    f'Mean Cross Validation Test - {scoring.title()}': np.nan,
    f'Mean Explained Test Variance - {scoring.title()}': np.nan,
    'Explained Variance': np.nan,
    'Accuracy': np.nan,
    'Balanced Accuracy': np.nan,
    'Precision': np.nan,
    'Recall': np.nan,
    'F1-score': np.nan,
    'Matthews Correlation Coefficient': np.nan,
    'Fowlkes–Mallows Index': np.nan,
    'ROC': np.nan,
    'AUC': np.nan,
    f'{scoring.title()} Best Threshold': np.nan,
    f'{scoring.title()} Best Score': np.nan,
    'Log Loss/Cross Entropy': np.nan,
    'Cohen’s Kappa': np.nan,
    'Geometric Mean': np.nan,
    'Classification Report': np.nan,
    'Imbalanced Classification Report': np.nan,
    'Confusion Matrix': np.nan,
    'Normalized Confusion Matrix': np.nan
}

# Transformer variables
max_length = 512
returned_tensor = 'pt'
cpu_counts = torch.multiprocessing.cpu_count()
device = torch.device('mps') if torch.has_mps and torch.backends.mps.is_built() and torch.backends.mps.is_available(
) else torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device_name = str(device.type)
print(f'Using {device_name.upper()}')
# Set random seed
random.seed(random_state)
np.random.seed(random_state)
torch.manual_seed(random_state)
DetectorFactory.seed = random_state
cores = multiprocessing.cpu_count()
bert_model_name = 'bert-base-uncased'
bert_tokenizer = BertTokenizerFast.from_pretrained(
    bert_model_name, strip_accents=True
)
bert_model = BertForSequenceClassification.from_pretrained(
    bert_model_name
).to(device)
accelerator = Accelerator()
optimizer = AdamW(bert_model.parameters(), lr=3e-5)

# Plotting variables
pp = pprint.PrettyPrinter(indent=4)
tqdm.tqdm.pandas(desc='progress-bar')
tqdm_auto.tqdm.pandas(desc='progress-bar')
tqdm.notebook.tqdm().pandas(desc='progress-bar')
tqdm_auto.notebook_tqdm().pandas(desc='progress-bar')
# pbar = progressbar.ProgressBar(maxval=10)
mpl.use('MacOSX')
mpl.style.use(f'{code_dir}/setup_module/apa.mplstyle-main/apa.mplstyle')
mpl.rcParams['text.usetex'] = True
font = {'family': 'arial', 'weight': 'normal', 'size': 10}
mpl.rc('font', **font)
plt.style.use('tableau-colorblind10')
plt.set_cmap('Blues')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 5000)
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.precision', 3)
pd.set_option('display.float_format', '{:.2f}'.format)


Using MPS


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

0it [00:00, ?it/s]

0it [00:00, ?it/s]

# Functions

In [170]:
def show_and_close_plots():
    plt.show()
    plt.clf()
    plt.cla()
    plt.close()


In [171]:
def close_plots():
    plt.clf()
    plt.cla()
    plt.close()


In [172]:
def class_weights_print_Xy(
    X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
    X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
    X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
):
    # Check for consistent length
    check_consistent_length(X_train, y_train, X_train_bert_encodings['input_ids'], y_train_bert_encoded, bert_train_dataset)
    check_consistent_length(X_test, y_test, X_test_bert_encodings['input_ids'], y_test_bert_encoded, bert_test_dataset)
    check_consistent_length(
        X_val, y_val, X_val_bert_encodings['input_ids'], y_val_bert_encoded, bert_val_dataset)

    # Get train class weights
    train_class_weights = compute_class_weight(class_weight = class_weight, classes = np.unique(y_train), y = y_train)
    train_class_weights_ratio = train_class_weights[0]/train_class_weights[1]
    train_class_weights_dict = dict(zip(np.unique(y_train), train_class_weights))

    # Get train class weights
    test_class_weights = compute_class_weight(class_weight = class_weight, classes = np.unique(y_train), y = y_test)
    test_class_weights_ratio = test_class_weights[0]/test_class_weights[1]
    test_class_weights_dict = dict(zip(np.unique(y_test), test_class_weights))

    print('Done encoding training, testing, and validation sets.')
    print('='*20)
    print(f'Training set shape: {y_train.shape}')
    print('-'*10)
    print(f'Training set example:\n{X_train[0]}')
    print('-'*10)
    print(f'Training set BERT encodings example:\n{" ".join(bert_train_dataset.encodings[0].tokens[:30])}')
    print('-'*10)
    print(f'Training labels after BERT encoding: {set(y_train_bert_encoded)}')
    print('~'*10)
    print(f'Testing set shape: {y_test.shape}')
    print('-'*10)
    print(f'Testing set example:\n{X_test[0]}')
    print('-'*10)
    print(f'Testing set BERT encodings example:\n{" ".join(bert_test_dataset.encodings[0].tokens[:30])}')
    print('-'*10)
    print(f'Testing labels after BERT encoding: {set(y_test_bert_encoded)}')
    print('~'*10)
    print(f'Validation set shape: {y_val.shape}')
    print('-'*10)
    print(f'Validation set example:\n{X_val[0]}')
    print('-'*10)
    print(f'Validation set BERT encodings example:\n{" ".join(bert_val_dataset.encodings[0].tokens[:30])}')
    print('-'*10)
    print(f'Validation labels after BERT encoding: {set(y_val_bert_encoded)}')
    print('~'*10)
    print(f'Training data class weights:\nRatio = {train_class_weights_ratio:.2f} (0 = {train_class_weights[0]:.2f}, 1 = {train_class_weights[1]:.2f})')
    print('-'*10)
    print(f'Testing data class weights:\nRatio = {test_class_weights_ratio:.2f} (0 = {test_class_weights[0]:.2f}, 1 = {test_class_weights[1]:.2f})')
    print('='*20)

    return (
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
    )


In [173]:
def split_data(df, col, analysis_columns, text_col=text_col):

    train_ratio = 0.75
    test_ratio = 0.10
    validation_ratio = 0.15
    test_split = test_size = 1 - train_ratio
    validation_split = test_ratio / (test_ratio + validation_ratio)

    # Split
    print('='*20)
    print('Splitting data into training, testing, and validation sets:')
    print(f'Ratios: train_size = {train_ratio}, test size = {test_ratio}, validation size = {validation_ratio}')

    df = df.dropna(subset=analysis_columns, how='any')
    df = df.reset_index(drop=True)

    train, test = train_test_split(
        df, train_size = 1-test_split, test_size = test_split, random_state=random_state
    )

    val, test = train_test_split(
        test, test_size=validation_split, random_state=random_state
    )

    X_train = np.array(list(train[text_col].astype('str').values))
    y_train = column_or_1d(train[col].astype('int64').values.tolist(), warn=True)

    X_test = np.array(list(test[text_col].astype('str').values))
    y_test = column_or_1d(test[col].astype('int64').values.tolist(), warn=True)

    X_val = np.array(list(val[text_col].astype('str').values))
    y_val = column_or_1d(val[col].astype('int64').values.tolist(), warn=True)

    print('Done splitting data into training, testing, and validation sets.')

    return (
        train, X_train, y_train,
        test, X_test, y_test,
        val, X_val, y_val,
    )


In [174]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, encoded):
        self.encodings = encodings
        self.encoded = encoded

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.encoded[idx], device=device)
        return item

    def __len__(self):
        return len(self.encoded)


In [175]:
def encode_data(df, col, analysis_columns, text_col=text_col):

    # Split
    (
        train, X_train, y_train,
        test, X_test, y_test,
        val, X_val, y_val,
    ) = split_data(
        df=df_manual, col=col, analysis_columns=analysis_columns, text_col=text_col
    )

    print('='*20)
    print(f'Encoding training, testing, and validation sets with {bert_tokenizer.__class__.__name__}.from_pretrained using {bert_tokenizer.name_or_path}.')

    bert_label2id = {label: id_ for id_, label in enumerate(set(y_train))}
    bert_id2label = {id_: label for label, id_ in bert_label2id.items()}

    X_train_bert_encodings = bert_tokenizer(
        X_train.tolist(), truncation=True, padding=True, max_length=max_length, return_tensors=returned_tensor
    ).to(device)
    y_train_bert_encoded = [bert_label2id[y] for y in y_train]
    bert_train_dataset = MyDataset(X_train_bert_encodings, y_train_bert_encoded)

    X_test_bert_encodings = bert_tokenizer(
        X_test.tolist(), truncation=True, padding=True, max_length=max_length, return_tensors=returned_tensor
    ).to(device)
    y_test_bert_encoded = [bert_label2id[y] for y in y_test]
    bert_test_dataset = MyDataset(X_test_bert_encodings, y_test_bert_encoded)

    X_val_bert_encodings = bert_tokenizer(
        X_val.tolist(), truncation=True, padding=True, max_length=max_length, return_tensors=returned_tensor
    ).to(device)
    y_val_bert_encoded = [bert_label2id[y] for y in y_val]
    bert_val_dataset = MyDataset(X_val_bert_encodings, y_val_bert_encoded)

    (
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
    ) = class_weights_print_Xy(
        X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
        X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
        X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
    )

    return (
        train, X_train, X_train_bert_encodings, y_train, y_train_bert_encoded, bert_train_dataset,
        test, X_test, X_test_bert_encodings, y_test, y_test_bert_encoded, bert_test_dataset,
        val, X_val, X_val_bert_encodings, y_val, y_val_bert_encoded, bert_val_dataset,
        bert_label2id, bert_id2label,
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
    )


In [176]:
def load_Xy(
    col, results_save_path=results_save_path, method=method, protocol=None, path_suffix=None, data=None,
):
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if path_suffix is None:
        path_suffix = f' - {str(col)} - {vectorizer_name} + {classifier_name} (Save_protocol={protocol}).pkl'
    if data_dict is None:
        data_dict = {}

    print(f'Loading Xy from previous for {col}...')
    # Read all dfs into
    for file_path in glob.glob(f'{results_save_path}*'):
        file_name = file_path.split(f'{results_save_path}{method} ')[-1].split(path_suffix)[0]
        if 'df_' in file_name and 'cv_results' not in file_name and classifier_name not in ignore_classifiers_list:
            data_dict[file_name] = pd.read_pickle(file_path)

    try:
        # Train data
        df_train_data = data_dict['df_train_data']
        X_train = df_train_data['X_train'].values
        y_train = df_train_data['y_train'].values
        X_train_bert_encodings = df_train_data['X_train_bert_encodings'].values
        y_train_bert_encoded = df_train_data['y_train_bert_encoded'].values
        bert_train_dataset = df_train_data['bert_train_dataset'].values
        # Test data
        df_test_data = data_dict['df_test_data']
        X_test = df_test_data['X_test'].values
        y_test = df_test_data['y_test'].values
        X_test_bert_encodings = df_test_data['X_test_bert_encodings'].values
        y_test_bert_encoded = df_test_data['y_test_bert_encoded'].values
        bert_test_dataset = df_test_data['bert_test_dataset'].values
        # Val data
        df_val_data = data_dict['df_val_data']
        X_val = df_val_data['X_val'].values
        y_val = df_val_data['y_val'].values
        X_val_bert_encodings = df_val_data['X_val_bert_encodings'].values
        y_val_bert_encoded = df_val_data['y_val_bert_encoded'].values
        bert_val_dataset = df_val_data['bert_val_dataset'].values
        # Labels
        df_labels = data_dict['df_labels']
        bert_label2id = df_labels['bert_label2id'].values
        bert_id2label = df_labels['bert_id2label'].values 

        print(f'Done loading Xy from previous for {col}!')

        # Get class weights and print info
        (
            train_class_weights, train_class_weights_ratio, train_class_weights_dict,
            test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
        ) = class_weights_print_Xy(
            X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
            X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
            X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
        )

        return (
            X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
            X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
            X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
            bert_label2id, bert_id2label, 
            train_class_weights, train_class_weights_ratio, train_class_weights_dict,
            test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
        )
    except Exception:
        print(f'Error loading Xy from previous for {col}!')
        return None


In [177]:
def compute_metrics_with_y_pred(
    y_test, y_test_pred,
    pos_label=None, labels=None, zero_division=None, alpha=None
):
    if pos_label is None:
        pos_label = 1
    if labels is None:
        labels = np.unique(y_test_pred)
    if zero_division is None:
        zero_division = 0
    if alpha is None:
        alpha = 0.1

    # Using y_pred
    explained_variance = metrics.explained_variance_score(y_test, y_test_pred)
    accuracy = metrics.accuracy_score(y_test, y_test_pred)
    balanced_accuracy = metrics.balanced_accuracy_score(y_test, y_test_pred)
    precision = metrics.precision_score(y_test, y_test_pred, pos_label=pos_label, labels=labels, zero_division=zero_division)
    recall = metrics.recall_score(y_test, y_test_pred, pos_label=pos_label, labels=labels, zero_division=zero_division)
    f1 = metrics.f1_score(y_test, y_test_pred, pos_label=pos_label, labels=labels, zero_division=zero_division)
    mcc = metrics.matthews_corrcoef(y_test, y_test_pred)
    fm = metrics.fowlkes_mallows_score(y_test, y_test_pred)
    kappa = metrics.cohen_kappa_score(y_test, y_test_pred, labels=labels)
    gmean_iba = imblearn.metrics.make_index_balanced_accuracy(alpha=alpha, squared=True)(geometric_mean_score)
    gmean = gmean_iba(y_test, y_test_pred)
    report = metrics.classification_report(y_test, y_test_pred, labels=labels, zero_division=zero_division)
    imblearn_report = classification_report_imbalanced(y_test, y_test_pred, labels=labels, zero_division=zero_division)
    cm = metrics.confusion_matrix(y_test, y_test_pred, labels=labels)
    cm_normalized = metrics.confusion_matrix(y_test, y_test_pred, normalize='true', labels=labels)

    return (
        explained_variance, accuracy, balanced_accuracy, precision,
        recall, f1, mcc, fm, kappa, gmean, report, imblearn_report, cm, cm_normalized
    )


In [178]:
def plot_metrics_with_y_pred(
    y_test, y_test_pred, col, vectorizer_name, classifier_name,
    pos_label=None, labels=None
):
    if pos_label is None:
        pos_label = 1
    if labels is None:
        labels = np.unique(y_test_pred)

    # Displays
    close_plots()
    cm_curve = metrics.ConfusionMatrixDisplay.from_predictions(
        y_test, y_test_pred, display_labels=labels, cmap=plt.cm.Blues
    )
    cm_normalized_curve = metrics.ConfusionMatrixDisplay.from_predictions(
        y_test, y_test_pred, normalize='true', display_labels=labels, cmap=plt.cm.Blues
    )
    roc_curve = metrics.RocCurveDisplay.from_predictions(
        y_test, y_test_pred, pos_label=pos_label
    )
    pr_curve = metrics.PrecisionRecallDisplay.from_predictions(
        y_test, y_test_pred, pos_label=pos_label
    )
    calibration_curve = CalibrationDisplay.from_predictions(
        y_test, y_test_pred, pos_label=pos_label
    )
    show_and_close_plots()

    # Plots
    plots_dict = {
        'Confusion Matrix': cm_curve,
        'Normalized Confusion Matrix': cm_normalized_curve,
        'ROC Curve': roc_curve,
        'Precision-Recall Curve': pr_curve,
        'Calibration Curve': calibration_curve,
    }

    print('=' * 20)
    close_plots()
    print('Plotting metrics with y_pred_prob:')
    print('='*20)

    for plot_name, plot_ in plots_dict.items():
        close_plots()
        print(f'Plotting {plot_name}:')
        fig, ax = plt.subplots()
        ax.set_title(
            f'{str(col)} - {plot_name} - {vectorizer_name} + {classifier_name}'
            )
        if plot_name == 'ROC Curve':
            ax.plot([0, 1], [0, 1], 'r--', lw=1)
        try:
            plot_.plot(ax=ax, cmap=plt.cm.Blues)
        except Exception:
            plot_.plot(ax=ax)
        print('=' * 20)

        # Save Plots
        print(f'Saving {plot_name}...')
        for image_save_format in ['eps', 'png', 'svg']:
            plt.savefig(
                f'{plot_save_path}{method} {str(col)} - {plot_name} - {vectorizer_name} + {classifier_name}.{image_save_format}',
                format=image_save_format, dpi=3000, bbox_inches='tight'
            )
        show_and_close_plots()
        print(f'Saved {plot_name}!')
        print('=' * 20)

    # Visualisation with plot_metric
    bc = plot_metric.functions.BinaryClassification(y_test, y_test_pred, labels=[0, 1])

    # Figures
    close_plots()
    fig = plt.figure(figsize=(15, 10))
    plt.subplot2grid((2, 6), (1, 1), colspan=2)
    bc.plot_confusion_matrix(colorbar=True)
    plt.subplot2grid((2, 6), (1, 3), colspan=2)
    bc.plot_confusion_matrix(normalize=True, colorbar=True)
    plt.subplot2grid(shape=(2, 6), loc=(0, 0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2, 6), (0, 2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2, 6), (0, 4), colspan=2)
    bc.plot_class_distribution()
    bc.print_report()
    for image_save_format in ['eps', 'png', 'svg']:
        plt.savefig(
            f'{plot_save_path}{method} {str(col)} - plot_metric Curves - {vectorizer_name} + {classifier_name}.{image_save_format}',
            format=image_save_format,
            dpi=3000, bbox_inches='tight'
        )
    show_and_close_plots()

    # Heatmap
    print('Plotting Heatmap:')
    close_plots()
    classifications_dict = defaultdict(int)
    for _y_test, _y_test_pred in zip(y_test, y_test_pred):
        if _y_test != _y_test_pred:
            classifications_dict[(_y_test, _y_test_pred)] += 1

    dicts_to_plot = [
        {
            f'True {str(col)} value': _y_test,
            f'Predicted {str(col)} value': _y_test_pred,
            'Number of Classifications': _count,
        }
        for (_y_test, _y_test_pred), _count in classifications_dict.items()
    ]
    df_to_plot = pd.DataFrame(dicts_to_plot)
    df_wide = df_to_plot.pivot_table(
        index=f'True {str(col)} value', 
        columns=f'Predicted {str(col)} value', 
        values='Number of Classifications'
    )
    plt.figure(figsize=(9,7))
    sns.set(style='ticks', font_scale=1.2)
    sns.heatmap(df_wide, linewidths=1, cmap=plt.cm.Blues, annot=True)    
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.suptitle(f'{str(col)} Heatmap - {vectorizer_name} + {classifier_name}')
    print('Saving Heatmap...')
    for image_save_format in ['eps', 'png', 'svg']:
        plt.savefig(
            f'{plot_save_path}{method} {str(col)} - Heatmap - {vectorizer_name} + {classifier_name}.{image_save_format}',
            format=image_save_format,
            dpi=3000, bbox_inches='tight'
        )
    print('Saved Heatmap!')
    show_and_close_plots()



In [179]:
def compute_metrics_with_y_pred_prob(
    y_test, y_test_pred_prob,
    pos_label=None
):
    if pos_label is None:
        pos_label = 1

    # Using y_pred_prob
    average_precision = metrics.average_precision_score(y_test, y_test_pred_prob)
    roc_auc = metrics.roc_auc_score(y_test, y_test_pred_prob)
    fpr, tpr, threshold = metrics.roc_curve(y_test, y_test_pred_prob, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    loss = metrics.log_loss(y_test, y_test_pred_prob)
    precision_pr, recall_pr, threshold_pr = metrics.precision_recall_curve(y_test, y_test_pred_prob, pos_label=1)

    return (
        average_precision, roc_auc, auc,
        fpr, tpr, threshold,loss,
        precision_pr, recall_pr, threshold_pr
    )


In [180]:
def compute_metrics(
    predicted_results,
    with_y_pred=None,
    with_y_pred_prob=None
):
    if with_y_pred is None:
        with_y_pred = True
    if with_y_pred_prob is None:
        with_y_pred_prob = True

    # Get predictions
    print(f'Getting prediction results for {col}.')
    y_test_pred_prob, y_test_pred, metrics_dict, = predicted_results
    # for metric_name, metric_value in metrics_dict.items():
    #     if 'loss' not in metric_name:
    #         metrics_dict[f'{metric_name.split("test_")[1]}'] = metrics_dict.pop(metric_name)
    #     else:
    #         metrics_dict['Log Loss/Cross Entropy'] = metrics_dict.pop(metric_name)
    print(f'Predictions shape for {col}: {y_test_pred.shape}')
    print('-'*20)

    # Get y_test_pred
    print('-'*20)
    print(f'Find argmax and flattening y_test_pred for {col}')
    y_test_pred = [bert_label2id[l] for l in torch.tensor(y_test_pred, device=device).argmax(axis=-1).clone().detach().flatten().tolist()]
    print(f'Length of y_test_pred: {len(y_test_pred)}')
    print('-'*20)

    # Get y_test_pred_prob
    print('-'*20)
    print(f'Find softmax and flattening y_test_pred for {col}')
    try:
        y_test_pred_prob = torch.nn.functional.softmax(torch.tensor(
            y_test_pred_prob[:, 1], device=device), dim=-1).clone().detach()
        print('Using torch.nn.functional.softmax')
    except Exception:
        y_test_pred_prob = scipy.special.softmax(
            y_test_pred_prob[:, 1], axis=-1)
        print('Using scipy.special.softmax')
    finally:
        y_test_pred_prob = y_test_pred_prob.flatten().tolist()
    print(f'Length of y_test_pred_prob: {len(y_test_pred_prob)}')
    print('-'*20)

    # Get metrics
    print('='*20)
    # Using y_test_pred
    if with_y_pred:
        print('-'*20)
        print('Computing metrics using y_test_pred.')
        (
            explained_variance, accuracy, balanced_accuracy, precision,
            recall, f1, mcc, fm, kappa, gmean, report, imblearn_report, cm, cm_normalized
        ) = compute_metrics_with_y_pred(
            y_test, y_test_pred
        )
    # Using y_test_pred_prob
    if with_y_pred_prob:
        print('-'*20)
        print('Computing metrics using y_test_pred_prob.')
        (
            average_precision, roc_auc, auc,
            fpr, tpr, threshold, loss,
            precision_pr, recall_pr, threshold_pr
        ) = compute_metrics_with_y_pred_prob(
            y_test, y_test_pred_prob
        )

    # Place metrics into dict
    print('-'*20)
    print('Appending metrics to dict.')
    metrics_dict = {
        'Train - Mean Cross Validation Score': float(cv_train_scores),
        f'Train - Mean Cross Validation - {scoring.title()}': float(cv_train_recall),
        f'Train - Mean Explained Variance - {scoring.title()}': float(cv_train_explained_variance_recall),
        'Test - Mean Cross Validation Score': float(cv_test_scores),
        f'Test - Mean Cross Validation - {scoring.title()}': float(cv_test_recall),
        f'Test - Mean Explained Variance - {scoring.title()}': float(cv_test_explained_variance_recall),
        'Explained Variance': float(explained_variance),
        'Accuracy': float(accuracy),
        'Balanced Accuracy': float(balanced_accuracy),
        'Precision': float(precision),
        'Average Precision': float(average_precision),
        'Recall': float(recall),
        'F1-score': float(f1),
        'Matthews Correlation Coefficient': float(mcc),
        'Fowlkes–Mallows Index': float(fm),
        'ROC': float(roc_auc),
        'AUC': float(auc),
        f'{scoring.title()} Best Threshold': threshold,
        f'{scoring.title()} Best Score': float(best_train_score),
        'Log Loss/Cross Entropy': float(loss),
        'Cohen’s Kappa': float(kappa),
        'Geometric Mean': float(gmean),
        'Classification Report': report,
        'Imbalanced Classification Report': imblearn_report,
        'Confusion Matrix': cm,
        'Normalized Confusion Matrix': cm_normalized
    }

    return metrics_dict


In [181]:
def plot_metrics(
    estimator, X_test, y_test, y_test_pred, y_test_pred_prob,
    col, vectorizer_name, classifier_name, 
    with_y_pred=None, with_y_pred_prob=None
):
    if with_y_pred is None:
        with_y_pred = True
    if with_y_pred_prob is None:
        with_y_pred_prob = False

    # Plotting
    # Using y_test_pred
    if with_y_pred:
        (
            cm_curve, cm_normalized_curve, roc_curve, pr_curve, calibration_curve
        ) = plot_metrics_with_y_pred(
            y_test, y_test_pred,
            col, vectorizer_name, classifier_name, 
        )


In [182]:
def evaluation(
    metrics_dict, df_metrics,
    col, vectorizer_name, classifier_name
):

    # Print metrics
    print('=' * 20)
    print('~' * 20)
    print(' Metrics:')
    print('~' * 20)
    print(f'Classification Report:\n {metrics_dict["Classification Report"]}')
    print('-' * 20)
    for test_metric_name, metric_value in metrics_dict.items():
        if 'Threshold' not in metric_name and test_metric_name not in ['test_runtime', 'test_samples_per_second', 'test_steps_per_second']:
            metric_name = test_metric_name.split("test_")[1].replace('_', ' ').title()
            with contextlib.suppress(TypeError, ValueError):
                metric_value = float(metric_value)
            if isinstance(metric_name, (int, float)):
                print(f'{metric_name}: {round(metric_value, 2)}')
            else:
                print(f'{metric_name}: {metric_value}')
            print('-' * 20)

            # Fill Table DF
            if isinstance(metric_value, float):
                df_metrics.loc[
                    (classifier_name), (col, vectorizer_name, metric_name)
                ] = metric_value
            else:
                df_metrics.loc[
                    (classifier_name), (col, vectorizer_name, metric_name)
                ] = str(metric_value)

    print('=' * 20)

    # Plot Metrics
    plot_metrics(
        estimator, X_test, y_test, y_test_pred, y_test_pred_prob,
        col, vectorizer_name, classifier_name, 
    )

    return df_metrics


In [183]:
# Function to place Xy and CV data in df and save
def save_Xy_estimator(
    X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
    X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
    X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
    bert_label2id, bert_id2label,
    estimator,
    col, vectorizer_name, classifier_name,
    results_save_path=results_save_path,
    method=method, xy_save_path=None,
    compression=None, protocol=None,
    path_suffix=None, data=None
):

    if xy_save_path is None:
        xy_save_path = f'{results_save_path}Xy/'
    if compression is None:
        compression = False
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL
    if path_suffix is None:
        path_suffix = f' - {str(col)} - {vectorizer_name} + {classifier_name}.pkl'
    if data is None:
        data = {}

    # Make df_train_data
    df_train_data = pd.DataFrame(
        {
            'X_train': X_train,
            'y_train': y_train,
            'X_train_bert_encodings': X_train_bert_encodings,
            'y_train_bert_encoded': y_train_bert_encoded,
            'bert_train_dataset': bert_train_dataset,
            'y_train_pred': y_train_pred,
        },
    )
    # Make df_test_data
    df_test_data = pd.DataFrame(
        {
            'X_test': X_test,
            'y_test': y_test,
            'X_test_bert_encodings': X_test_bert_encodings,
            'y_test_bert_encoded': y_test_bert_encoded,
            'y_test_pred': y_test_pred,
            'y_test_pred_prob': y_test_pred_prob,
        },
    )
    # Make df_val_data
    df_val_data = pd.DataFrame(
        {
            'X_val': X_val,
            'y_val': y_val,
            'X_val_bert_encodings': X_val_bert_encodings,
            'y_val_bert_encoded': y_val_bert_encoded,
        },
    )
    # Make df_labels
    df_labels = pd.DataFrame(
        {
            'bert_label2id': bert_label2id,
            'bert_id2label': bert_id2label,
        },
    )

    # Make data dict
    data['df_train_data'] = df_train_data
    data['df_test_data'] = df_test_data
    data['df_val_data'] = df_val_data
    data['df_labels'] = df_labels
    data['Estimator'] = estimator

    # Save files
    print('='*20)
    print('Saving Xy, labels and estimator...')
    for file_name, file_ in data.items():
        if not isinstance(file_, pd.DataFrame) and file_name == 'Estimator' and 'df_' not in file_name:
            # Save as .model
            file_.save_model(f'{results_save_path}{method} {file_name}{path_suffix.split(".pkl")[0]}.model')
            # Save as .pkl
            with open(
                f'{results_save_path}{method} {file_name}{path_suffix}.model', 'wb'
            ) as f:
                joblib.dump(file_, f, compress=compression, protocol=protocol)
        elif isinstance(file_, pd.DataFrame) and file_name != 'Estimator' and 'df_' in file_name:
            file_.to_pickle(
                f'{xy_save_path}{method} {file_name}{path_suffix}', protocol=protocol
            )
    print(f'Done saving Xy, labels and estimator!\n{list(data.keys())}')
    print('='*20)


In [184]:
# Save Model
def save_table(
    df_metrics,
    col, vectorizer_name, classifier_name,
    results_save_path=results_save_path,
    table_save_path=table_save_path,
    method=method, save_name=None,
    compression=None, protocol=None,
):
    if save_name is None:
        save_name = 'Estimators Table'
    if compression is None:
        compression = False
    if protocol is None:
        protocol = pickle.HIGHEST_PROTOCOL

    # Save metrics df
    print(f'Saving fitted estimator and table for {vectorizer_name} + {classifier_name}.')
    df_metrics.to_csv(f'{table_save_path}{save_name}.csv')
    df_metrics.to_pickle(f'{table_save_path}{save_name}.pkl')
    df_metrics.to_excel(f'{table_save_path}{save_name}.xlsx')
    df_metrics.to_latex(f'{table_save_path}{save_name}.tex')
    df_metrics.to_markdown(f'{table_save_path}{save_name}.md')
    df_metrics.to_html(f'{table_save_path}{save_name}.html')

    print('Done saving fitted estimator and table!')


In [185]:
# Save Model
def get_completed_estimators(results_save_path=results_save_path, method=method):

    estimators_list = []

    for estimator_path in glob.glob(f'{results_save_path}{method} Estimator - *.model'):
        with open(estimator_path, 'rb') as f:
            estimators_list.append(joblib.load(f))

    return estimators_list


In [186]:
def comparison_plots(
    estimators_list, X_test, y_test, col,
    curves_dict=None, cmap=plt.cm.Blues
):

    curves_dict = {
        'ROC Curve': metrics.RocCurveDisplay,
        'Precision Recall Curve': metrics.PrecisionRecallDisplay,
        'Calibration Curve': metrics.CalibrationDisplay,
    }

    assert len(estimators_list) != 0

    for curve_name, curve_package in curves_dict.items():
        print('-' * 20)
        print(f'{str(curve_name)}: {str(col)}')
        fig, ax = plt.subplots()
        ax.set_title(f'{str(curve_name)}: {str(col)}')
        for estimator in estimators_list:
            curve = curve_package.from_estimator(
                estimator, X_test, y_test, pos_label=1, ax=ax,
                name=f'{estimator.steps[0][0]} + {estimator.steps[1][0]} + {estimator.steps[-1][0]}'
            )
        show_and_close_plots()

        # Save Plots
        print('Saving plots.')
        for image_save_format in ['eps', 'png', 'svg']:
            curve.figure_.savefig(
                f'{plot_save_path}{method} {str(col)} - All {str(curve_name)}s.{image_save_format}',
                format=image_save_format,
                dpi=3000, bbox_inches='tight'
            )


# Training

### READ DATA

In [187]:
df_manual = pd.read_pickle(f'{df_save_dir}df_manual_for_trainning.pkl').reset_index(drop=True)
# HACK REMOVE THIS!!!!!!
df_manual = df_manual.groupby(analysis_columns).sample(n=200).reset_index(drop = True)


In [192]:
%%time
print('#'*40)
print('Starting!')
print('#'*40)

analysis_columns = ['Warmth', 'Competence']
text_col = 'Job Description spacy_sentencized'

# Load Table DF
# df_metrics = pd.read_pickle(f'{table_save_path}Classifiers Table.pkl')

for col in tqdm.tqdm(analysis_columns):

    assert len(df_manual[df_manual[str(col)].map(df_manual[str(col)].value_counts() > 1)]) != 0, f'Dataframe has no {col} values!'
    print('-'*20)
    print(f'{"="*30} TRAINING DATASET OF LENGTH {len(df_manual)} ON {col.upper()} {"="*30}')
    print('-'*20)

    # Split
    (
        train, X_train, X_train_bert_encodings, y_train, y_train_bert_encoded, bert_train_dataset,
        test, X_test, X_test_bert_encodings, y_test, y_test_bert_encoded, bert_test_dataset,
        val, X_val, X_val_bert_encodings, y_val, y_val_bert_encoded, bert_val_dataset,
        bert_label2id, bert_id2label,
        train_class_weights, train_class_weights_ratio, train_class_weights_dict,
        test_class_weights_dict, test_class_weights_ratio, test_class_weights_dict
    ) = encode_data(
        df=df_manual, col=col, analysis_columns=analysis_columns, text_col=text_col
    )

    # Load pre-trained BERT model
    bert_model = BertForSequenceClassification.from_pretrained(
        bert_model_name, num_labels=len(bert_id2label)
    ).to(device)

    # Accelerate model
    (
        bert_model, bert_train_dataset, bert_test_dataset, bert_val_dataset
    ) = accelerator.prepare(
        bert_model, bert_train_dataset, bert_test_dataset, bert_val_dataset
    )
    # bert_model.eval()

    # Initialize BERT Trainer
    print('='*30)
    tokenizer_name = bert_tokenizer.__class__.__name__
    classifier_name = bert_model.__class__.__name__
    print(f'Initializing BERT Trainer using {tokenizer_name} + {classifier_name} for {col}')

    # Set BERT fine-tuning parameters
    bert_training_args = TrainingArguments(
        output_dir=f'{results_save_path}{method} Results',
        logging_dir=f'{results_save_path}{method} Logs',
        seed=random_state,
        torch_compile=True,
        use_mps_device=True if device_name == 'mps' else False,
        optim='adamw_torch',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=20,
        learning_rate=5e-5,
        warmup_steps=100,
        weight_decay=0.01,
        logging_steps=100,
        evaluation_strategy='steps',
    )

    # Pass data to trainer 
    print('-'*20)
    print('Passing arguments to estimator.')
    with joblib.parallel_backend(backend='loky', n_jobs=n_jobs):
        bert_model, bert_train_dataset, bert_test_dataset, bert_val_dataset = accelerator.prepare(bert_model, bert_train_dataset, bert_test_dataset, bert_val_dataset)
        estimator = Trainer(
            model=bert_model,
            tokenizer=bert_tokenizer,
            args=bert_training_args,
            train_dataset=bert_train_dataset,
            eval_dataset=bert_val_dataset,
            compute_metrics=compute_metrics,
        )
        if estimator.place_model_on_device:
            estimator.model.to(device)

        # Train trainer
        print('-'*20)
        print(f'Starting training for {col}.')
        estimator.train()
        print('Done training!')
        print('-'*20)

        # Save model
        print('-'*20)
        print(f'Saving model for {col}.')
        save_Xy_estimator(
            X_train, y_train, X_train_bert_encodings, y_train_bert_encoded, bert_train_dataset,
            X_test, y_test, X_test_bert_encodings, y_test_bert_encoded, bert_test_dataset,
            X_val, y_val, X_val_bert_encodings, y_val_bert_encoded, bert_val_dataset,
            bert_label2id, bert_id2label,
            estimator,
            col, vectorizer_name, classifier_name,
        )
        # estimator.save_model(f'{results_save_path}{method} Estimator {str(col)} - {tokenizer_name} + {classifier_name}.model')
        print('Done training!')
        print('-'*20)

        # Evaluate
        print('-'*20)
        print(f'Evaluating estimator for {col}.')
        estimator.evaluate()
        print('Done evaluating!')

        # Get predictions
        print(f'Getting prediction results for {col}.')
        y_test_pred_prob, y_test_pred, metrics_dict, = estimator.predict(bert_test_dataset)
        for metric_name, metric_value in metrics_dict.items():
            if 'loss' not in metric_name:
                metrics_dict[f'{metric_name.split("test_")[1]}'] = metrics_dict.pop(metric_name)
            else:
                metrics_dict['Log Loss/Cross Entropy'] = metrics_dict.pop(metric_name)
        print(f'Predictions shape for {col}: {y_test_pred.shape}')
        print('-'*20)

        # Get y_test_pred
        print('-'*20)
        print(f'Find argmax and flattening y_test_pred for {col}')
        y_test_pred = [bert_label2id[l] for l in torch.tensor(y_test_pred, device=device).argmax(axis=-1).clone().detach().flatten().tolist()]
        print(f'Length of y_test_pred: {len(y_test_pred)}')
        print('-'*20)

        # Get y_test_pred_prob
        print('-'*20)
        print(f'Find softmax and flattening y_test_pred for {col}')
        try:
            y_test_pred_prob = torch.nn.functional.softmax(torch.tensor(y_test_pred_prob[:, 1], device=device), dim=-1).clone().detach()
            print('Using torch.nn.functional.softmax')
        except Exception:
            y_test_pred_prob = scipy.special.softmax(y_test_pred_prob[:, 1], axis=-1)
            print('Using scipy.special.softmax')
        finally:
            y_test_pred_prob = y_test_pred_prob.flatten().tolist()
        print(f'Length of y_test_pred_prob: {len(y_test_pred_prob)}')
        print('-'*20)
    
    # # Examine predictions
    # print('-'*20)
    # print(f'Examining predictions for {col}')
    # print('Incorrectly Classified Reviews:')
    # for _y_test, _y_test_pred, _X_test in random.sample(list(zip(y_test, y_test_pred, X_test)), 20):
    #     if _y_test != _y_test_pred:
    #         print(f'TRUE LABEL: {_y_test}')
    #         print(f'PREDICTED LABEL: {_y_test_pred}')
    #         print(f'REVIEW TEXT: {_X_test[:100]}...')

    # # Evluate estimator
    # print('-'*20)
    # print(f'Probs evaluation and ploting metrics for {col}')
    # df_metrics = evaluation(predicted_results, df_metrics, col, tokenizer_name, classifier_name)
    # print()

    # # Save BERT Model
    # print('-'*20)
    # print(f'Saving estimator and metrics table for {col}')
    # save_table(df_metrics, estimator, col, tokenizer_name, classifier_name)
    # print()

    # # Compare Estimators
    # print('='*20)
    # print(f'Comparing estimators for {col}')
    # comparison_plots(get_completed_estimators().append(estimator), X_test, y_test, col)
    # print('='*20)
    # print()

print('#'*40)
print('DONE!')
print('#'*40)



########################################
Starting!
########################################


  0%|          | 0/2 [00:00<?, ?it/s]

--------------------
--------------------
Splitting data into training, testing, and validation sets:
Ratios: train_size = 0.75, test size = 0.1, validation size = 0.15
Done splitting data into training, testing, and validation sets.
Encoding training, testing, and validation sets with BertTokenizerFast.from_pretrained using bert-base-uncased.
Done encoding training, testing, and validation sets.
Training set shape: (600,)
----------
Training set example:
Automate) who will be responsible for preparation and execution of improvements working with the RPA and Microsoft Power
----------
Training set BERT encodings example:
[CLS] auto ##mate ) who will be responsible for preparation and execution of improvements working with the r ##pa and microsoft power [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]
----------
Training labels after BERT encoding: {0, 1}
~~~~~~~~~~
Testing set shape: (80,)
----------
Testing set example:
self-motivation and a clear dedication to your profession.
-------

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Initializing BERT Trainer using BertTokenizerFast + BertForSequenceClassification for Warmth
--------------------
Passing arguments to estimator.
--------------------
Starting training for Warmth.


  0%|          | 0/114 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}
  0%|          | 0/2 [02:03<?, ?it/s]

{'loss': 0.5578, 'learning_rate': 5e-05, 'epoch': 2.63}


  0%|          | 0/6 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}
  0%|          | 0/2 [02:13<?, ?it/s]

Getting prediction results for Warmth.





In [193]:
# Get y_test_pred_prob
print('-'*20)
print(f'Getting y_test_pred_prob for {col}')
try:
    y_test_pred_prob = torch.nn.functional.softmax(torch.tensor(predicted_results.predictions, device=device), dim=-1)
    print('Using torch.nn.functional.softmax')
except Exception:
    y_test_pred_prob = scipy.special.softmax(predicted_results.predictions, axis=1)
    print('Using scipy.special.softmax')
except Exception:
    y_test_pred_prob = predicted_results.predictions[:, 1]
    print('Using predicted_results.predictions[:, 1]')
finally:
    y_test_pred_prob = y_test_pred_prob.flatten().tolist()
print(f'Length of y_test_pred_prob: {len(y_test_pred_prob)}')

--------------------
Getting y_test_pred_prob for Warmth
Using torch.nn.functional.softmax
Length of y_test_pred_prob: 240


In [194]:
type(y_test)


numpy.ndarray

In [195]:
# Examine predictions
print('-'*20)
print(f'Examining predictions for {col}')
print('Correctly Classified Reviews:')
for y_test, _y_test_pred, _X_test in random.sample(list(zip(y_test, y_test_pred, X_test)), 20):
    if y_test == y_test_pred:
        print(f'LABEL: {y_test}')
        print(f'REVIEW TEXT: {_X_test[:100]}...')
        print('-'*20)
        print()

# print('Incorrectly Classified Reviews:')
# for y_test, y_test_pred, _X_test in random.sample(list(zip(y_test, y_test_pred, X_test)), 20):
#     if y_test != y_test_pred:
#         print(f'TRUE LABEL: {y_test}')
#         print(f'PREDICTED LABEL: {y_test_pred}')
#         print(f'REVIEW TEXT: {_X_test[:100]}...')
#         print()

--------------------
Examining predictions for Warmth
Correctly Classified Reviews:


In [196]:
metrics_dict

{'Log Loss/Cross Entropy': 0.4136017858982086,
 'Explained Variance': 0.21333333333333315,
 'Accuracy': 0.7875,
 'Balanced Accuracy': 0.8015873015873016,
 'Precision': 0.6956521739130435,
 'Recall': 0.9142857142857143,
 'F1-score': 0.7901234567901234,
 'Matthews Correlation Coefficient': 0.6052920249203222,
 'Fowlkes–Mallows Index': 0.6633173894335515,
 'Cohen’s Kappa': 0.5828220858895705,
 'Geometric Mean': 0.6298412698412699,
 'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.91      0.69      0.78        45\n           1       0.70      0.91      0.79        35\n\n    accuracy                           0.79        80\n   macro avg       0.80      0.80      0.79        80\nweighted avg       0.82      0.79      0.79        80\n',
 'Imbalanced Classification Report': '                   pre       rec       spe        f1       geo       iba       sup\n\n          0       0.91      0.69      0.91      0.78      0.79      0.62        

In [197]:
# Get y_test_pred_proba
print('-'*20)
print(f'Getting y_test_pred_prob for {col}')
try:
    y_test_pred_prob = torch.nn.functional.softmax(torch.tensor(predicted_results.predictions, device=device), dim=-1)
    print('Using torch.nn.functional.softmax')
except Exception:
    y_test_pred_prob = scipy.special.softmax(predicted_results.predictions, axis=1)
    print('Using scipy.special.softmax')
except Exception:
    y_test_pred_prob = predicted_results.predictions[:, 1]
    print('Using predicted_results.predictions[:, 1]')
finally:
    y_test_pred_prob = y_test_pred_prob.flatten().tolist()
print(f'Length of y_test_pred_prob: {len(y_test_pred_prob)}')



--------------------
Getting y_test_pred_prob for Warmth
Using torch.nn.functional.softmax
Length of y_test_pred_prob: 240


In [198]:
predicted_results.metrics

{'test_loss': 0.40211421251296997,
 'test_Explained Variance': 0.32441565756125046,
 'test_Accuracy': 0.825,
 'test_Balanced Accuracy': 0.811743170937764,
 'test_Precision': 0.7948717948717948,
 'test_Recall': 0.9253731343283582,
 'test_F1-score': 0.8551724137931035,
 'test_Matthews Correlation Coefficient': 0.6491279867142219,
 'test_Fowlkes–Mallows Index': 0.7215432668699364,
 'test_Cohen’s Kappa': 0.6373056994818653,
 'test_Geometric Mean': 0.6460152069839482,
 'test_Classification Report': '              precision    recall  f1-score   support\n\n           0       0.88      0.70      0.78        53\n           1       0.79      0.93      0.86        67\n\n    accuracy                           0.82       120\n   macro avg       0.84      0.81      0.82       120\nweighted avg       0.83      0.82      0.82       120\n',
 'test_Imbalanced Classification Report': '                   pre       rec       spe        f1       geo       iba       sup\n\n          0       0.88      0.70  

In [199]:
# Evluate estimator
print('-'*20)
print(f'Probs evaluation and ploting metrics for {col}')
df_metrics = evaluation(predicted_results, df_metrics, col, classifier_name, tokenizer_name)


--------------------
Probs evaluation and ploting metrics for Warmth


In [200]:
estimator.is_model_parallel


False

In [201]:
estimator.args.n_gpu

1

In [202]:
df_manual['Bert Preditcions'] = df_manual['Job Description spacy_sentencized'].progress_apply(
    lambda sentence: estimator.predict(sent)
    for sent in sentence
    if sent and isinstance(sent, (str, list)) and isinstance(sentence, list)
)

In [203]:
metrics_dict = estimator.evaluate()


  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}


Getting prediction results for Warmth.


In [204]:
metrics_dict


{'Log Loss/Cross Entropy': 0.4136017858982086,
 'Explained Variance': 0.21333333333333315,
 'Accuracy': 0.7875,
 'Balanced Accuracy': 0.8015873015873016,
 'Precision': 0.6956521739130435,
 'Recall': 0.9142857142857143,
 'F1-score': 0.7901234567901234,
 'Matthews Correlation Coefficient': 0.6052920249203222,
 'Fowlkes–Mallows Index': 0.6633173894335515,
 'Cohen’s Kappa': 0.5828220858895705,
 'Geometric Mean': 0.6298412698412699,
 'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.91      0.69      0.78        45\n           1       0.70      0.91      0.79        35\n\n    accuracy                           0.79        80\n   macro avg       0.80      0.80      0.79        80\nweighted avg       0.82      0.79      0.79        80\n',
 'Imbalanced Classification Report': '                   pre       rec       spe        f1       geo       iba       sup\n\n          0       0.91      0.69      0.91      0.78      0.79      0.62        

In [205]:
predicted_results = estimator.predict(bert_val_dataset)

  item = {key: torch.tensor(val[idx], device=device) for key, val in self.encodings.items()}


Getting prediction results for Warmth.


In [206]:
predicted_results.metrics


{'test_loss': 0.40211421251296997,
 'test_Explained Variance': 0.32441565756125046,
 'test_Accuracy': 0.825,
 'test_Balanced Accuracy': 0.811743170937764,
 'test_Precision': 0.7948717948717948,
 'test_Recall': 0.9253731343283582,
 'test_F1-score': 0.8551724137931035,
 'test_Matthews Correlation Coefficient': 0.6491279867142219,
 'test_Fowlkes–Mallows Index': 0.7215432668699364,
 'test_Cohen’s Kappa': 0.6373056994818653,
 'test_Geometric Mean': 0.6460152069839482,
 'test_Classification Report': '              precision    recall  f1-score   support\n\n           0       0.88      0.70      0.78        53\n           1       0.79      0.93      0.86        67\n\n    accuracy                           0.82       120\n   macro avg       0.84      0.81      0.82       120\nweighted avg       0.83      0.82      0.82       120\n',
 'test_Imbalanced Classification Report': '                   pre       rec       spe        f1       geo       iba       sup\n\n          0       0.88      0.70  