In this code we are going to handle the predictions of the test set for the different classifiers. This will also return the predicted probabilities so that we can compute the AUC of the different classifiers addressing one of the requirements of the PLOS One Reviewers.

# User Level Transformer

In [1]:
# Helper Function
import torch, numpy as np, random, os,  pandas as pd
from tqdm.notebook import tqdm, trange

def predict(model, data_loader):
    #model.eval()
    torch.no_grad()
    resultsDF_list = {'user_id': [], 'true_label': [], 'predicted_label':[], 'proba_class_1': [] }
    with torch.no_grad():   
        epoch_iterator = tqdm(data_loader, desc="Predicting")
        for step, batch in enumerate(epoch_iterator):        
            # Get Batch Elements
            batched_ids, user_embeddings, labels = batch['batched_ids'], batch['user_embeddings'].to(device), batch['labels']
            
            # Predict
            logits = model(user_embeddings) 
            _, preds = torch.max(logits, dim=1)
            proba_class1 = torch.softmax(logits, dim = 1)[:, 1]  
            
            # Compile results 
            resultsDF_list['user_id'].extend(batched_ids)
            if labels is None:
                resultsDF_list['true_label'].extend([None] * len(batched_ids))
            else:
                resultsDF_list['true_label'].extend(list(labels.data.numpy()))
            resultsDF_list['predicted_label'].extend(list(preds.data.cpu().numpy()))
            resultsDF_list['proba_class_1'].extend(list(proba_class1.data.cpu().numpy()))            

    resultsDF = pd.DataFrame(resultsDF_list)
    return resultsDF

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [2]:
from torch.utils.data import Dataset

class EmbeddingDataset(Dataset):
    '''
    Dataset used when working with pretrained user embeddings resulting from a User_Stance_Classifier
    '''
    def __init__(self, user_labels_file, embedding_file, label_column = 'true_label'):
        self.User_DF = pd.read_csv(user_labels_file, dtype = {'user_id': str})
        userEmbeddings = torch.load(embedding_file, weights_only=False)
        
        # Manage Label Column
        if label_column is not None:
            self.User_DF = self.User_DF.rename(columns = {label_column :'user_stance'})            
        else:
            self.User_DF['user_stance'] = None
           
        # Define parameters
        self.ids = self.User_DF.user_id.values
        self.labels = self.User_DF.user_stance.values
        self.embeddings = userEmbeddings
           
    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, item):
        return self.ids[item], self.embeddings[item], self.labels[item]    

    def _embedding_datacollator(self, data_batch): # Could produce problems of memory overflow in multiprocessing (not sure if the self reference will cause duplication of the dataset) 
        # TODO: if it doest work for multiprocessing make it take tokenizer as a parameter
        ''' 
        Collate Batches of observations given by this dataset. 
            data_batch: List of Dictionaries as produced by self.__getitem__
        '''
        batched_ids, batched_embs, labels = zip(*data_batch)

        return {
            'batched_ids': batched_ids,
            'user_embeddings': torch.stack(batched_embs, dim = 0), 
            'labels': torch.tensor(labels).long()  
        }


In [5]:
from UserModules.StanceDataset import *
from torch.utils.data import Dataset, DataLoader
# Load BERT Tokenizer
import torch, numpy as np, random, os
from UserModules.ModelConfiguration import *
from UserModules.UserClassifier import User_Stance_Classifier, User_Embedding_Classifier

countries = ['Colombia', 'Chile', 'Ecuador', 'Bolivia']
## Global Parameters
MAX_SEQ_LEN, MAX_TW_LEN = 128, 15
BATCH_SIZE = 64
SEED = 1911
INTERACTION_TYPES = ['<cls>', '<pad>', 'Original', 'Quote', 'Reply', 'Retweet']
num_workers = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
workDir = r''

for COUNTRY in countries:
    embDir = r'{}\Results\Predictions\Main_Predictions\{}'.format(workDir, COUNTRY)
    outDir = r'{}\{}'.format(workDir, COUNTRY)
    if not os.path.isdir(outDir):
        os.mkdir(outDir)

    print(f'### Working with {COUNTRY} User Transformer')
    # Load Model
    tweet_enc_args = {
        'Model_Dir': r'{}\RoBETO_Weights'.format(workDir), 
        'dropout': 0.1,
        'activation': 'Tanh',
        'freeze_bert_embeddings': True
    }
    
    tweetConfig = RoBERTaEncoderConfig(**tweet_enc_args)
    
    emb_params = {
        'cls_idx': INTERACTION_TYPES.index('<cls>'),
        'pad_idx': INTERACTION_TYPES.index('<pad>'),
        'max_tweet_number': MAX_TW_LEN,
        'dropout': 0.1,
        'layer_norm_eps': 1e-12,
        'tweet_type_number': len(INTERACTION_TYPES),
        'mask_embeddings': True
    }    
    embConfig = ModelEmbeddingsConfig(tweetConfig, **emb_params)
    
    user_params = { # This are the default parameters in UserEncoderConfig
        'num_attention_heads': 6,
        'intermidiate_size': 2048,
        'num_encoder_layers': 3,
        'transformer_activation': 'gelu',
        'user_activation': 'Tanh',
        'dropout': 0.1, 
        'initializer_range': 0.02,
        'model_embedder_version': 'v3' # v3 leaves the CLS parameter for the type embeddings    
    }
    userConfig = UserEncoderConfig(embConfig, **user_params)
    
    # Instantiate model
    model = User_Embedding_Classifier(num_classes = 2, user_config = userConfig)    
    model.to(device)

    print(f'    Load Best Model')
    best_dir = r'{}\Best_Models\{}'.format(workDir, COUNTRY)
    
    user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)
    print(f'Model loaded from <== {best_dir}')

    # Set weights from best model for country
    model.load_weights_from_User_Stance_Classifier(user_classifier_sd)

    # Load Input data
    embDir = r'{}\Results\Predictions\Main_Predictions\{}'.format(workDir, COUNTRY)
    data_keys = {'train', 'test', 'val'}
    for key in data_keys:    
        # Set Data Files
        userLabelsFile = r'{}\{}_predictions.csv'.format(embDir, key)
        embFile = r'{}\{}_embeddings.pt'.format(embDir, key)
        
        data = EmbeddingDataset(userLabelsFile, embFile, label_column = 'true_label')
        data_loader =  DataLoader(data, batch_size = BATCH_SIZE, num_workers=num_workers, 
                                  collate_fn = data._embedding_datacollator, shuffle = True)
    
        resultsDF = predict(model, data_loader)
    
        # Validate the results
        previous_answers = pd.read_csv(userLabelsFile, dtype = {'user_id': str}).rename(columns = {'predicted_label': 'old_prediction'})
        temp = pd.merge(resultsDF, previous_answers, on = 'user_id')
    
        print(f'    The percentage of predictions that matched previous results was : {(temp.predicted_label == temp.old_prediction).sum() / len(temp):.2%}')
    
        # Save Results
        resultsDF.to_csv(os.path.join(outDir, '{}_predictions.csv'.format(key)), index = False)

    del(model, resultsDF, data, temp)
    torch.cuda.empty_cache()

### Working with Colombia User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Colombia


Predicting:   0%|          | 0/156 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/169 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/1367 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Chile User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Chile


Predicting:   0%|          | 0/361 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/397 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/3208 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Ecuador User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Ecuador


Predicting:   0%|          | 0/111 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/121 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/973 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Bolivia User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Bolivia


Predicting:   0%|          | 0/160 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/177 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


Predicting:   0%|          | 0/1428 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


### Cross-Country Predictions

In [5]:
from UserModules.StanceDataset import *
from torch.utils.data import Dataset, DataLoader
# Load BERT Tokenizer
import torch, numpy as np, random, os
from UserModules.ModelConfiguration import *
from UserModules.UserClassifier import User_Stance_Classifier, User_Embedding_Classifier

countries = ['Colombia', 'Chile', 'Ecuador', 'Bolivia']
## Global Parameters
MAX_SEQ_LEN, MAX_TW_LEN = 128, 15
BATCH_SIZE = 64
SEED = 1911
INTERACTION_TYPES = ['<cls>', '<pad>', 'Original', 'Quote', 'Reply', 'Retweet']
num_workers = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
workDir = r''

for COUNTRY in countries:
    embDir = r'{}\Results\Predictions\Main_Predictions\{}'.format(workDir, COUNTRY)
    outDir = r'{}\{}'.format(workDir, COUNTRY)
    if not os.path.isdir(outDir):
        os.mkdir(outDir)

    print(f'### Working with {COUNTRY} User Transformer')
    # Load Model
    tweet_enc_args = {
        'Model_Dir': r'{}\RoBETO_Weights'.format(workDir), 
        'dropout': 0.1,
        'activation': 'Tanh',
        'freeze_bert_embeddings': True
    }
    
    tweetConfig = RoBERTaEncoderConfig(**tweet_enc_args)
    
    emb_params = {
        'cls_idx': INTERACTION_TYPES.index('<cls>'),
        'pad_idx': INTERACTION_TYPES.index('<pad>'),
        'max_tweet_number': MAX_TW_LEN,
        'dropout': 0.1,
        'layer_norm_eps': 1e-12,
        'tweet_type_number': len(INTERACTION_TYPES),
        'mask_embeddings': True
    }    
    embConfig = ModelEmbeddingsConfig(tweetConfig, **emb_params)
    
    user_params = { # This are the default parameters in UserEncoderConfig
        'num_attention_heads': 6,
        'intermidiate_size': 2048,
        'num_encoder_layers': 3,
        'transformer_activation': 'gelu',
        'user_activation': 'Tanh',
        'dropout': 0.1, 
        'initializer_range': 0.02,
        'model_embedder_version': 'v3' # v3 leaves the CLS parameter for the type embeddings    
    }
    userConfig = UserEncoderConfig(embConfig, **user_params)
    
    # Instantiate model
    model = User_Embedding_Classifier(num_classes = 2, user_config = userConfig)    
    model.to(device)

    print(f'    Load Best Model')
    best_dir = r'{}\Best_Models\{}'.format(workDir, COUNTRY)
    
    user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)
    print(f'Model loaded from <== {best_dir}')

    # Set weights from best model for country
    model.load_weights_from_User_Stance_Classifier(user_classifier_sd)
    
    for other_country in countries:
        if other_country == COUNTRY:   continue
        print('### Predicting stances in country: ' + other_country)

        # Load Input data
        embDir = r'{}\Results\Predictions\Cross_Predictions\{}_Classifier'.format(workDir, COUNTRY)
                  
        data_keys = {'train', 'test', 'val'}
        for key in data_keys:
            set_seed(SEED)
            print(f' ## Working with Files in {key} set')
            # Set Data Files
            userLabelsFile = r'{}\{}_{}_predictions.csv'.format(embDir, other_country, key)
            embFile = r'{}\{}_{}_embeddings.pt'.format(embDir, other_country, key)
            
            data = EmbeddingDataset(userLabelsFile, embFile, label_column = 'true_label')
            data_loader =  DataLoader(data, batch_size = BATCH_SIZE, num_workers=num_workers, 
                                      collate_fn = data._embedding_datacollator, shuffle = True)    
    
            resultsDF = predict(model, data_loader)
        
            # Validate the results
            previous_answers = pd.read_csv(userLabelsFile, dtype = {'user_id': str}).rename(columns = {'predicted_label': 'old_prediction'})
            temp = pd.merge(resultsDF, previous_answers, on = 'user_id')
        
            print(f'    The percentage of predictions that matched previous results was : {(temp.predicted_label == temp.old_prediction).sum() / len(temp):.2%}')
        
            # Save Results
            resultsDF.to_csv(r'{}\{}_{}_predictions.csv'.format(outDir, other_country, key), index = False)
    
    del(model, resultsDF, data, temp)
    torch.cuda.empty_cache()
    


### Working with Colombia User Transformer
    Load Best Model
Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Colombia


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


### Predicting stances in country: Chile
 ## Working with Files in val set


Predicting:   0%|          | 0/397 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/361 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/3208 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Ecuador
 ## Working with Files in val set


Predicting:   0%|          | 0/121 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/111 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/973 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Bolivia
 ## Working with Files in val set


Predicting:   0%|          | 0/177 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/160 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1428 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Chile User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Chile
### Predicting stances in country: Colombia
 ## Working with Files in val set


Predicting:   0%|          | 0/169 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/156 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1367 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Ecuador
 ## Working with Files in val set


Predicting:   0%|          | 0/121 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/111 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/973 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Bolivia
 ## Working with Files in val set


Predicting:   0%|          | 0/177 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/160 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1428 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Ecuador User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Ecuador
### Predicting stances in country: Colombia
 ## Working with Files in val set


Predicting:   0%|          | 0/169 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/156 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1367 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Chile
 ## Working with Files in val set


Predicting:   0%|          | 0/397 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/361 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/3208 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Bolivia
 ## Working with Files in val set


Predicting:   0%|          | 0/177 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/160 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1428 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Working with Bolivia User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== E:\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Bolivia
### Predicting stances in country: Colombia
 ## Working with Files in val set


Predicting:   0%|          | 0/169 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/156 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/1367 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Chile
 ## Working with Files in val set


Predicting:   0%|          | 0/397 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/361 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/3208 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
### Predicting stances in country: Ecuador
 ## Working with Files in val set


Predicting:   0%|          | 0/121 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in test set


Predicting:   0%|          | 0/111 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/973 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%


## Chilean Referendum

In [7]:
from UserModules.StanceDataset import *
from torch.utils.data import Dataset, DataLoader
# Load BERT Tokenizer
import torch, numpy as np, random, os
from UserModules.ModelConfiguration import *
from UserModules.UserClassifier import User_Stance_Classifier, User_Embedding_Classifier

## Global Parameters
MAX_SEQ_LEN, MAX_TW_LEN = 128, 15
BATCH_SIZE = 64
SEED = 1911
INTERACTION_TYPES = ['<cls>', '<pad>', 'Original', 'Quote', 'Reply', 'Retweet']
num_workers = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

workDir = r''

COUNTRY  = 'Chile'
embDir = r'{}\Results\Predictions\Chile_Referendum'.format(workDir)
outDir = r'{}\Results\Predictions\Revisions\Chilean_Referendum'.format(workDir)
if not os.path.isdir(outDir):
    os.mkdir(outDir)

print(f'### Working with {COUNTRY} User Transformer')
# Load Model
tweet_enc_args = {
    'Model_Dir': r'{}\RoBETO_Weights'.format(oneDriveDir), 
    'dropout': 0.1,
    'activation': 'Tanh',
    'freeze_bert_embeddings': True
}

tweetConfig = RoBERTaEncoderConfig(**tweet_enc_args)

emb_params = {
    'cls_idx': INTERACTION_TYPES.index('<cls>'),
    'pad_idx': INTERACTION_TYPES.index('<pad>'),
    'max_tweet_number': MAX_TW_LEN,
    'dropout': 0.1,
    'layer_norm_eps': 1e-12,
    'tweet_type_number': len(INTERACTION_TYPES),
    'mask_embeddings': True
}    
embConfig = ModelEmbeddingsConfig(tweetConfig, **emb_params)

user_params = { # This are the default parameters in UserEncoderConfig
    'num_attention_heads': 6,
    'intermidiate_size': 2048,
    'num_encoder_layers': 3,
    'transformer_activation': 'gelu',
    'user_activation': 'Tanh',
    'dropout': 0.1, 
    'initializer_range': 0.02,
    'model_embedder_version': 'v3' # v3 leaves the CLS parameter for the type embeddings    
}
userConfig = UserEncoderConfig(embConfig, **user_params)

# Instantiate model
model = User_Embedding_Classifier(num_classes = 2, user_config = userConfig)    
model.to(device)

print(f'    Load Best Model')
best_dir = r'{}\Results\Best_Models\{}'.format(oneDriveDir, COUNTRY)

user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)
print(f'Model loaded from <== {best_dir}')

# Set weights from best model for country
model.load_weights_from_User_Stance_Classifier(user_classifier_sd)

data_keys = {'train', 'test', 'val'}
for key in data_keys:
    set_seed(SEED)
    print(f' ## Working with Files in {key} set')
    # Set Data Files
    userLabelsFile = r'{}\{}_predictions.csv'.format(embDir, key)
    embFile = r'{}\{}_embeddings.pt'.format(embDir, key)
    
    data = EmbeddingDataset(userLabelsFile, embFile, label_column = 'true_label')
    data_loader =  DataLoader(data, batch_size = BATCH_SIZE, num_workers=num_workers, 
                              collate_fn = data._embedding_datacollator, shuffle = True)    

    resultsDF = predict(model, data_loader)

    # Validate the results
    previous_answers = pd.read_csv(userLabelsFile, dtype = {'user_id': str}).rename(columns = {'predicted_label': 'old_prediction'})
    temp = pd.merge(resultsDF, previous_answers, on = 'user_id')

    print(f'    The percentage of predictions that matched previous results was : {(temp.predicted_label == temp.old_prediction).sum() / len(temp):.2%}')

    # Save Results
    resultsDF.to_csv(r'{}\User_Transformer_{}_predictions.csv'.format(outDir, key), index = False)


### Working with Chile User Transformer
    Load Best Model


  user_classifier_sd = torch.load(os.path.join(best_dir, 'model_results.pth'), map_location=device)


Model loaded from <== C:\Users\Ramon\OneDrive\Research Group\Papers\Target_Stance_Classification\Results\Best_Models\Chile
 ## Working with Files in test set


Predicting:   0%|          | 0/53 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in val set


Predicting:   0%|          | 0/40 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
 ## Working with Files in train set


Predicting:   0%|          | 0/239 [00:00<?, ?it/s]

    The percentage of predictions that matched previous results was : 100.00%
