In [1]:
import pandas as pd
import os
from glob import glob
import torch
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import tensorflow as tf
from transformers import BertTokenizer
import warnings
warnings.filterwarnings("ignore")
# Select the compute device once, up front: the first CUDA GPU when PyTorch
# can see one, otherwise the CPU.
if not torch.cuda.is_available():
    # CPU fallback.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # Use GPU index 0 and report what hardware was found.
    device = torch.device("cuda:0")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))


2023-05-11 10:44:10.913108: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-11 10:44:11.080191: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-05-11 10:44:11.567753: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-05-11 10:44:11.567814: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

There are 2 GPU(s) available.
We will use the GPU: NVIDIA A100-PCIE-40GB


In [3]:
# Collect every .xlsx file directly under the speeches folder, in
# lexicographic order, and show the resulting list.
path = '../../speeches/'
files = glob(path + '*.xlsx')
files.sort()
print(files)


['../../speeches/100_fs.xlsx', '../../speeches/103_fs.xlsx', '../../speeches/106_ak.xlsx', '../../speeches/108_ak.xlsx', '../../speeches/108_ln.xlsx', '../../speeches/10_ln.xlsx', '../../speeches/110_fs.xlsx', '../../speeches/111_ak.xlsx', '../../speeches/112_ln.xlsx', '../../speeches/114_ln.xlsx', '../../speeches/115_Interview_ak.xlsx', '../../speeches/116_ak.xlsx', '../../speeches/117_ak.xlsx', '../../speeches/118_ak.xlsx', '../../speeches/118_fs.xlsx', '../../speeches/11_fs.xlsx', '../../speeches/120_ak.xlsx', '../../speeches/120_fs.xlsx', '../../speeches/121_fs.xlsx', '../../speeches/122_fs.xlsx', '../../speeches/123_ak.xlsx', '../../speeches/124_ak.xlsx', '../../speeches/125_fs.xlsx', '../../speeches/128_fs.xlsx', '../../speeches/12_fs.xlsx', '../../speeches/130_fs.xlsx', '../../speeches/131_ak.xlsx', '../../speeches/133_ak.xlsx', '../../speeches/134_fs.xlsx', '../../speeches/135_fs.xlsx', '../../speeches/137_ln.xlsx', '../../speeches/138_ln.xlsx', '../../speeches/139_ln.xlsx', '.

In [4]:
# load models
def _load_model(filename, model_dir='../secondary_models/'):
    """Load one pickled model checkpoint from ``model_dir`` onto the CPU.

    Args:
        filename: File name of the ``.pt`` checkpoint inside ``model_dir``.
        model_dir: Directory (with trailing slash) holding the checkpoints.

    Returns:
        Whatever object ``torch.load`` reconstructs from the checkpoint.
    """
    # map_location='cpu' lets checkpoints that were saved from a GPU load on a
    # CPU-only machine, and keeps GPU memory free until a model is actually
    # moved to the device for prediction.
    # NOTE(security): torch.load unpickles arbitrary Python objects; only load
    # checkpoints from a trusted source.
    # NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True,
    # which rejects fully pickled modules; pass weights_only=False there if
    # these checkpoints are trusted.
    return torch.load(model_dir + filename, map_location='cpu')


# Primary classifier (its predictions become the `label` column downstream).
charisma = _load_model('label_label_20epoch.pt')

# One secondary classifier per tactic.
Metaphor = _load_model('Metaphor_secondary_label_20epoch_with_corpus.pt')
Simile = _load_model('Simile_secondary_label_20epoch.pt')
Rhetorical_questions = _load_model('Rhetorical_questions_secondary_label_20epoch.pt')
Stories_anecdotes = _load_model('Stories_anecdotes_secondary_label_20epoch.pt')
Contrasts = _load_model('Contrasts_secondary_label_20epoch.pt')
Lists = _load_model('Lists_secondary_label_20epoch.pt')
Repetition = _load_model('Repetition_secondary_label_20epoch.pt')
Moral_conviction = _load_model('Moral_conviction_secondary_label_with_corpus_20epoch.pt')
Sentiment_of_the_collective = _load_model('Sentiment_of_the_collective_secondary_label_20epoch.pt')
Setting_high_expectations = _load_model('Setting_high_expectations_secondary_label_20epoch.pt')
Confidence_in_goals = _load_model('Confidence_in_goals_secondary_label_20epoch.pt')

# Order matters: this list is paired position-by-position with the `tactics`
# name list used by the prediction loop.
secondary_models = [Metaphor,
                    Simile,
                    Rhetorical_questions,
                    Stories_anecdotes,
                    Contrasts,
                    Lists,
                    Repetition,
                    Moral_conviction,
                    Sentiment_of_the_collective,
                    Setting_high_expectations,
                    Confidence_in_goals
                   ]

In [5]:

# Load the BERT tokenizer.
# The 'bert-base-uncased' vocabulary is requested with do_lower_case=True;
# the resulting `tokenizer` is a module-level object that preprocess_data()
# uses to encode sentences.
# NOTE(review): presumably this matches the tokenizer the saved models were
# trained with -- confirm against the training notebook.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

Loading BERT tokenizer...


In [6]:
def preprocess_data(test_data, max_len=126, batch_size=32):
    """Tokenize the sentences of ``test_data`` and wrap them in a DataLoader.

    Side effect (relied upon by callers): a ``label`` column is written into
    ``test_data`` in place. A row gets label 1 when any of its cells equals 1
    and 0 otherwise.

    Args:
        test_data: DataFrame with a ``sentence`` column; all columns take part
            in the "any cell equals 1" label computation.
        max_len: Number of token ids each sentence is padded/truncated to.
        batch_size: Batch size of the returned DataLoader.

    Returns:
        Tuple ``(prediction_inputs, prediction_masks, prediction_labels,
        prediction_data, prediction_sampler, prediction_dataloader)``:
        the padded token-id tensor, the float attention-mask tensor, the long
        label tensor, the TensorDataset over those three, its
        SequentialSampler, and the DataLoader that iterates it in order.
    """
    # Vectorised equivalent of checking every row for a cell equal to 1.
    test_data["label"] = test_data.eq(1).any(axis=1).astype("int64")

    # Create sentence and label lists
    sentences = test_data["sentence"].values
    prediction_labels = torch.tensor(test_data["label"].values, dtype=torch.long)

    # Tokenize every sentence and map the tokens to their vocabulary ids.
    # `encode` adds the [CLS] and [SEP] special tokens. str() guards against
    # non-string cells (for example a 0 left behind by fillna(0) on a blank row),
    # which `encode` would otherwise reject.
    input_ids = [
        tokenizer.encode(str(sent), add_special_tokens=True)
        for sent in sentences
    ]

    # Pad (or cut) every id sequence at its end to exactly `max_len` entries.
    # NOTE(review): truncating="post" drops the trailing [SEP] of over-long
    # sentences; kept as-is on the assumption that the models were trained with
    # the same preprocessing -- confirm before changing.
    input_ids = pad_sequences(input_ids, maxlen=max_len,
                              dtype="long", truncating="post", padding="post")

    # Attention mask: 1.0 for real tokens (id > 0), 0.0 for padding (id 0).
    prediction_masks = torch.tensor(input_ids > 0, dtype=torch.float)
    prediction_inputs = torch.tensor(input_ids)

    # Create the DataLoader; the sequential sampler keeps the row order so
    # predictions can be written back positionally.
    prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
    return  prediction_inputs, prediction_masks, prediction_labels,  prediction_data, prediction_sampler, prediction_dataloader

In [7]:
def make_prediction(model, test_data):
    """Run ``model`` over the sentences of ``test_data`` and return class ids.

    The model is put in evaluation mode, moved to the module-level ``device``
    for the forward passes and moved back to the CPU afterwards, including when
    a forward pass raises.

    Args:
        model: Callable classifier taking ``(input_ids, token_type_ids=None,
            attention_mask=...)`` whose first output holds the logits.
        test_data: DataFrame handed to ``preprocess_data`` (which also writes
            a ``label`` column into it).

    Returns:
        1-D integer numpy array with one predicted class index per row of
        ``test_data``, in row order; an empty array when there are no rows.
    """
    # Only the dataloader is needed here; the other returned values are unused.
    prediction_dataloader = preprocess_data(test_data)[-1]

    # Put model in evaluation mode
    model.eval()
    model.to(device)

    batch_predictions = []
    try:
        for batch in prediction_dataloader:
            # Move the batch to the device; the labels are not needed for inference.
            b_input_ids, b_input_mask, _ = tuple(t.to(device) for t in batch)

            # No gradients are required for prediction, which saves memory and time.
            with torch.no_grad():
                outputs = model(b_input_ids, token_type_ids=None,
                                attention_mask=b_input_mask)

            logits = outputs[0].detach().cpu().numpy()
            # Softmax is monotonic, so the arg-max of the raw logits is the
            # predicted class; no probability computation is needed.
            batch_predictions.append(np.argmax(logits, axis=1))
    finally:
        # Always release the device memory held by this model.
        model.cpu()

    print('DONE.')

    # An empty input yields no batches; return an empty result instead of
    # failing on an arg-max over nothing.
    if not batch_predictions:
        return np.array([], dtype=np.int64)
    return np.concatenate(batch_predictions).flatten()

In [8]:
# For every speech workbook:
#   1. read the sheet and normalise its column names,
#   2. run the primary (charisma) model on every sentence,
#   3. run each secondary tactic model on the sentences the primary model
#      labelled 1,
#   4. write sentence, label and tactic predictions to the predictions folder.
tactics = ["Metaphor",
           "Simile",
           "Rhetorical_questions", 
           "Stories_anecdotes", 
           "Contrasts", 
           "Lists",
           "Repetition",
           "Moral_conviction",
           "Sentiment_of_the_collective",
           "Setting_high_expectations",
           "Confidence_in_goals"]
column_names = ["sentence"] + tactics
output_dir = '../../speeches/predictions/'
# Make sure the target folder exists before the first write.
os.makedirs(output_dir, exist_ok=True)

# The header names of the first workbook define the rename map applied to all
# workbooks (the first 7 rows of each sheet are skipped when reading).
new_cols = {}
if files:
    cols = pd.read_excel(files[0], skiprows=7).drop(columns=['Unnamed: 0']).columns
    new_cols = dict(zip(cols, column_names))
else:
    print('No input files found, nothing to predict.')

written_paths = set()
for file in files:
    data = pd.read_excel(file, skiprows=7).drop(columns=['Unnamed: 0'])
    data = data.rename(columns = new_cols).fillna(0)

    charisma_predictions = make_prediction(charisma, data)
    # Item assignment creates/overwrites the column regardless of whether it
    # already exists (attribute assignment would not create a new column).
    data["label"] = charisma_predictions

    # Independent copy so the tactic columns can be overwritten safely.
    secondary_data = data[data["label"] == 1].copy()

    # With no positively labelled sentence there is nothing for the secondary
    # models to score; the join below then fills every tactic with 0.
    if not secondary_data.empty:
        for tactic, pred_model in zip(tactics, secondary_models):
            print(tactic)
            secondary_data[tactic] = make_prediction(pred_model, secondary_data)

    final_df = data[['sentence','label']].join(secondary_data.drop(columns=['sentence','label'])).fillna(0)

    # '<dir>/<speech id>_<suffix>.xlsx' -> '<speech id>': drop the directory, the
    # extension and the last underscore-separated part of the file name.
    speech_id = os.path.splitext(os.path.basename(file))[0].rsplit('_', 1)[0]
    out_path = output_dir + speech_id + '_AI.xlsx'
    if out_path in written_paths:
        # Several inputs can share one speech id; make the overwrite visible.
        print('WARNING: overwriting ' + out_path + ' with predictions for ' + file)
    written_paths.add(out_path)

    final_df.to_excel(out_path)
    print('File written to: ' + out_path)

2023-05-11 10:44:30.542383: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-11 10:44:30.578909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 34570 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:21:00.0, compute capability: 8.0
2023-05-11 10:44:30.580475: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 35106 MB memory:  -> device: 1, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:81:00.0, compute capability: 8.0


DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/100_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/103_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/106_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DO

Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/133_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/134_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/135_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
D

DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/162_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/163_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/164_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/164_AI.xlsx
DONE.
Met

DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/208_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/209_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/210_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DO

DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/255_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/259_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/25_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.


DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/308_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/309_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/30B_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expe

DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/56B_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/57_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/predictions/58_AI.xlsx
DONE.
Metaphor
DONE.
Simile
DONE.
Rhetorical_questions
DONE.
Stories_anecdotes
DONE.
Contrasts
DONE.
Lists
DONE.
Repetition
DONE.
Moral_conviction
DONE.
Sentiment_of_the_collective
DONE.
Setting_high_expectations
DONE.
Confidence_in_goals
DONE.
File written to: ../../speeches/pre