In [None]:
!pip install transformers

In [None]:
import torch
import numpy as np
from transformers import BertForSequenceClassification, BertTokenizer

In [None]:
# Instantiate the pre-trained uncased BERT base checkpoint with a
# sequence-classification head that has a single output (num_labels=1),
# together with the tokenizer loaded from the same checkpoint name.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# Read both data files into memory, one list entry per line
# (each entry keeps its line terminator).
with open('original_rt_snippets.txt', 'r') as f:
    data_texts = list(f)

with open('sentiment_labels.txt', 'r') as f:
    data_label = list(f)

# Keep up to 100 entries from each file: snippets starting at the first
# line, labels starting at the second line (label line 0 is skipped).
# NOTE(review): presumably label line 0 is a header row and label line k+1
# belongs to snippet line k -- confirm against the data files.
data_texts = data_texts[:100]
data_label = data_label[1:101]

In [None]:
# Build two parallel lists from the raw lines: the float score of every
# example and the corresponding sentence text.
labels = []
sentences = []

# Fail loudly (instead of silently dropping sentences) when there are not
# enough label lines to pair with every text line.
if len(data_label) < len(data_texts):
    raise ValueError(
        f"expected at least {len(data_texts)} label lines, got {len(data_label)}"
    )

for text_line, label_line in zip(data_texts, data_label):
    # A label line has the form "<id>|<score>"; only the score is used.
    _label_id, label_val = label_line.split('|')
    # float() ignores surrounding whitespace, so the newline needs no slicing.
    # Slicing with [:-1] would cut a digit off a final line that has no
    # trailing newline (e.g. "0.5" -> "0." -> 0.0).
    labels.append(float(label_val))
    # Strip only the line terminator, never a real character.
    sentences.append(text_line.rstrip('\n'))

In [None]:
# Encode all sentences in one tokenizer call, with padding and truncation
# switched on and PyTorch tensors requested as the return type.
encoded_inputs = tokenizer(
    sentences,
    return_tensors="pt",
    truncation=True,
    padding=True,
)

# Pack the float scores into a tensor and add a trailing axis,
# giving one row per sentence.
labels_tensor = torch.FloatTensor(labels)[:, None]

In [None]:
def remover_str(my_string, value3):
    """Return ``my_string`` with every character not found in ``value3`` removed.

    Args:
        my_string: The text to filter.
        value3: The characters to keep; any container supporting ``in``
            (for example a list of letters or a string).

    Returns:
        A new string made of the characters of ``my_string`` that occur in
        ``value3``, in their original order.
    """
    # Filter in a single pass using the parameter itself; the previous body
    # read an undefined name (``values3``) and rewrote the string while
    # iterating over it.
    return "".join(item for item in my_string if item in value3)

In [None]:
# Methodology to assign a label when no label is given:
# map_reduce derives a score for each sentence from the
# sentiment words it contains.
def map_reduce(dataFrame, positive_words=None, negative_words=None):
    """Score every row of ``dataFrame['description']`` by counting sentiment words.

    Each description is lower-cased and split on whitespace. Every word is
    reduced to its lowercase ASCII letters, then adds 1 to the row's score if
    it is in the positive lexicon and subtracts 1 if it is in the negative
    lexicon.

    Args:
        dataFrame: A pandas DataFrame with a ``description`` column.
        positive_words: Container of positive words. When ``None`` the
            module-level name ``positive`` is used.
        negative_words: Container of negative words. When ``None`` the
            module-level name ``negative`` is used.

    Returns:
        ``None`` when ``dataFrame`` is empty. Otherwise a tuple
        ``(reviewSentiment, descLen, n)`` where ``reviewSentiment`` maps the
        row position to its integer score, ``descLen`` maps the row position
        to ``score / word_count`` for rows with a non-zero score only, and
        ``n`` is the number of rows processed.
    """
    if len(dataFrame) == 0:
        return None

    # Resolve the lexicons lazily so callers that pass them explicitly do not
    # depend on module-level state.
    if positive_words is None:
        positive_words = positive
    if negative_words is None:
        negative_words = negative

    values3 = "abcdefghijklmnopqrstuvwxyz"  # the only characters kept in a word

    # Lower-case every description and split it into whitespace-separated words.
    desWords = dataFrame['description'].astype(str).apply(str.lower).str.split()

    reviewSentiment = {}  # row position -> score
    descLen = {}          # row position -> score / number of words

    for n, words in enumerate(desWords):
        score = 0
        for word in words:
            # Drop punctuation, digits and any other non-letter character.
            word = "".join(ch for ch in word if ch in values3)
            if word in positive_words:
                score += 1
            if word in negative_words:
                score -= 1

        reviewSentiment[n] = score
        # A non-zero score implies at least one word, so the division is safe.
        if score != 0:
            descLen[n] = score / len(words)

    return reviewSentiment, descLen, len(desWords)

In [None]:
# Train the model: 10 Adam updates, each computed on the full encoded batch
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
model.train()  # set once up front; nothing in the loop changes the mode

for epoch in range(10):
    optimizer.zero_grad()  # clear gradients left from the previous pass

    # Passing the labels makes the returned outputs carry a loss.
    outputs = model(labels=labels_tensor, **encoded_inputs)
    loss = outputs.loss

    loss.backward()
    optimizer.step()

    print('Epoch:', epoch, 'Training Loss:', loss.item())

In [None]:
# Evaluate the model on the same encoded inputs, without tracking gradients
model.eval()
with torch.no_grad():
    outputs = model(**encoded_inputs)
    # The model was created with num_labels=1, so drop only the trailing
    # axis. A bare squeeze() would also remove the batch axis when there is
    # a single sentence, turning `predictions` into a float instead of a list.
    predictions = outputs.logits.squeeze(-1).tolist()

In [None]:
# Print some example predictions: the first 10 examples, or all of them
# when fewer than 10 are available (slicing avoids an IndexError).
for sentence, true_score, predicted in zip(sentences[:10], labels[:10], predictions[:10]):
    print("Sentence:", sentence)
    print("True score:", true_score)
    print("Predicted score:", predicted)
    print()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Line plot of the predicted score of every sentence against its position
predict_score = predictions
texting_label = range(len(predict_score))  # x-axis: 0 .. number of predictions - 1

fig, ax = plt.subplots()
ax.plot(texting_label, predict_score, marker='')
ax.set_xlabel('Text Label Contents ID')
ax.set_ylabel('Text Label Scores w/ID')
ax.set_title('Plot Prediction score graph visualization using Transformer-Based Model')
ax.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Line plot of the true score of every sentence, drawn over the x-axis
# positions (`texting_label`) created for the prediction plot.
fig, ax = plt.subplots()
ax.plot(texting_label, labels, marker='')
ax.set_xlabel('Text Label Contents ID')
ax.set_ylabel('Text Label True Scores')
ax.set_title('Plot True Scores graph visualization using Transformer-Based Model')
ax.grid(True)
plt.show()

In [None]:
text_string = "Hello, I kinda love this movie and I may join that again in the future"

# Tokenize input string
encoded_input = tokenizer(text_string, padding=True, truncation=True, return_tensors="pt")

# Make sure the model is in evaluation mode even if this cell is run right
# after the training cell (which leaves the model in training mode).
model.eval()

# Pass input string through the model to obtain predicted score
with torch.no_grad():
    outputs = model(**encoded_input)
    # One input and one output value, so the logits hold a single element.
    predicted_score = outputs.logits.item()

print("Predicted score for the input text is:", predicted_score)