In [1]:
!pip install transformers



In [4]:
import re
import random
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelWithLMHead
import torch.optim as optim
import seaborn as sns
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction
from transformers import AutoModelForCausalLM, AutoTokenizer

In [7]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

**Loading the finetuned model**

In [8]:
model = AutoModelForCausalLM.from_pretrained("/kaggle/input/fine-tuned-gpt")
tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/tokenizer-fine-tuned")

In [9]:
model = model.to(device)
learning_rate = 3e-4
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

In [10]:
def topk(probs, n=9):
    # The scores are initially softmaxed to convert to probabilities
    probs = torch.softmax(probs, dim= -1)

    # PyTorch has its own topk method, which we use here
    tokensProb, topIx = torch.topk(probs, k=n)

    # The new selection pool (9 choices) is normalized
    tokensProb = tokensProb / torch.sum(tokensProb)

    # Send to CPU for numpy handling
    tokensProb = tokensProb.cpu().detach().numpy()

    # Make a random choice from the pool based on the new prob distribution
    choice = np.random.choice(n, 1, p = tokensProb)
    tokenId = topIx[choice][0]

    return int(tokenId)

In [11]:
def model_infer(model, tokenizer, review, max_length=15):
    # Preprocess the init token (task designator)
    review_encoded = tokenizer.encode(review)
    result = review_encoded
    initial_input = torch.tensor(review_encoded).unsqueeze(0).to(device)

    with torch.set_grad_enabled(False):
        # Feed the init token to the model
        output = model(initial_input)

        # Flatten the logits at the final time step
        logits = output.logits[0,-1]

        # Make a top-k choice and append to the result
        result.append(topk(logits))

        # For max_length times:
        for _ in range(max_length):
            # Feed the current sequence to the model and make a choice
            input = torch.tensor(result).unsqueeze(0).to(device)
            output = model(input)
            logits = output.logits[0,-1]
            res_id = topk(logits)

            # If the chosen token is EOS, return the result
            if res_id == tokenizer.eos_token_id:
                return tokenizer.decode(result)
            else: # Append to the sequence
                result.append(res_id)
    # IF no EOS is generated, return after the max_len
    return tokenizer.decode(result)

In [12]:
review_input = input("Enter a review")
summary_input = input("Enter the summary of review")

Enter a review "The Fender CD-60S Dreadnought Acoustic Guitar is a great instrument for beginners. It has a solid construction, produces a rich sound, and feels comfortable to play. However, some users have reported issues with the tuning stability."
Enter the summary of review "Good for beginners but has tuning stability issues."


In [13]:
summary = model_infer(model, tokenizer, review_input+ " TL;DR ").split(" TL;DR ")[1].strip()
print("Generated Summary: ", summary)

2024-04-22 05:12:25.219043: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-22 05:12:25.219196: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-22 05:12:25.382249: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Generated Summary:  The Fender CD-100S Dreadnought Ac Guitar


In [14]:
hypothesis = summary
reference = summary_input

**Calculating ROGUE Scores**

In [15]:
def calculate_rouge_precision(hypothesis, reference):
    common_tokens = set(hypothesis) & set(reference)
    return len(common_tokens) / len(hypothesis)

def calculate_rouge_recall(hypothesis, reference):
    common_tokens = set(hypothesis) & set(reference)
    return len(common_tokens) / len(reference)

def calculate_rouge_f1_score(precision, recall):
    if precision + recall == 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)

In [17]:
rouge_1_precision = calculate_rouge_precision(hypothesis, reference)
rouge_1_recall = calculate_rouge_recall(hypothesis, reference)
rouge_1_f1_score = calculate_rouge_f1_score(rouge_1_precision, rouge_1_recall)

hypothesis_bigrams = set(zip(hypothesis, hypothesis[1:]))
reference_bigrams = set(zip(reference, reference[1:]))

rouge_2_precision = calculate_rouge_precision(hypothesis_bigrams, reference_bigrams)
rouge_2_recall = calculate_rouge_recall(hypothesis_bigrams, reference_bigrams)
rouge_2_f1_score = calculate_rouge_f1_score(rouge_2_precision, rouge_2_recall)


# Longest Common Subsequence (LCS) for ROUGE-L
def lcs(X, Y):
    m = len(X)
    n = len(Y)

    # Create a table to store lengths of longest common suffixes of substrings
    # Note that LCSuff[i][j] contains length of longest common suffix of X[0..i-1] and Y[0..j-1].
    LCSuff = [[0] * (n + 1) for _ in range(m + 1)]

    # To store the length of the longest common substring
    length = 0

    # To store the index of the cell which contains the maximum value.
    # This will be used to print the substring.
    row, col = 0, 0

    # Following steps build LCSuff[m+1][n+1] in bottom-up fashion.
    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0 or j == 0:
                LCSuff[i][j] = 0
            elif X[i - 1] == Y[j - 1]:
                LCSuff[i][j] = LCSuff[i - 1][j - 1] + 1
                if LCSuff[i][j] > length:
                    length = LCSuff[i][j]
                    row = i
                    col = j
            else:
                LCSuff[i][j] = 0

    # If we have non-zero length, then LCSuff contains the longest common substring.
    if length != 0:
        lcs_str = X[row - length: row]
    else:
        lcs_str = []

    return len(lcs_str) / len(X)

rouge_l_precision = calculate_rouge_precision(hypothesis, reference)
rouge_l_recall = calculate_rouge_recall(hypothesis, reference)
rouge_l_f1_score = calculate_rouge_f1_score(rouge_l_precision, rouge_l_recall)


In [18]:
print("ROUGE-1: Precision: {:.2f}, Recall: {:.2f}, F1-Score: {:.2f}".format(rouge_1_precision, rouge_1_recall, rouge_1_f1_score))
print("ROUGE-2: Precision: {:.2f}, Recall: {:.2f}, F1-Score: {:.2f}".format(rouge_2_precision, rouge_2_recall, rouge_2_f1_score))
print("ROUGE-L: Precision: {:.2f}, Recall: {:.2f}, F1-Score: {:.2f}".format(rouge_l_precision, rouge_l_recall, rouge_l_f1_score))

ROUGE-1: Precision: 0.33, Recall: 0.25, F1-Score: 0.28
ROUGE-2: Precision: 0.13, Recall: 0.10, F1-Score: 0.11
ROUGE-L: Precision: 0.33, Recall: 0.25, F1-Score: 0.28
