In [80]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import pandas as pd
import torch
from tqdm import tqdm
import nltk
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge
import rouge
from nltk.tokenize import word_tokenize

In [3]:
# tokenizer = GPT2Tokenizer.from_pretrained('./gpt2-finetuned')
tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
model = GPT2LMHeadModel.from_pretrained('./gpt2-finetuned')
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [91]:
df = pd.read_csv('training_data/QnA_intent_processed_amazon_reviews_us_Health.tsv', sep= '\t')
df.head()

Unnamed: 0,customer_id,review_id,product_id,product_title,product_category,star_rating,helpful_votes,vine,verified_purchase,review_sentence,intent,QnA
0,15053459,r2aqkuzbxmhl71,b000y4yo54,core products soothe-a-ciser natural foam trac...,health & personal care,5,61,n,y,painsaver!.,express_dissatisfaction,core products soothe-a-ciser natural foam trac...
1,15053459,r2aqkuzbxmhl71,b000y4yo54,core products soothe-a-ciser natural foam trac...,health & personal care,5,61,n,y,this soothe-a-ciser really works for relieving...,verify_authenticity,core products soothe-a-ciser natural foam trac...
2,15053459,r2aqkuzbxmhl71,b000y4yo54,core products soothe-a-ciser natural foam trac...,health & personal care,5,61,n,y,.no bad smells.good quality item.and so simple...,usage,core products soothe-a-ciser natural foam trac...
3,15053459,r2aqkuzbxmhl71,b000y4yo54,core products soothe-a-ciser natural foam trac...,health & personal care,5,61,n,y,"even thought my pinched nerve is gone, i can t...",express_dissatisfaction,core products soothe-a-ciser natural foam trac...
4,15053459,r2aqkuzbxmhl71,b000y4yo54,core products soothe-a-ciser natural foam trac...,health & personal care,5,61,n,y,it helps your back and neck to properly align ...,express_dissatisfaction,core products soothe-a-ciser natural foam trac...


In [7]:
inputs = tokenizer.encode("10-28 inch 100% raw virgin brazilian remy human hair extensions wavy weave weft bundle #1b Question: Can this product be used daily? Answer:", return_tensors="pt")

output_sequences = model.generate(
    inputs,
    max_length=200,  # Set the maximum length for generated text
    temperature=1.0,  # Control the randomness of predictions
    num_return_sequences=1  # Number of sentences to generate
)

# Decode generated sequences into text
generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
print(generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


10-28 inch 100% raw virgin brazilian remy human hair extensions wavy weave weft bundle #1b Question: Can this product be used daily? Answer: 5 stars i have been using the first. the first. the same.i-3.i.i and i have been using the same.i.i-up.i. the same..i.i.i to the same.i.i. the same.. the same. the same.i.i..i.i the same.i.i.i and the same.i. the same. the same........................................................................


In [92]:
def truncate_after_answer(qna):
    parts = qna.split("Answer:")  # Split the text at "Answer:"
    if len(parts) > 1:
        return parts[0] + "Answer:"  # Return the part before "Answer:" and include "Answer:"
    return qna  # Return the original text if "Answer:" is not found

# Apply the function to each row in the QnA column
df['prompts'] = df['QnA'].apply(truncate_after_answer)

In [93]:
def generate_response(query, model, tokenizer, max_length=200):
    # Preparing the prompt text
    # prompt_text = f"{query['product_title']} Question: {query['query']} Answer:"
    # query.split("Answer:")
    # Encoding the text
    input_ids = tokenizer.encode(query, return_tensors='pt')
    # Generating a response
    with torch.no_grad():
        outputs = model.generate(input_ids, max_length=max_length, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
        
    # Decoding and returning the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response[len(query):]  # Return only the generated part


In [94]:
sampled = df.sample(n= 100)

In [95]:
tqdm.pandas()
sampled['generated_response'] = sampled.progress_apply(lambda row: generate_response(row['prompts'], model, tokenizer), axis=1)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [06:49<00:00,  4.10s/it]


In [96]:
sampled.generated_response.iloc[0]

" 5 stars i've been a few days.i.-up. the morning.i.i and i've been taking the morning.i weeks. the morning.i and i've been taking the morning. the morning. the morning.i days.i. weeks. the morning. the morning. the morning. the morning. the morning. the morning.i.. the morning.. the morning the morning. the morning.5.i weeks. the morning. the morning. the morning. the morning........................................................................"

In [75]:
def calculate_evaluation_metrics(generated_responses, reference_reviews):
    """
    Calculates BLEU, ROUGE, and METEOR scores for the given generated responses against reference reviews.
    
    Args:
    - generated_responses (list of str): The responses generated by the model.
    - reference_reviews (list of list of str): The reference reviews for comparison, with each review being a list of sentences.
    
    Returns:
    - dict: A dictionary containing the BLEU, ROUGE, and METEOR scores.
    """
    
    # Prepare data for BLEU score calculation
    references = [[nltk.word_tokenize(ref) for ref in refs] for refs in reference_reviews]
    candidates = [nltk.word_tokenize(resp) for resp in generated_responses]
    
    # Calculate BLEU score
    bleu_score = corpus_bleu(references, candidates, smoothing_function=SmoothingFunction().method1)
    
    # Prepare data for METEOR score calculation
    # METEOR expects a single reference string per example, so we join multiple references into one string per set of references
    meteor_scores = [meteor_score([tokenizer.tokenize(refs)], tokenizer.tokenize(resp)) for resp, refs in zip(generated_responses, reference_reviews)]
    avg_meteor_score = sum(meteor_scores) / len(meteor_scores)
    
    # Initialize ROUGE scorer
    rouge = Rouge()
    # Prepare data for ROUGE score calculation
    # Joining multiple references into one string per set of references for ROUGE
    rouge_scores = rouge.get_scores(generated_responses, [' '.join(refs) for refs in reference_reviews], avg=True)
    
    # Combine the scores into a single dictionary for output
    scores = {
        'BLEU': bleu_score,
        'METEOR': avg_meteor_score,
        'ROUGE': rouge_scores
    }
    
    return scores

In [97]:
def calculate_evaluation_metrics(generated_responses, reference_reviews):
    # Tokenize references and responses
    references = [[word_tokenize(ref) for ref in refs] for refs in reference_reviews]
    candidates = [word_tokenize(resp) for resp in generated_responses]
 
    # Calculate BLEU score
    bleu_score = corpus_bleu(references, candidates, smoothing_function=SmoothingFunction().method1)
 
    # Calculate METEOR scores for each candidate against its references
    meteor_scores = [meteor_score(refs, cand) for refs, cand in zip(references, candidates)]
    avg_meteor_score = sum(meteor_scores) / len(meteor_scores)
 
    # Initialize ROUGE scorer
    rouge = Rouge()
    # Prepare data for ROUGE score calculation
    rouge_scores = rouge.get_scores([' '.join(cand) for cand in candidates], 
                                    [' '.join([' '.join(ref) for ref in refs]) for refs in reference_reviews], avg=True)
 
    # Combine the scores into a single dictionary for output
    scores = {
        'BLEU': bleu_score,
        'METEOR': avg_meteor_score,
        'ROUGE': rouge_scores
    }
 
    return scores

In [98]:
calculate_evaluation_metrics(sampled.generated_response, sampled.review_sentence.values)

{'BLEU': 9.891700617936448e-05,
 'METEOR': 0.06383208037422934,
 'ROUGE': {'rouge-1': {'r': 0.08444499445683695,
   'p': 0.0976012992451678,
   'f': 0.08747757641387119},
  'rouge-2': {'r': 5.882352941176471e-05,
   'p': 0.0003125,
   'f': 9.900987432605364e-05},
  'rouge-l': {'r': 0.08145354146538397,
   'p': 0.0957213972843835,
   'f': 0.08527029350070593}}}

In [90]:
sampled

Unnamed: 0,review_id,QnA,prompts,generated_response
94564,r3tipsnyir9ahe,natracure dress shoe forefoot cushions Questio...,natracure dress shoe forefoot cushions Questio...,5 stars i have been a day. the morning.i..i. ...
3520,r13q4a7z75ck4,nature's answer lobelia herb with organic alco...,nature's answer lobelia herb with organic alco...,5 stars i have been taking it. the morning. t...
94738,r3tp2n6oroktiu,legion athletics pulse pre-workout supplement ...,legion athletics pulse pre-workout supplement ...,5 stars i have been taking. this product.i.. ...
109842,rhkuny80okk4y,mason vitamins new advance ear health formula ...,mason vitamins new advance ear health formula ...,5 stars i have been using this product. the m...
59113,r2rlnw6gdqrfb8,"yan hu suo - corydalis yanhusuo tuber, 100 gra...","yan hu suo - corydalis yanhusuo tuber, 100 gra...",5 stars i have been using the same.i... the s...
...,...,...,...,...
109760,rhhgo5do7j30k,vermont soapworks - foaming hand soap unscente...,vermont soapworks - foaming hand soap unscente...,5 stars i've used. the best i would have used...
82978,r3h2vzci1j1tju,thermoskin open knee wrap stabilizer knee brac...,thermoskin open knee wrap stabilizer knee brac...,5 stars i have to use the same.i to use. the ...
21279,r1msvysw98oqjd,"wl 100% pure yacon syrup, 100% raw ★certified ...","wl 100% pure yacon syrup, 100% raw ★certified ...","5 stars i have been using the real, but i hav..."
608,r10n0uxk4q3vmc,best weight-loss with meratrim - healthy weigh...,best weight-loss with meratrim - healthy weigh...,5 stars i have to take to take to take to tak...
