In [None]:
!pip install transformers accelerate -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/297.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m297.0/297.6 kB[0m [31m9.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h

## Mount Drive

In [None]:
# Attach Google Drive to the Colab runtime; the CSV inputs read further down
# live under /content/drive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


## Import Libraries

In [None]:
# Import libraries
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm

## Load OPT Model and Tokenizer

In [None]:
# Load the pretrained OPT-350M tokenizer and causal language model.
# The checkpoint id is kept in one place so both loads stay in sync.
opt_checkpoint = "facebook/opt-350m"
opt_tokenizer = AutoTokenizer.from_pretrained(opt_checkpoint, use_fast=True)
opt_model = AutoModelForCausalLM.from_pretrained(opt_checkpoint)

In [None]:
# --- Settings for the OPT rephrase run (tune here, not inside the loop) ---
OPT_INPUT_CSV = '/content/drive/MyDrive/rephrase_test.csv'
OPT_OUTPUT_CSV = 'opt_base_rephrase_generated_responses.csv'
OPT_MAX_NEW_TOKENS = 7   # length budget of each sampled continuation
OPT_NUM_CANDIDATES = 5   # sampled continuations per prompt (= output columns)
OPT_TOP_K = 50           # top-k sampling cutoff
OPT_SEED = 42            # fixed seed so sampled outputs are repeatable


def last_utterance_words(prompt):
    """Return the set of words in the second-to-last line of ``prompt``.

    The speaker label (everything up to the first ':') is dropped, so an
    utterance that itself contains a colon is kept whole.

    NOTE(review): this assumes prompts end with an open speaker turn such as
    "A: ", making the second-to-last line the last utterance by C -- confirm
    against the CSV contents.

    Args:
        prompt: Full conversation context, one utterance per line.

    Returns:
        Set of whitespace-separated tokens of that utterance; an empty set
        when the prompt has fewer than two lines.
    """
    lines = prompt.split('\n')
    if len(lines) < 2:
        return set()
    _speaker, separator, text = lines[-2].partition(':')
    utterance = text if separator else lines[-2]
    return set(utterance.split())


def overlap_score(response, reference_words):
    """Count the distinct whitespace-separated words of ``response`` that
    also occur in ``reference_words``."""
    return len(set(response.split()) & reference_words)


# Read the test data into a pandas DataFrame
test = pd.read_csv(OPT_INPUT_CSV, dtype={'prompt': str})

# Select the device AND place the model on it; previously the device was only
# printed, so generation ran on CPU even when a GPU was available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
opt_model.to(device)

# Sampling is stochastic; seed it so a re-run reproduces the same CSV.
torch.manual_seed(OPT_SEED)

# ranked_by_position[k] collects, for every prompt, the response ranked k+1.
ranked_by_position = [[] for _ in range(OPT_NUM_CANDIDATES)]

for prompt in tqdm(test['prompt'], desc="Generating Responses", total=len(test)):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = opt_tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Sample several continuations with top-k sampling
    output = opt_model.generate(
        **inputs,
        max_new_tokens=OPT_MAX_NEW_TOKENS,
        num_return_sequences=OPT_NUM_CANDIDATES,
        pad_token_id=opt_tokenizer.eos_token_id,
        do_sample=True,
        top_k=OPT_TOP_K,
    )

    # Decode only the newly generated tokens. The returned sequences start
    # with the prompt tokens, and slicing them off is more reliable than
    # removing the prompt text from the decoded string afterwards.
    continuations = opt_tokenizer.batch_decode(
        output[:, prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    # Re-rank by word overlap with the reference utterance. Scoring the
    # continuation alone matters: text that still includes the prompt already
    # contains every reference word, which makes all candidates tie.
    reference_words = last_utterance_words(prompt)
    ranked = sorted(
        continuations,
        key=lambda text: overlap_score(text, reference_words),
        reverse=True,
    )

    for position, text in enumerate(ranked):
        ranked_by_position[position].append(text.replace('\n', ' '))

# Attach the ranked responses as whole columns (one assignment per column
# instead of cell-by-cell writes that create columns on the fly).
for position, column_values in enumerate(ranked_by_position, start=1):
    test[f'generated_response_{position}'] = column_values

# Save the DataFrame to a CSV file
test.to_csv(OPT_OUTPUT_CSV, index=False)

Using device: cuda


Generating Responses: 100%|██████████| 69/69 [05:31<00:00,  4.81s/it]


## Load DialoGPT Model and Tokenizer

In [None]:
# Load the pretrained DialoGPT-medium tokenizer and causal language model.
# The checkpoint id is kept in one place so both loads stay in sync.
dgpt_checkpoint = "microsoft/DialoGPT-medium"
dgpt_tokenizer = AutoTokenizer.from_pretrained(dgpt_checkpoint, use_fast=True)
dgpt_model = AutoModelForCausalLM.from_pretrained(dgpt_checkpoint)

In [None]:
# --- Settings for the DialoGPT feedback run (tune here, not inside the loop) ---
DGPT_INPUT_CSV = '/content/drive/MyDrive/feedback_test1.csv'
DGPT_OUTPUT_CSV = 'dialogpt_base_feedback_generated_responses.csv'
DGPT_MAX_NEW_TOKENS = 10  # length budget of each sampled continuation
DGPT_NUM_CANDIDATES = 5   # sampled continuations per prompt (= output columns)
DGPT_TOP_K = 50           # top-k sampling cutoff
DGPT_TEMPERATURE = 0.7    # softmax temperature used while sampling
DGPT_SEED = 42            # fixed seed so sampled outputs are repeatable


def last_utterance_words(prompt):
    """Return the set of words in the second-to-last line of ``prompt``.

    The speaker label (everything up to the first ':') is dropped, so an
    utterance that itself contains a colon is kept whole.

    NOTE(review): this assumes prompts end with an open speaker turn such as
    "A: ", making the second-to-last line the last utterance by C -- confirm
    against the CSV contents.

    Args:
        prompt: Full conversation context, one utterance per line.

    Returns:
        Set of whitespace-separated tokens of that utterance; an empty set
        when the prompt has fewer than two lines.
    """
    lines = prompt.split('\n')
    if len(lines) < 2:
        return set()
    _speaker, separator, text = lines[-2].partition(':')
    utterance = text if separator else lines[-2]
    return set(utterance.split())


def overlap_score(response, reference_words):
    """Count the distinct whitespace-separated words of ``response`` that
    also occur in ``reference_words``."""
    return len(set(response.split()) & reference_words)


# Read the test data into a pandas DataFrame
test = pd.read_csv(DGPT_INPUT_CSV, dtype={'prompt': str})

# Select the device AND place the model on it; previously the device was only
# printed, so generation ran on CPU even when a GPU was available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
dgpt_model.to(device)

# Sampling is stochastic; seed it so a re-run reproduces the same CSV.
torch.manual_seed(DGPT_SEED)

# ranked_by_position[k] collects, for every prompt, the response ranked k+1.
ranked_by_position = [[] for _ in range(DGPT_NUM_CANDIDATES)]

for prompt in tqdm(test['prompt'], desc="Generating Responses", total=len(test)):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = dgpt_tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Sample several continuations with temperature-scaled top-k sampling
    output = dgpt_model.generate(
        **inputs,
        max_new_tokens=DGPT_MAX_NEW_TOKENS,
        num_return_sequences=DGPT_NUM_CANDIDATES,
        pad_token_id=dgpt_tokenizer.eos_token_id,
        temperature=DGPT_TEMPERATURE,
        do_sample=True,
        top_k=DGPT_TOP_K,
    )

    # Decode only the newly generated tokens. The returned sequences start
    # with the prompt tokens, and slicing them off is more reliable than
    # removing the prompt text from the decoded string afterwards.
    continuations = dgpt_tokenizer.batch_decode(
        output[:, prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    # Re-rank by word overlap with the reference utterance. Scoring the
    # continuation alone matters: text that still includes the prompt already
    # contains every reference word, which makes all candidates tie.
    reference_words = last_utterance_words(prompt)
    ranked = sorted(
        continuations,
        key=lambda text: overlap_score(text, reference_words),
        reverse=True,
    )

    for position, text in enumerate(ranked):
        ranked_by_position[position].append(text.replace('\n', ' '))

# Attach the ranked responses as whole columns (one assignment per column
# instead of cell-by-cell writes that create columns on the fly).
for position, column_values in enumerate(ranked_by_position, start=1):
    test[f'generated_response_{position}'] = column_values

# Save the DataFrame to a CSV file
test.to_csv(DGPT_OUTPUT_CSV, index=False)

Using device: cuda


Generating Responses: 100%|██████████| 80/80 [07:44<00:00,  5.81s/it]


## Test Generation for a Single Prompt

In [None]:
# Smoke test: greedy-decode one continuation for a single hand-written context.

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")

# Load the model
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

# Define the conversation context (ends with an open "A: " turn for the model
# to complete)
conversation_context = "A: but they're all striped .\nA: so we don't wear them together .\nC: why ?\nA: cause it's not a good idea to wear different stripes together .\nA: in general .\nC: dis this is general ?\nC: I gon going to put salt all over you .\nA: "

# Tokenize via the tokenizer's __call__ so an attention mask is produced too.
# `tokenizer.encode` returns only the ids, and because pad_token_id is set to
# the EOS id below, generate() cannot infer the mask on its own.
encoded_context = tokenizer(conversation_context, return_tensors="pt")
input_ids = encoded_context["input_ids"]

# Generate response from the model (no sampling flags -> default decoding)
generated_output = model.generate(
    **encoded_context,
    max_new_tokens=10,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode and print the generated sequence; it contains the context followed by
# the model's continuation.
generated_response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
print("Predicted Response:")
print(generated_response)


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Predicted Response:
A: but they're all striped.
A: so we don't wear them together.
C: why?
A: cause it's not a good idea to wear different stripes together.
A: in general.
C: dis this is general?
C: I gon going to put salt all over you.
A: iz gud.
