In [None]:
import pandas as pd
# Load the dataset
file_path = '/content/RickAndMortyScripts.csv'

# Build the frame in one chain so re-running the cell always starts from the
# raw CSV and never prefixes the speaker name twice.
dialogue_data = (
    pd.read_csv(file_path)
    # A missing speaker or utterance would turn the concatenation below into
    # NaN (a float), and "\n".join() raises TypeError on a non-string item.
    .dropna(subset=['name', 'line'])
    # Rewrite each utterance as "<speaker>: <utterance>".
    .assign(line=lambda df: df['name'].astype(str) + ": " + df['line'].astype(str))
)

# Combine all lines into a single text, one utterance per line
text_data = "\n".join(dialogue_data['line'].tolist())

# Save the combined text to a file. The encoding is spelled out so the
# result does not depend on the platform's default text encoding.
with open('/content/combined dialogues.txt', 'w', encoding='utf-8') as f:
    f.write(text_data)


In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments


# Pretrained checkpoint to fine-tune. Named once so the tokenizer and the
# model weights are always loaded from the same checkpoint.
BASE_CHECKPOINT = 'gpt2'

tokenizer = GPT2Tokenizer.from_pretrained(BASE_CHECKPOINT)
model = GPT2LMHeadModel.from_pretrained(BASE_CHECKPOINT)


def load_dataset(tokenizer, file_path, block_size=128):
    """Wrap a plain-text file in a ``TextDataset`` for language-model training.

    Args:
        tokenizer: Tokenizer handed to ``TextDataset``.
        file_path: Path of the text file the dataset is built from.
        block_size: Forwarded unchanged as ``TextDataset``'s ``block_size``
            (defaults to 128).

    Returns:
        The constructed ``TextDataset`` instance.
    """
    # NOTE(review): TextDataset appears to be deprecated in recent
    # transformers releases - confirm before upgrading the library.
    dataset_kwargs = {
        'tokenizer': tokenizer,
        'file_path': file_path,
        'block_size': block_size,
    }
    return TextDataset(**dataset_kwargs)

def load_data_collator(tokenizer):
    """Create the batch collator that is later passed to the ``Trainer``.

    Args:
        tokenizer: Tokenizer handed to ``DataCollatorForLanguageModeling``.

    Returns:
        A ``DataCollatorForLanguageModeling`` built with ``mlm=False``,
        i.e. with masked-language-model masking switched off.
    """
    collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    return collator
# Build the training dataset and the batch collator from the combined
# dialogue file.
text_file_path = '/content/combined dialogues.txt'
train_dataset = load_dataset(tokenizer, text_file_path)
data_collator = load_data_collator(tokenizer)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    overwrite_output_dir=True,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
    # Was 500. The recorded run shows an empty "Step / Training Loss" table,
    # consistent with training finishing before the first logging step, so
    # log often enough for the loss curve to be visible.
    logging_steps=10,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()

# Save the fine-tuned model and its tokenizer side by side so the directory
# can be passed straight to from_pretrained() later.
model.save_pretrained('./fine-tuned-gpt2-rm')
tokenizer.save_pretrained('./fine-tuned-gpt2-rm')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



Step,Training Loss


('./fine-tuned-gpt2-rm/tokenizer_config.json',
 './fine-tuned-gpt2-rm/special_tokens_map.json',
 './fine-tuned-gpt2-rm/vocab.json',
 './fine-tuned-gpt2-rm/merges.txt',
 './fine-tuned-gpt2-rm/added_tokens.json')

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned model and tokenizer.
# The training cell saves to the relative path './fine-tuned-gpt2-rm'; load
# from that same path instead of the absolute '/content/...' form, which only
# resolves to the same directory when the working directory is /content.
model = GPT2LMHeadModel.from_pretrained('./fine-tuned-gpt2-rm')
tokenizer = GPT2Tokenizer.from_pretrained('./fine-tuned-gpt2-rm')

def generate_dialogue(prompt, max_length=70, num_return_sequences=1, top_k=100, top_p=1, temperature=0.7, seed=None):
    """Sample dialogue continuations for ``prompt`` from the fine-tuned model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text the generation starts from, e.g. ``"Rick: I want to"``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_return_sequences: Number of samples requested from the model.
        top_k: Top-k sampling cutoff forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        seed: Optional integer. When given, the torch random generator is
            seeded before sampling so the call is reproducible. ``None``
            (the default) leaves the random state untouched.

    Returns:
        A list of decoded strings with exact duplicates removed, in the order
        the model produced them. It can therefore be shorter than
        ``num_return_sequences``.
    """
    if seed is not None:
        # Local import: this cell does not import torch at module level.
        import torch
        torch.manual_seed(seed)

    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask. generate() cannot infer it reliably here because the pad token
    # id is set to the EOS token id below.
    encoded = tokenizer(prompt, return_tensors='pt').to(model.device)

    # Generate text with sampling-based techniques
    outputs = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode outputs
    decoded_outputs = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

    # Drop exact duplicates while keeping generation order. A plain set()
    # would return the samples in arbitrary order.
    unique_outputs = list(dict.fromkeys(decoded_outputs))

    return unique_outputs

# Example usage
# The prompt deliberately has no trailing space: GPT-2's byte-level BPE
# attaches the space to the start of the following word, so a prompt that
# ends in a space tends to produce odd continuations.
prompt = "Rick: I want to"
dialogues = generate_dialogue(prompt, num_return_sequences=3)
for dialogue in dialogues:
    print(dialogue)


Rick: I want to —I want to be the one to make sense of everything.
Rick: I know, you mean, but I don't know what's going on here.
Rick: I hope it's okay.
Rick: I hope it is.
Dr. Parson: Yeah, I think it's really important
Rick: I want to urn the thing.
Beth: Oh, Morty. I mean, not just a nice piece of furniture!
Morty: Oh, come on!
Rick: I'm not doing anything. Your man is in the middle of a bathroom stall.
Beth: What is that? What's that
Rick: I want to ________?
Dr. Niles: I love that.
Morty: I love you.
Dr. Niles:       
Dr. Niles: You're not in danger, Morty. You're in danger. I want to ________.
Morty:


In [None]:
# Example usage
# The prompt deliberately has no trailing space: GPT-2's byte-level BPE
# attaches the space to the start of the following word, so "Jerry: " makes
# the model emit a second space before the first word.
prompt = "Jerry:"
dialogues = generate_dialogue(prompt, num_return_sequences=3)
for dialogue in dialogues:
    print(dialogue)

Jerry:  Rick, you're a cop, okay?
Rick:  Okay. So, this is when I saw the last of the evil people...
Rick:  I mean, look at that! This is how it all ends...
Rick:  I'm not getting out of here.
Rick:  Rick, the hell
Jerry:  Yeah, you know, I suppose that's what you guys say to us.
J.C.:  It's not how we're supposed to do it, Morty. What can I say?
Rick:  What do you think is happening here?
Morty:  That's what I call a "reneg
Jerry:  There's no denying that and I think it's going to be fun.
Rick: I think you're gonna like this.  I think you're gonna like this.
Beth: Oh god, I know you did.
Rick: Yeah, the guy is back.
Beth: He went through a lot.


In [None]:
# Example usage
# The prompt deliberately has no trailing space: GPT-2's byte-level BPE
# attaches the space to the start of the following word, so a prompt that
# ends in a space tends to produce odd continuations.
prompt = "Morty: Let's go Rick"
dialogues = generate_dialogue(prompt, num_return_sequences=3)
for dialogue in dialogues:
    print(dialogue)

Morty: Let's go Rick!
Rick: I need you to talk to him now, Morty.
Morty: Ahh! Rick, you're coming out of the door!
Rick: Not so fast, Morty. We got you two-way!
Morty: Oh, god, and he was
Morty: Let's go Rick!!!
Rick: AAAAAAAAAAAAAAAAAAAAAA!
Morty: Then we'll go Morty.
Jerry: I know, I know, Morty. I know, I know. It's a good thing that we had to get from the hospital to a hospital after all.
Morty
Morty: Let's go Rick!
Beth: Oh, my God!
Rick: No, I need you to hear this.
Beth: I like it!
Rick: I'm sorry. I'm sorry!
Beth: What are you talking about?
Rick: I'm sorry you have to hear


In [None]:
# Example usage
# The prompt deliberately has no trailing space: GPT-2's byte-level BPE
# attaches the space to the start of the following word, so "Rick: " makes
# the model emit a second space before the first word.
prompt = "Rick:"
dialogues = generate_dialogue(prompt, num_return_sequences=1)
for dialogue in dialogues:
    print(dialogue)

Rick:  Yeah, good on you.
Jazz: Oh, my God, that's gonna be fun.
Morty: Come on and do it.
Jerry: Oh, my God, it's so fucking awesome! I could just pull this off!
Rick: What the fuck? You're gonna be like, "


In [None]:
# Example usage
# The prompt deliberately has no trailing space: GPT-2's byte-level BPE
# attaches the space to the start of the following word, so a prompt that
# ends in a space can yield a doubled space before the continuation.
prompt = "Morty: Let's go Rick"
dialogues = generate_dialogue(prompt, num_return_sequences=2)
for dialogue in dialogues:
    print(dialogue)

Morty: Let's go Rick  Rick, you know how to get a gun?
Rick: Oh, thank you very much, Morty.
Morty: Oh, thank you, Morty.
Rick: You're my only hope, Morty. I see what you're doing here, Morty.
Morty: Are
Morty: Let's go Rick!!!
Rick: I was a bit pissed off, Morty.
Morty: Wait, what are you saying?
Rick: They don't have much choice. They take out my daughter and my daughter's sister.
Morty: I don't think it's worth it!

