# Custom Checkpoint: fine-tuning a causal language model from a saved checkpoint

In [None]:
# Step 1: Setup Environment
# Install Hugging Face Transformers library and other necessary libraries
!pip install transformers datasets torch

# Import Libraries
import os

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

# Step 2: Load and Prepare the Dataset
# Load a general conversational dataset (DailyDialog).
dataset = load_dataset("daily_dialog")

# Carve a 10% validation subset out of the training split. The seed is fixed
# so the same examples land in each subset on every run, which keeps the
# evaluation loss comparable between runs.
train_testvalid = dataset['train'].train_test_split(test_size=0.1, seed=42)
train_dataset = train_testvalid['train']
val_dataset = train_testvalid['test']

# Preprocess Dataset
def tokenize_function(example):
    """Tokenize a batch of DailyDialog conversations for causal-LM training.

    Intended for ``Dataset.map(..., batched=True)``: ``example["dialog"]`` is
    expected to be a list of conversations, each conversation being a list of
    utterance strings.

    Args:
        example: Batch mapping that contains the ``"dialog"`` column.

    Returns:
        The tokenizer output (``input_ids`` and ``attention_mask``, padded or
        truncated to 128 tokens) with an added ``labels`` entry.

    Raises:
        KeyError: If the batch has no ``"dialog"`` column.
    """
    # GPT-2 style tokenizers define no padding token, and padding to a fixed
    # length fails without one; fall back to the end-of-sequence token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Flatten each conversation into one training string: utterances are
    # separated (and the conversation terminated) by the EOS token so turn
    # boundaries stay visible to the model. Assumes the tokenizer defines an
    # EOS token, which is the case for GPT-2.
    turn_separator = tokenizer.eos_token
    conversations = [
        turn_separator.join(turn.strip() for turn in turns) + turn_separator
        for turns in example["dialog"]
    ]

    encoded = tokenizer(
        conversations,
        padding="max_length",
        truncation=True,
        max_length=128,
    )

    # Causal-LM training needs labels; without them the model returns no loss.
    # Padding positions are set to -100 so the loss ignores them. The attention
    # mask is used (not the token id) because the padding token may be the same
    # id as the EOS separators, which must stay in the loss.
    encoded["labels"] = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

# Step 3: Load Tokenizer and Model
# The tokenizer always comes from the base model. The weights come from the
# custom checkpoint when that local directory exists; otherwise the base model
# is loaded instead of failing on the placeholder path.
model_name = "gpt2"
custom_checkpoint = "path/to/your/custom_checkpoint"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 defines no padding token, but the preprocessing pads every example to
# a fixed length; reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if os.path.isdir(custom_checkpoint):
    model_source = custom_checkpoint
else:
    print(f"Checkpoint directory '{custom_checkpoint}' not found; loading '{model_name}' instead.")
    model_source = model_name
model = AutoModelForCausalLM.from_pretrained(model_source)
# Keep the model config consistent with the tokenizer's padding token.
model.config.pad_token_id = tokenizer.pad_token_id

# Tokenize datasets
# Run the batched tokenizer over both splits; each name is rebound to the
# mapped result.
train_dataset, val_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, val_dataset)
)

# Set Format to PyTorch
def set_format_to_torch(dataset):
    """Switch ``dataset`` to PyTorch output in place.

    ``input_ids`` and ``attention_mask`` are the columns requested in torch
    format, and ``output_all_columns`` is enabled so the other columns are
    not dropped from the output. Nothing is returned.
    """
    tensor_columns = ['input_ids', 'attention_mask']
    format_options = {
        'type': 'torch',
        'columns': tensor_columns,
        'output_all_columns': True,
    }
    dataset.set_format(**format_options)

# Apply the PyTorch format to the training split first, then the validation split.
for _split in (train_dataset, val_dataset):
    set_format_to_torch(_split)

# Step 4: Set Training Arguments and Fine-tune the Model
training_args = TrainingArguments(
    output_dir="./results",
    # `evaluation_strategy` was renamed to `eval_strategy` (transformers 4.41)
    # and the old keyword is rejected by later releases; the unpinned install
    # above pulls in a release that expects the new name.
    eval_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    # Keep only the two most recent checkpoints in `output_dir`.
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Fine-tune the model
trainer.train()

# Step 5: Save the Model
# The Trainer was not given the tokenizer, so save it explicitly next to the
# weights; the directory can then be reloaded with `from_pretrained` for both
# the model and the tokenizer.
output_model_dir = "custom-conversational-model"
trainer.save_model(output_model_dir)
tokenizer.save_pretrained(output_model_dir)

# Step 6: Test the Model
# Function to chat with the fine-tuned model
def generate_response(input_text):
    """Generate a continuation of ``input_text`` with the fine-tuned model.

    Args:
        input_text: Prompt string to feed to the model.

    Returns:
        The decoded first generated sequence with special tokens removed.
        The sequence is capped at 50 tokens in total, prompt included.
    """
    # Calling the tokenizer (rather than `encode`) also yields the attention
    # mask, which `generate` needs to tell real tokens from padding.
    encoded = tokenizer(input_text, return_tensors='pt')
    # The Trainer may have moved the model to an accelerator; the inputs must
    # live on the same device or `generate` fails with a device mismatch.
    encoded = encoded.to(model.device)

    # Make sure dropout is disabled for inference.
    model.eval()
    output = model.generate(
        **encoded,
        max_length=50,
        num_return_sequences=1,
        # GPT-2 has no dedicated padding token; pass one explicitly so
        # `generate` does not have to guess.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example Usage
# Send one sample prompt through the model and print the generated text.
user_input = "How is the weather today?"
response = generate_response(input_text=user_input)
print(response)
