# Exploration Notebook

This notebook is for exploratory analysis of the prompt dataset (`../src/data/dataset.csv`) and for experimenting with text generation using the pre-trained `distilgpt2` model. It allows for interactive coding and visualization.

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# distilgpt2 is a GPT-2 checkpoint: transformers exposes it through the GPT-2
# classes and has no DistilGPT2Tokenizer / DistilGPT2LMHeadModel to import.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Keep the previously used names bound so code that refers to them still runs.
DistilGPT2Tokenizer = GPT2Tokenizer
DistilGPT2LMHeadModel = GPT2LMHeadModel

# Set visualization style
sns.set(style='whitegrid')

In [None]:
# Read the CSV dataset into a DataFrame and preview its first five rows
data = pd.read_csv(filepath_or_buffer='../src/data/dataset.csv')
data.head(n=5)

In [None]:
# Plot how often each prompt occurs, most frequent first
plt.figure(figsize=(10, 6))
prompt_order = data['prompt'].value_counts().index
ax = sns.countplot(data=data, y='prompt', order=prompt_order)
ax.set_title('Prompt Frequency')
ax.set_xlabel('Frequency')
ax.set_ylabel('Prompt')
plt.show()

In [None]:
# Load the pre-trained model and tokenizer.
# distilgpt2 is loaded through the GPT-2 classes; transformers provides no
# DistilGPT2-specific tokenizer or model class. The import is repeated here
# so this cell does not depend on which names the setup cell managed to bind.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
model = GPT2LMHeadModel.from_pretrained('distilgpt2')

In [None]:
# Function to generate a response
def generate_response(prompt, max_length=50):
    """Generate text from ``prompt`` using the notebook's model and tokenizer.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Non-empty text to condition the model on.
        max_length: Upper bound passed to ``model.generate`` on the total
            sequence length in tokens (prompt tokens included). Defaults
            to 50.

    Returns:
        The first generated sequence decoded to text, with special tokens
        removed. For a causal LM such as GPT-2 the generated sequence
        starts with the prompt tokens, so the text begins with the prompt.

    Raises:
        ValueError: If ``prompt`` is empty.
    """
    if not prompt:
        # An empty prompt would tokenize to zero tokens, leaving the model
        # nothing to condition on.
        raise ValueError("prompt must be a non-empty string")

    # Calling the tokenizer (instead of ``encode``) also returns the
    # attention mask, so ``generate`` does not have to infer it.
    encoded = tokenizer(prompt, return_tensors='pt')
    outputs = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        num_return_sequences=1,
        # GPT-2 has no pad token; state the EOS fallback explicitly rather
        # than letting ``generate`` pick it with a warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke-test the generator with a sample prompt and show both sides
test_prompt = "Hello, how are you?"
response = generate_response(test_prompt)
print(f'Prompt: {test_prompt}', f'Response: {response}', sep='\n')