Install Jupyter Notebook (if not installed)

If you haven't installed Jupyter yet, you can install it using pip:


    pip install notebook

In [1]:
import setup

setup.init_django()



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from courses.models import Course

In [18]:
# Fetch all courses with necessary fields (title, description, etc.)
courses = Course.objects.all().values('title', 'description', 'instructor__user', 'subject__title')

# Prepare data for fine-tuning (for example, using T5 for text-to-text retrieval)
train_data = []

for course in courses:
    query = "What is this course about?"
    course_desc = course['description']
    train_data.append({
        'input_text': query,
        'output_text': course_desc
    })

print(train_data)



[{'input_text': 'What is this course about?', 'output_text': 'Understand the fundamentals of project management, including Agile and Scrum methodologies.'}, {'input_text': 'What is this course about?', 'output_text': 'Learn to manage personal finances, budget effectively, and invest wisely.'}, {'input_text': 'What is this course about?', 'output_text': 'Learn to manage personal finances, budget effectively, and invest wisely.'}, {'input_text': 'What is this course about?', 'output_text': 'Create immersive VR games using Unity and C#.'}, {'input_text': 'What is this course about?', 'output_text': 'Understand user interface and user experience design principles, prototyping, and usability testing.'}, {'input_text': 'What is this course about?', 'output_text': 'Optimize websites for better search engine rankings.'}, {'input_text': 'What is this course about?', 'output_text': 'Create beautiful digital art using Procreate.'}, {'input_text': 'What is this course about?', 'output_text': 'Unde

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from datasets import Dataset

# Load the pre-trained T5 model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Prepare dataset for training (example list of input-output pairs)

# Convert training data into Hugging Face Dataset
train_dataset = Dataset.from_dict({
    "input_text": [item['input_text'] for item in train_data],
    "output_text": [item['output_text'] for item in train_data],
})

# Tokenization function
def preprocess_function(examples):
    model_inputs = tokenizer(examples["input_text"], max_length=512, truncation=True, padding="max_length")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["output_text"], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize the dataset
train_dataset = train_dataset.map(preprocess_function, batched=True)

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="no",  # Disable evaluation
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_steps=10,  # Save checkpoint every 10 steps
    logging_dir='./logs',  # Log directory
    logging_steps=10,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

# Fine-tune the model
trainer.train()


Map: 100%|██████████| 304/304 [00:02<00:00, 120.66 examples/s]
  trainer = Trainer(
  4%|▎         | 2/57 [15:15<7:07:31, 466.39s/it]

In [10]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load your fine-tuned model
model = T5ForConditionalGeneration.from_pretrained('results/checkpoint-3')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

def get_course_response(query):
    # Prepare the input query
    input_text = f"What is this course about? {query}"
    
    # Tokenize the input
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    
    # Generate the model output
    output = model.generate(inputs['input_ids'], max_length=200, num_beams=5, early_stopping=True)
    
    # Decode the output to get the course description
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    return response


In [12]:
input_query = "Fine Turning"

print(get_course_response(input_query))

Fine Turning
