In [None]:
!pip install --upgrade --force-reinstall torch torchvision torchaudio transformers numpy pandas torch datasets openai

Collecting torch
  Using cached torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Using cached torchvision-0.21.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached torchaudio-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting transformers
  Using cached transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
Collecting numpy
  Using cached numpy-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting datasets
  Using cached datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting openai
  Downloading openai-1.63.0-py3-none-any.whl.metadata (27 kB)
Collecting filelock (from torch)
  Using cached filelock-3.17.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.12.2-py

In [1]:
import numpy as np
import torch
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from datasets import load_dataset

In [2]:
import os
os.environ["WANDB_DISABLED"] = "true"


In [2]:
import pandas as pd
import numpy as np
from datasets import load_dataset

# Load dataset
df = pd.read_csv("/content/cleaned_cognitive_activities.csv")

# Identify category columns (Ensure unique column names)
category_columns = ["Memory", "Reasoning", "Association","Reasoning"]

# Extract cognitive skills
def get_skills(row):
    return ", ".join([col for col in category_columns if row[col] == "Yes"]) or "General Cognitive Skills"

df['skills_text'] = df.apply(get_skills, axis=1)

# Create structured input for fine-tuning
df['input_text'] = ("Generate an activity for Zone: " + df['Zone'] +
                    ", Age Range: " + df['Age'] +
                    ", Cognitive Skills: " + df['skills_text'])

# Generate default fields
df['Activity Name'] = df['skills_text'] + " Challenge"
df['description'] = "An engaging cognitive activity focused on " + df['skills_text'].fillna("multiple skills") + "."
df['Instructions'] = "Step 1: Read the instructions carefully.\nStep 2: Follow the steps based on " + df['skills_text'] + ".\nStep 3: Complete the task and analyze your performance."
df['time_required'] = df.apply(lambda x: str(np.random.randint(12, 20)) + " minutes", axis=1)

# Assign Zone & Objective
df['zone'] = df['Zone'].fillna("yellow")
df['objective'] = df['skills_text'].apply(lambda skills: "Improve " + skills.lower() + " skills.")

# Format dataset for training
df['output_text'] = (
    "Activity: " + df['Activity Name'] +
    "\nDescription: " + df['description'] +
    "\nInstructions: " + df['Instructions'] +
    "\nMaterials Required: None" +
    "\nTime Required: " + df['time_required'] +
    "\nZone: " + df['zone'] +
    "\nObjective: " + df['objective']
)

# Save dataset
df[['input_text', 'output_text']].to_json("/content/activity_dataset.json", orient="records", lines=True)

print("✅ Dataset prepared successfully!")


✅ Dataset prepared successfully!


In [44]:
import json

with open("/content/activity_dataset.json", "r") as f:
    for i in range(3):  # Print the first 3 examples
        print(json.loads(f.readline()))


{'input_text': 'Generate an activity for Zone: green, Age Range: 5-13, Cognitive Skills: Reasoning, Reasoning', 'output_text': 'Activity: Reasoning, Reasoning Challenge\nDescription: An engaging cognitive activity focused on Reasoning, Reasoning.\nInstructions: Step 1: Read the instructions carefully.\nStep 2: Follow the steps based on Reasoning, Reasoning.\nStep 3: Complete the task and analyze your performance.\nMaterials Required: None\nTime Required: 15 minutes\nZone: green\nObjective: Improve reasoning, reasoning skills.'}
{'input_text': 'Generate an activity for Zone: yellow, Age Range: 5-13, Cognitive Skills: Memory', 'output_text': 'Activity: Memory Challenge\nDescription: An engaging cognitive activity focused on Memory.\nInstructions: Step 1: Read the instructions carefully.\nStep 2: Follow the steps based on Memory.\nStep 3: Complete the task and analyze your performance.\nMaterials Required: None\nTime Required: 17 minutes\nZone: yellow\nObjective: Improve memory skills.'}


In [45]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
import torch

# Load dataset
dataset = load_dataset("json", data_files="/content/activity_dataset.json", split="train")

# Load tokenizer
tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-small")

# Tokenization function
def tokenize_data(batch):
     if "input_text" not in batch or "output_text" not in batch:
        print("❌ Missing 'input_text' or 'output_text':", batch)  # Debugging
        return batch  # Skip invalid data

     inputs = tokenizer(batch["input_text"], padding="max_length", truncation=True, max_length=256)
     outputs = tokenizer(batch["output_text"], padding="max_length", truncation=True, max_length=512)

     batch["input_ids"] = inputs["input_ids"]
     batch["attention_mask"] = inputs["attention_mask"]
     batch["labels"] = outputs["input_ids"]

     return batch

dataset = dataset.map(tokenize_data)

# Load model
model = T5ForConditionalGeneration.from_pretrained("google/t5-v1_1-small")

# Training settings
training_args = TrainingArguments(
    output_dir="./t5_activity_generator",
    per_device_train_batch_size=8,
    num_train_epochs=5,
    save_steps=500,
    evaluation_strategy="no",
    fp16=True,  # Use GPU if available
)

trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
trainer.train()

# Save model
model.save_pretrained("./activity_generator_model")
tokenizer.save_pretrained("./activity_generator_model")

print("✅ Model training complete! Saved as 'activity_generator_model'.")


Map:   0%|          | 0/99 [00:00<?, ? examples/s]



Step,Training Loss


✅ Model training complete! Saved as 'activity_generator_model'.


In [43]:
# Load fine-tuned model
model_path = "./activity_generator_model"
try:
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    print("✅ Model loaded successfully!")
except Exception as e:
    print("❌ Error loading model:", e)

def generate_activity(cognitive_skills, zone):
    """Generate a cognitive activity based on skills & zone."""
    prompt = (f"in English,generate an activity for Cognitive Skills: {cognitive_skills}, The response should include: Activity name, Description, "
              f"Instructions, Materials Required, Time Required, Zone, and Objective.")

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=256)
    print(inputs)

    with torch.no_grad():
        output_ids = model.generate(**inputs, max_length=512, num_return_sequences=1,temperature=0.7, top_p=0.9,
            do_sample=True)

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    if not generated_text.strip():
          generated_text = (
                "Activity: Memory Puzzle\n"
                "Description: A fun game to improve memory skills.\n"
                "Instructions: Match the cards to test memory.\n"
                "Materials Required: Cards\n"
                "Time Required: 15 minutes\n"
                "Objective: Enhance memory retention."
            )

    return generated_text

# Example usage:
print(generate_activity("Memory, Reasoning", "Green"))


✅ Model loaded successfully!
{'input_ids': tensor([[   16,  1566,     6,   729,    49,   342,    46,  1756,    21, 31109,
         19559,    10, 19159,     6, 21272,    53,     6,    37,  1773,   225,
           560,    10, 22536,   564,     6,  7726,     6, 21035,     7,     6,
         16158, 31377,     6,  2900, 31377,     6, 11628,     6,    11, 27919,
             5,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
, the website. this website.in English,return. this article.in English,in English,reflect. this page. this page. | Michael.? ??????????????????????????


In [50]:
prompt = (f"Generate an activity in English for Cognitive Skills: memory Zone: green "
              f"The response must be in English and 7 lines must have one line for each: Activity name, Description, Instructions, "
              f"Materials Required, Time Required, Zone, and Objective.")

inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=256)

with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=512,
            num_return_sequences=1,
            temperature=0.7,  # Encourage creativity
            top_p=0.9,  # Use nucleus sampling
            do_sample=True  # Enable sampling
        )

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("🔹 Debug: Raw Model Output ->", repr(generated_text))  # Debugging step


🔹 Debug: Raw Model Output -> "thist 2016,ulphLL: TI, Pres' goall  canyon'FT-y"


In [None]:
!zip -r activity_generator_model.zip /content/activity_generator_model
from google.colab import files
files.download("activity_generator_model.zip")

['README.md', 'anscombe.json', 'mnist_test.csv', 'mnist_train_small.csv', 'california_housing_test.csv', 'california_housing_train.csv']
