In [3]:
import json
import os

data_dir = "./flutter_dataset"
missing = []

for filename in sorted(os.listdir(data_dir)):
    if filename.endswith(".json"):
        path = os.path.join(data_dir, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                item = json.load(f)
            if "prompt" not in item or "flutter_code" not in item:
                missing.append(filename)
        except Exception as e:
            missing.append(f"{filename} (error: {str(e)})")

print("⚠️ Files missing keys or invalid JSON:")
print(missing if missing else "✅ All files are valid.")


⚠️ Files missing keys or invalid JSON:
✅ All files are valid.


In [5]:
import json
import os

data_dir = "./flutter_dataset"
output_file = "flutter_code_dataset.jsonl"

with open(output_file, "w", encoding="utf-8") as out:
    for filename in sorted(os.listdir(data_dir)):
        if filename.endswith(".json"):
            path = os.path.join(data_dir, filename)
            with open(path, "r", encoding="utf-8") as f:
                item = json.load(f)
                prompt = item["prompt"]
                code = item["flutter_code"]
                json.dump({"input_text": prompt, "target_text": code}, out)
                out.write("\n")

print("✅ Dataset prepared as flutter_code_dataset.jsonl")


✅ Dataset prepared as flutter_code_dataset.jsonl


In [6]:
pip install transformers datasets sentencepiece


Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# Load dataset
dataset = load_dataset("json", data_files="flutter_code_dataset.jsonl", split="train")
dataset = dataset.train_test_split(test_size=0.1)

# Load model and tokenizer
model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [3]:
def preprocess(examples):
    inputs = [ex for ex in examples["input_text"]]
    targets = [ex for ex in examples["target_text"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True)
    labels = tokenizer(targets, max_length=512, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized = dataset.map(preprocess, batched=True)


Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [4]:
args = TrainingArguments(
    output_dir="codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=2,
    # evaluation_strategy not supported in older versions
)


In [5]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)

trainer.train()
model.save_pretrained("flutter_code_generator_model")
tokenizer.save_pretrained("flutter_code_generator_model")


  trainer = Trainer(


Step,Training Loss


ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`labels` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [6]:
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

In [8]:
model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# If tokenizer has no pad_token, use eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [9]:
def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    # Tokenize inputs (prompts)
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",   # Pad sequences
        truncation=True
    )
    
    # Tokenize targets (Flutter code)
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace padding token IDs in labels with -100 for loss calculation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs


In [10]:
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and test
dataset = dataset.train_test_split(test_size=0.1)

# Apply tokenization & preprocessing
tokenized = dataset.map(preprocess, batched=True)

Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [12]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=2,
    logging_steps=50,
    # evaluation_strategy removed for older transformers
)


In [13]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)
trainer.train()

  trainer = Trainer(


Step,Training Loss
50,1.0067
100,0.6141
150,0.5332


KeyboardInterrupt: 

In [14]:

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model and tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Some code models may not have pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Preprocessing function
# -------------------------------
def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    # Tokenize inputs
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    # Tokenize targets (Flutter code)
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace padding tokens with -100 so loss ignores them
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

# -------------------------------
# 5️⃣ Load dataset
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train/test
dataset = dataset.train_test_split(test_size=0.1)

# Tokenize
tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 6️⃣ Training arguments with checkpointing
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=1,             # Train 1 epoch at a time
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,             # Keep last 5 checkpoints
    logging_steps=50,
    save_strategy="epoch",          # Save checkpoint at end of each epoch
    fp16=True                        # Mixed precision if GPU available
)

# -------------------------------
# 7️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)

# -------------------------------
# 8️⃣ Train the model (first session)
# -------------------------------
trainer.train()  # Saves checkpoint automatically at the end of epoch

# -------------------------------
# 9️⃣ Save final model after training session
# -------------------------------
model.save_pretrained("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training session complete! Checkpoints saved.")


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss


KeyboardInterrupt: 

In [15]:
from transformers import Trainer, AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained("flutter_codegen_model")  # Last saved model

# Trainer arguments (same as before)
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=1,             # 1 epoch per resume
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,
    logging_steps=50,
    save_strategy="epoch",
    fp16=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)

# ✅ Resume from last checkpoint automatically
trainer.train(resume_from_checkpoint=True)


ValueError: Unrecognized configuration class <class 'transformers.models.codegen.configuration_codegen.CodeGenConfig'> for this kind of AutoModel: AutoModelForSeq2SeqLM.
Model type should be one of BartConfig, BigBirdPegasusConfig, BlenderbotConfig, BlenderbotSmallConfig, EncoderDecoderConfig, FSMTConfig, GPTSanJapaneseConfig, GraniteSpeechConfig, LEDConfig, LongT5Config, M2M100Config, MarianConfig, MBartConfig, MT5Config, MvpConfig, NllbMoeConfig, PegasusConfig, PegasusXConfig, PLBartConfig, ProphetNetConfig, Qwen2AudioConfig, SeamlessM4TConfig, SeamlessM4Tv2Config, SwitchTransformersConfig, T5Config, T5GemmaConfig, UMT5Config, VoxtralConfig, XLMProphetNetConfig.

In [16]:

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model & tokenizer setup (CodeGen)
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # CodeGen model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Preprocessing function
# -------------------------------
def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    # Tokenize inputs
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    # Tokenize targets (Flutter code)
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace padding tokens with -100 for loss
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

# -------------------------------
# 5️⃣ Load dataset
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train/test
dataset = dataset.train_test_split(test_size=0.1)

# Tokenize dataset
tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 6️⃣ Training arguments with checkpointing
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=1,             # Train 1 epoch per session
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,             # Keep last 5 checkpoints
    logging_steps=50,
    save_strategy="epoch",          # Save checkpoint at end of each epoch
    fp16=True,                       # Mixed precision if GPU available
)

# -------------------------------
# 7️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)

# -------------------------------
# 8️⃣ Start training (first session)
# -------------------------------
trainer.train()  # Automatically saves checkpoint at end of epoch

# -------------------------------
# 9️⃣ Save final model after session
# -------------------------------
model.save_pretrained("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training session complete! Checkpoints saved.")


ValueError: Unrecognized configuration class <class 'transformers.models.t5.configuration_t5.T5Config'> for this kind of AutoModel: AutoModelForCausalLM.
Model type should be one of ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, JambaConfig, JetMoeConfig, Lfm2Config, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, Speech2Text2Config, StableLmConfig, Starcoder2Config, TransfoXLConfig, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.

In [17]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model & tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # T5-based CodeT5
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Dataset loading and preprocessing
# -------------------------------
# Load JSONL dataset
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and test
dataset = dataset.train_test_split(test_size=0.1)

# Preprocessing function
def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    # Tokenize inputs
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    # Tokenize targets (Flutter code)
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace padding tokens with -100 for loss calculation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

# Tokenize dataset
tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 5️⃣ Training arguments (1 epoch per session)
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=1,   # Train 1 epoch per session
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,   # Keep last 5 checkpoints
    logging_steps=50,
    save_strategy="epoch", # Save checkpoint at end of each epoch
    fp16=True              # Mixed precision if GPU available
)

# -------------------------------
# 6️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# -------------------------------
# 7️⃣ Start training
# -------------------------------
# First time: train normally
trainer.train()  # Checkpoints will be saved at the end of epoch

# -------------------------------
# 8️⃣ Save final model after session
# -------------------------------
model.save_pretrained("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training session complete! Checkpoints saved.")


  trainer = Trainer(


Step,Training Loss
50,1.0047


KeyboardInterrupt: 

In [18]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# Load last checkpoint
tokenizer = AutoTokenizer.from_pretrained("flutter_codegen_model")
model = AutoModelForSeq2SeqLM.from_pretrained("flutter_codegen_model")

args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=1,  # Train 1 epoch per resume
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,
    logging_steps=50,
    save_strategy="epoch",
    fp16=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# Resume from latest checkpoint
trainer.train(resume_from_checkpoint=True)


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [20]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from datasets import load_dataset

# =============================
# 1️⃣ Load your dataset
# (Example: Replace with your dataset loading)
# =============================
dataset = load_dataset("json", data_files={"train": "train.json", "validation": "valid.json"})

train_dataset = dataset["train"]
eval_dataset = dataset["validation"]

# =============================
# 2️⃣ Load model and tokenizer
# =============================
model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# =============================
# 3️⃣ Define training arguments
# =============================
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    overwrite_output_dir=False,     # Do NOT overwrite past checkpoints
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_strategy="steps",          # ✅ Save every X steps
    save_steps=50,                  # ✅ Save checkpoint every 50 steps
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=200,
    save_total_limit=3,             # Keep only last 3 checkpoints
    learning_rate=5e-5,
    weight_decay=0.01,
    fp16=True,
)

# =============================
# 4️⃣ Initialize trainer
# =============================
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# =============================
# 5️⃣ Train (with resume support)
# =============================
# If a checkpoint exists, resume automatically
trainer.train(resume_from_checkpoint=True)

# =============================import os
import json
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Trainer,
    TrainingArguments,
)

# ======================================================
# 1️⃣ Load your dataset from folder (your original way)
# ======================================================
data_dir = "./flutter_dataset"   # folder containing 0.json, 1.json, etc.

data = []
for filename in sorted(os.listdir(data_dir)):
    if filename.endswith(".json"):
        path = os.path.join(data_dir, filename)
        with open(path, "r", encoding="utf-8") as f:
            item = json.load(f)
            # ✅ Ensure both keys exist
            if "prompt" in item and "flutter_code" in item:
                data.append({
                    "prompt": item["prompt"],
                    "code": item["flutter_code"]
                })
            else:
                print(f"⚠️ Skipped file missing keys: {filename}")

# Create Hugging Face Dataset
dataset = Dataset.from_list(data)

# Split into train/test (90/10)
dataset = dataset.train_test_split(test_size=0.1, seed=42)

train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# ======================================================
# 2️⃣ Load tokenizer and model
# ======================================================
model_name = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ======================================================
# 3️⃣ Tokenization
# ======================================================
def preprocess_function(examples):
    inputs = tokenizer(
        examples["prompt"],
        max_length=256,
        padding="max_length",
        truncation=True
    )
    labels = tokenizer(
        examples["code"],
        max_length=512,
        padding="max_length",
        truncation=True
    )
    inputs["labels"] = labels["input_ids"]
    return inputs

tokenized = dataset.map(preprocess_function, batched=True, remove_columns=["prompt", "code"])

# ======================================================
# 4️⃣ Training Arguments (Save checkpoints)
# ======================================================
args = TrainingArguments(
    output_dir="flutter_codegen_model",   # Save folder
    overwrite_output_dir=False,           # Don’t overwrite old checkpoints
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_strategy="steps",                # Save every few steps
    save_steps=50,                        # Save every 50 steps
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=200,
    save_total_limit=3,                   # Keep 3 recent checkpoints
    fp16=True,
)

# ======================================================
# 5️⃣ Create Trainer
# ======================================================
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)

# ======================================================
# 6️⃣ Train (Resume automatically)
# ======================================================
trainer.train(resume_from_checkpoint=True)

# ======================================================
# 7️⃣ Save final model
# ======================================================
trainer.save_model("flutter_codegen_model/final")
tokenizer.save_pretrained("flutter_codegen_model/final")

print("✅ Training finished successfully.")

# 6️⃣ Save the final model
# =============================
trainer.save_model("flutter_codegen_model/final")


FileNotFoundError: Unable to find 'E:/Kushan/Studies/App/Flutter Apps/Projects/flutter_ui_generator/Training_Model\train.json'

In [23]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model & tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # T5-based CodeT5
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Dataset loading and preprocessing
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace pad token IDs with -100 to ignore them in loss computation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 5️⃣ Training arguments (compatible version)
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,
    logging_steps=50,
    save_steps=50,          # ✅ Save checkpoint every 50 steps
    save_strategy="steps",  # ✅ Save based on steps
    fp16=True               # Mixed precision (if GPU supports)
)

# -------------------------------
# 6️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# -------------------------------
# 7️⃣ Resume from last checkpoint if available
# -------------------------------
last_checkpoint = None
if os.path.isdir("flutter_codegen_model"):
    checkpoints = [os.path.join("flutter_codegen_model", d)
                   for d in os.listdir("flutter_codegen_model")
                   if d.startswith("checkpoint-")]
    if checkpoints:
        last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]
        print(f"🔁 Resuming from checkpoint: {last_checkpoint}")

# -------------------------------
# 8️⃣ Start / resume training
# -------------------------------
trainer.train(resume_from_checkpoint=last_checkpoint)

# -------------------------------
# 9️⃣ Save final model
# -------------------------------
trainer.save_model("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training complete! Checkpoints saved every 50 steps.")


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(


🔁 Resuming from checkpoint: flutter_codegen_model\checkpoint-50


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


Step,Training Loss


KeyboardInterrupt: 

In [2]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Configuration
# -------------------------------
MODEL_NAME = "Salesforce/codet5p-220m"  # T5-based CodeT5
OUTPUT_DIR = "flutter_codegen_model"
DATASET_FILE = "flutter_code_dataset.jsonl"

# -------------------------------
# 4️⃣ Check for existing checkpoints
# -------------------------------
last_checkpoint = None
if os.path.isdir(OUTPUT_DIR):
    checkpoints = [
        os.path.join(OUTPUT_DIR, d)
        for d in os.listdir(OUTPUT_DIR)
        if d.startswith("checkpoint-") and os.path.isdir(os.path.join(OUTPUT_DIR, d))
    ]
    if checkpoints:
        last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]
        print(f"🔁 Resuming from checkpoint: {last_checkpoint}")
    else:
        print("🆕 No checkpoints found. Starting fresh training.")
else:
    print("🆕 No output directory found. Starting fresh training.")

# -------------------------------
# 5️⃣ Model & tokenizer setup
# -------------------------------
# Load tokenizer (always from base model)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model from checkpoint if available, otherwise from base
if last_checkpoint:
    print(f"📦 Loading model from checkpoint: {last_checkpoint}")
    model = AutoModelForSeq2SeqLM.from_pretrained(last_checkpoint)
else:
    print(f"📦 Loading base model: {MODEL_NAME}")
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# -------------------------------
# 6️⃣ Dataset loading and preprocessing
# -------------------------------
print("📂 Loading dataset...")
dataset = load_dataset(
    "json",
    data_files=DATASET_FILE,
    split="train"
)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1, seed=42)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace pad token IDs with -100 to ignore them in loss computation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

print("🔄 Preprocessing dataset...")
tokenized = dataset.map(
    preprocess, 
    batched=True,
    load_from_cache_file=True,  # Reuse cached preprocessing
    desc="Tokenizing dataset"
)

# -------------------------------
# 7️⃣ Training arguments
# -------------------------------
args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,           # Keep only last 5 checkpoints
    logging_steps=50,
    save_steps=50,                # Save checkpoint every 50 steps
    eval_steps=50,                # Evaluate every 50 steps
    save_strategy="steps",
    eval_strategy="steps",        # Run evaluation periodically (renamed from evaluation_strategy)
    fp16=True,                    # Mixed precision (if GPU supports)
    seed=42,                      # Reproducible training
    report_to="none",             # Disable wandb/tensorboard if not needed
    load_best_model_at_end=True,  # Load best model at the end
    metric_for_best_model="eval_loss",  # Track evaluation loss
    greater_is_better=False,      # Lower eval_loss is better
)

# -------------------------------
# 8️⃣ Trainer setup
# -------------------------------
print("🏋️ Setting up trainer...")
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# -------------------------------
# 9️⃣ Start / resume training
# -------------------------------
print("🚀 Starting training...")
try:
    trainer.train(resume_from_checkpoint=last_checkpoint)
    print("✅ Training completed successfully!")
except KeyboardInterrupt:
    print("\n⏸️ Training interrupted. Progress saved to checkpoint.")
except Exception as e:
    print(f"❌ Training failed with error: {e}")
    raise

# -------------------------------
# 🔟 Save final model
# -------------------------------
print("💾 Saving final model...")
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"✅ Training complete! Model saved to '{OUTPUT_DIR}'")
print(f"📊 Checkpoints saved every 50 steps (keeping last 5)")

# -------------------------------
# 1️⃣1️⃣ Optional: Display training summary
# -------------------------------
if hasattr(trainer.state, 'log_history') and trainer.state.log_history:
    final_metrics = trainer.state.log_history[-1]
    print("\n📈 Final Metrics:")
    for key, value in final_metrics.items():
        if isinstance(value, float):
            print(f"   {key}: {value:.4f}")

🆕 No checkpoints found. Starting fresh training.
📦 Loading base model: Salesforce/codet5p-220m
📂 Loading dataset...
🔄 Preprocessing dataset...
🏋️ Setting up trainer...


  trainer = Trainer(


🚀 Starting training...


Step,Training Loss,Validation Loss
50,1.0067,0.62478





⏸️ Training interrupted. Progress saved to checkpoint.
💾 Saving final model...
✅ Training complete! Model saved to 'flutter_codegen_model'
📊 Checkpoints saved every 50 steps (keeping last 5)

📈 Final Metrics:
   loss: 0.6136
   grad_norm: 2.2309
   learning_rate: 0.0000
   epoch: 0.0222


In [1]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model & tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # T5-based CodeT5
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Dataset loading and preprocessing
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace pad token IDs with -100 to ignore them in loss computation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 5️⃣ Training arguments (compatible version)
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,
    logging_steps=50,
    save_steps=50,          # ✅ Save checkpoint every 50 steps
    save_strategy="steps",  # ✅ Save based on steps
    fp16=True               # Mixed precision (if GPU supports)
)

# -------------------------------
# 6️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# -------------------------------
# 7️⃣ Resume from last checkpoint if available
# -------------------------------
last_checkpoint = None
if os.path.isdir("flutter_codegen_model"):
    checkpoints = [os.path.join("flutter_codegen_model", d)
                   for d in os.listdir("flutter_codegen_model")
                   if d.startswith("checkpoint-")]
    if checkpoints:
        last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]
        print(f"🔁 Resuming from checkpoint: {last_checkpoint}")

# -------------------------------
# 8️⃣ Start / resume training
# -------------------------------
trainer.train(resume_from_checkpoint=last_checkpoint)

# -------------------------------
# 9️⃣ Save final model
# -------------------------------
trainer.save_model("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training complete! Checkpoints saved every 50 steps.")


Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(


🔁 Resuming from checkpoint: flutter_codegen_model\checkpoint-9600


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


Step,Training Loss
9650,0.0293
9700,0.0336
9750,0.0281
9800,0.033
9850,0.0291
9900,0.0307
9950,0.03
10000,0.027
10050,0.0317
10100,0.0299


✅ Training complete! Checkpoints saved every 50 steps.


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_dir = "flutter_codegen_model"  # your folder
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

prompt = "Create a login form"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
outputs = model.generate(**inputs, max_length=512, num_beams=5, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


import 'package:flutter/material.dart';

class GeneratedWidget extends StatefulWidget {
  @override
  _GeneratedWidgetState createState() => _GeneratedWidgetState();
}

class _GeneratedWidgetState extends State<GeneratedWidget> {
  bool _obscurePassword = true;
  final _emailController = TextEditingController();
  final _passwordController = TextEditingController();

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      backgroundColor: Colors.green[50],
      body: Center(
        child: SingleChildScrollView(
          padding: EdgeInsets.all(24.0),
          child: Container(
            padding: EdgeInsets.all(24.0),
            decoration: BoxDecoration(
              color: Colors.white,
              borderRadius: BorderRadius.circular(12),
              boxShadow: [
                BoxShadow(
                  color: Colors.grey[200]!,
                  blurRadius: 10,
                  offset: Offset(0, 2),
                ),
              ],
          

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_dir = "flutter_codegen_model"  # your folder
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

prompt = "Create a splash screen"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
outputs = model.generate(**inputs, max_length=5000, num_beams=5, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


import 'package:flutter/material.dart';

class GeneratedWidget extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      body: Container(
        decoration: BoxDecoration(
          gradient: LinearGradient(
            begin: Alignment.topCenter,
            end: Alignment.bottomCenter,
            colors: [Color(0xFF00B7EB), Color(0xFFFF00FF)],
          ),
        ),
        child: SafeArea(
          child: Column(
            mainAxisAlignment: MainAxisAlignment.center,
            children: [
              Spacer(flex: 2),
              Container(
                width: 100,
                height: 100,
                decoration: BoxDecoration(
                  shape: BoxShape.circle,
                  color: Colors.white,
                  boxShadow: [
                    BoxShadow(
                      color: Color(0xFF00B7EB).withOpacity(0.5),
                      blurRadius: 15,
                      spreadRadius: 5,
        

In [25]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import os

model_dir = "flutter_codegen_model"  # your folder
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

prompt = "Create a Login page"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
outputs = model.generate(**inputs, max_length=10000, num_beams=1, temperature=0.7)

# Decode the output
output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Option 1: Save to file
output_file = "generated_splash_screen.dart"
with open(output_file, "w") as f:
    f.write(output_text)
print(f"✓ Full code saved to '{output_file}'")

# Option 2: Print full output in console
print("\n" + "="*80)
print("GENERATED CODE:")
print("="*80 + "\n")
print(output_text)
print("\n" + "="*80)

# Option 3: Print with statistics
print(f"\nCode Statistics:")
print(f"  Total characters: {len(output_text)}")
print(f"  Total tokens: {len(outputs[0])}")
print(f"  Lines of code: {output_text.count(chr(10)) + 1}")

✓ Full code saved to 'generated_splash_screen.dart'

GENERATED CODE:

import 'package:flutter/material.dart';

class GeneratedWidget extends StatefulWidget {
  @override
  _GeneratedWidgetState createState() => _GeneratedWidgetState();
}

class _GeneratedWidgetState extends State<GeneratedWidget> {
  bool _obscurePassword = true;
  final _emailController = TextEditingController();
  final _passwordController = TextEditingController();

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      backgroundColor: Colors.white,
      body: Center(
        child: SingleChildScrollView(
          padding: EdgeInsets.all(24.0),
          child: Container(
            padding: EdgeInsets.all(24.0),
            decoration: BoxDecoration(
              color: Colors.white,
              borderRadius: BorderRadius.circular(12),
              boxShadow: [
                BoxShadow(
                  color: Colors.grey[200]!,
                  blurRadius: 8,
                  off

In [13]:
# ================================
# MODEL EVALUATION & ACCURACY
# ================================

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
import torch
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
import nltk
from difflib import SequenceMatcher
nltk.download('punkt')

# --------------------------------
# 1️⃣ Load your trained model
# --------------------------------
model_dir = "flutter_codegen_model"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

# Set to evaluation mode
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# --------------------------------
# 2️⃣ Load and prepare test dataset
# --------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Use the same split as training
dataset = dataset.train_test_split(test_size=0.1)
test_dataset = dataset["test"]

# --------------------------------
# 3️⃣ Generate predictions
# --------------------------------
def generate_predictions(dataset, model, tokenizer, device, num_samples=None):
    predictions = []
    references = []
    
    if num_samples:
        dataset = dataset.select(range(min(num_samples, len(dataset))))
    
    for example in dataset:
        input_text = example["input_text"]
        target_text = example["target_text"]
        
        # Tokenize input
        inputs = tokenizer(
            input_text,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        ).to(device)
        
        # Generate output
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=512,
                num_beams=4,
                early_stopping=True
            )
        
        # Decode predictions
        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(pred_text)
        references.append(target_text)
    
    return predictions, references

print("🔄 Generating predictions... (this may take a while)")
predictions, references = generate_predictions(
    test_dataset, 
    model, 
    tokenizer, 
    device,
    num_samples=50
)

# --------------------------------
# 4️⃣ Direct Accuracy (Similarity Score)
# --------------------------------
print("\n" + "="*50)
print("⚡ DIRECT ACCURACY")
print("="*50)

def similarity_score(pred, ref):
    """Calculate similarity ratio between prediction and reference"""
    return SequenceMatcher(None, pred, ref).ratio()

accuracy_scores = []
for pred, ref in zip(predictions, references):
    score = similarity_score(pred.strip(), ref.strip())
    accuracy_scores.append(score)

avg_accuracy = sum(accuracy_scores) / len(accuracy_scores) * 100
print(f"Average Similarity Score: {avg_accuracy:.2f}%")
print(f"(0% = completely different, 100% = identical)")

# --------------------------------
# 5️⃣ Calculate ROUGE scores
# --------------------------------
print("\n" + "="*50)
print("📊 ROUGE SCORES")
print("="*50)

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}

for pred, ref in zip(predictions, references):
    scores = scorer.score(ref, pred)
    rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
    rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
    rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

print(f"ROUGE-1 (avg): {sum(rouge_scores['rouge1']) / len(rouge_scores['rouge1']):.4f}")
print(f"ROUGE-2 (avg): {sum(rouge_scores['rouge2']) / len(rouge_scores['rouge2']):.4f}")
print(f"ROUGE-L (avg): {sum(rouge_scores['rougeL']) / len(rouge_scores['rougeL']):.4f}")

# --------------------------------
# 6️⃣ Calculate BLEU score
# --------------------------------
print("\n" + "="*50)
print("📊 BLEU SCORE")
print("="*50)

ref_tokens = [nltk.word_tokenize(ref) for ref in references]
pred_tokens = [nltk.word_tokenize(pred) for pred in predictions]

bleu_score = corpus_bleu(
    [[ref] for ref in ref_tokens],
    pred_tokens
)
print(f"BLEU Score: {bleu_score:.4f}")

# --------------------------------
# 7️⃣ Show sample predictions
# --------------------------------
print("\n" + "="*50)
print("🔍 SAMPLE PREDICTIONS")
print("="*50)

for i in range(min(5, len(predictions))):
    print(f"\n--- Sample {i+1} ---")
    print(f"Input (truncated): {test_dataset[i]['input_text'][:200]}...")
    print(f"\n✅ Reference:\n{references[i][:300]}...")
    print(f"\n🤖 Prediction:\n{predictions[i][:300]}...")
    print(f"Match Score: {accuracy_scores[i]:.2%}")

# --------------------------------
# 8️⃣ Exact Match Accuracy
# --------------------------------
print("\n" + "="*50)
print("📊 EXACT MATCH ACCURACY")
print("="*50)

exact_matches = sum(1 for pred, ref in zip(predictions, references) if pred.strip() == ref.strip())
exact_match_acc = exact_matches / len(predictions) * 100

print(f"Exact Matches: {exact_matches}/{len(predictions)}")
print(f"Exact Match Accuracy: {exact_match_acc:.2f}%")

# --------------------------------
# 9️⃣ Run Trainer evaluation
# --------------------------------
print("\n" + "="*50)
print("📊 TRAINER EVALUATION (Loss & Perplexity)")
print("="*50)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=512,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

tokenized_test = test_dataset.map(preprocess, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="eval_results",
        per_device_eval_batch_size=2,
    ),
    eval_dataset=tokenized_test,
    tokenizer=tokenizer
)

eval_results = trainer.evaluate()
print(f"\nEvaluation Loss: {eval_results['eval_loss']:.4f}")
print(f"Perplexity: {torch.exp(torch.tensor(eval_results['eval_loss'])):.4f}")

print("\n✅ Evaluation complete!")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kusha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


🔄 Generating predictions... (this may take a while)

⚡ DIRECT ACCURACY
Average Similarity Score: 51.22%
(0% = completely different, 100% = identical)

📊 ROUGE SCORES
ROUGE-1 (avg): 0.6139
ROUGE-2 (avg): 0.5747
ROUGE-L (avg): 0.5821

📊 BLEU SCORE


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\kusha/nltk_data'
    - 'c:\\Users\\kusha\\anaconda3\\nltk_data'
    - 'c:\\Users\\kusha\\anaconda3\\share\\nltk_data'
    - 'c:\\Users\\kusha\\anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\kusha\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************


In [11]:
pip install rouge-score nltk

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting absl-py (from rouge-score)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Downloading absl_py-2.3.1-py3-none-any.whl (135 kB)
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py): started
  Building wheel for rouge-score (setup.py): finished with status 'done'
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24972 sha256=0a1fe4537e5f2b1609417ffc82801a405017abe064683c3b38dea71ef1b00498
  Stored in directory: c:\users\kusha\appdata\local\pip\cache\wheels\85\9d\af\01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge-score
Installing collected packages: absl-py, rouge-score
Successfully installed absl-py-2.3.1 rouge-score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
# ========================================================================
# FLUTTER CODE GENERATION - CPU TRAINING (OPTIMIZED TO PREVENT FREEZING)
# ========================================================================
# This version includes fixes to prevent your laptop from freezing
# Key improvements:
# - Smaller batch sizes
# - Limited data loading
# - Reduced memory usage
# - Progressive training approach
# ========================================================================

# -------------------------------
# STEP 1: Install Required Packages
# -------------------------------
!pip install transformers datasets sentencepiece accelerate -q

# -------------------------------
# STEP 2: Import Libraries
# -------------------------------
import json
import os
import glob
import gc
from datasets import Dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM, 
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq
)
import torch

print("✅ Libraries imported successfully!")
print(f"🖥️  Training on: CPU")
print(f"💾 Available CPU cores: {os.cpu_count()}")

# Force CPU usage
os.environ["CUDA_VISIBLE_DEVICES"] = ""
device = torch.device("cpu")
print(f"✅ Device set to: {device}")

# Set thread limits to prevent CPU overload
torch.set_num_threads(max(1, os.cpu_count() // 2))
print(f"🔧 PyTorch threads limited to: {torch.get_num_threads()}")

# -------------------------------
# STEP 3: Configuration (FIXED FOR CPU)
# -------------------------------
CONFIG = {
    # Paths
    "data_dir": "./data",
    "output_dir": "./flutter_codegen_model",
    
    # Model settings
    "model_name": "Salesforce/codet5p-220m",
    "max_input_length": 512,        # REDUCED from 512
    "max_target_length": 2048,       # REDUCED from 2048
    
    # Training settings - OPTIMIZED TO PREVENT FREEZING
    "batch_size": 2,                # REDUCED from 8
    "gradient_accumulation_steps": 4,  # INCREASED to maintain effective batch size
    "num_epochs": 1,                # REDUCED for initial test
    "learning_rate": 5e-5,
    "save_steps": 50,               # Save more frequently
    "logging_steps": 10,            # Log more frequently to see progress
    "warmup_steps": 20,
    
    # CPU optimization - CRITICAL FIXES
    "dataloader_num_workers": 0,    # CHANGED from 4 to 0 (prevents freezing!)
    "max_train_samples": 1000,      # LIMIT dataset size for testing
}

print("\n📋 Configuration (CPU Training - Optimized):")
for key, value in CONFIG.items():
    print(f"   {key}: {value}")

print("\n⚠️  IMPORTANT CHANGES TO PREVENT FREEZING:")
print("   ✓ Batch size reduced to 2 (was 8)")
print("   ✓ Dataloader workers set to 0 (was 4) - CRITICAL FIX")
print("   ✓ Sequence lengths reduced")
print("   ✓ Training on limited samples (1000)")
print("   ✓ PyTorch threads limited")

# -------------------------------
# STEP 4: Load Dataset (WITH SIZE LIMIT)
# -------------------------------
def load_flutter_dataset(data_dir, max_samples=None):
    print(f"\n📂 Loading dataset from: {data_dir}")
    json_files = sorted(glob.glob(os.path.join(data_dir, "*.json")))
    
    print(f"   Found {len(json_files)} JSON files")
    
    if max_samples:
        print(f"   ⚠️  Limiting to {max_samples} samples to prevent freezing")
    
    data = []
    for i, file_path in enumerate(json_files):
        if max_samples and len(data) >= max_samples:
            break
            
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                item = json.load(f)
                data.append({
                    'prompt': item['prompt'],
                    'flutter_code': item['flutter_code']
                })
            
            if (i + 1) % 100 == 0:
                print(f"   Loaded {len(data)} examples...")
        except Exception as e:
            print(f"   ⚠️ Error loading {file_path}: {e}")
    
    print(f"✅ Successfully loaded {len(data)} examples")
    
    # Clear memory
    gc.collect()
    
    return Dataset.from_list(data)

dataset = load_flutter_dataset(CONFIG["data_dir"], max_samples=CONFIG.get("max_train_samples"))
dataset = dataset.train_test_split(test_size=0.1, seed=42)

print(f"\n📊 Dataset split:")
print(f"   Training: {len(dataset['train'])}")
print(f"   Validation: {len(dataset['test'])}")

# -------------------------------
# STEP 5: Load Model and Tokenizer
# -------------------------------
print(f"\n🤖 Loading model: {CONFIG['model_name']}")

tokenizer = AutoTokenizer.from_pretrained(CONFIG['model_name'])
model = AutoModelForSeq2SeqLM.from_pretrained(CONFIG['model_name'])

# Explicitly move model to CPU
model = model.to(device)

# Set pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

print(f"✅ Model loaded on CPU!")
print(f"   Model parameters: {model.num_parameters():,}")

# Clear memory
gc.collect()

# -------------------------------
# STEP 6: Preprocessing
# -------------------------------
def preprocess_function(examples):
    model_inputs = tokenizer(
        examples['prompt'],
        max_length=CONFIG['max_input_length'],
        padding='max_length',
        truncation=True,
        return_tensors=None
    )
    
    labels = tokenizer(
        examples['flutter_code'],
        max_length=CONFIG['max_target_length'],
        padding='max_length',
        truncation=True,
        return_tensors=None
    )
    
    labels['input_ids'] = [
        [(label if label != tokenizer.pad_token_id else -100) for label in labels_example]
        for labels_example in labels['input_ids']
    ]
    
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

print("\n⚙️ Preprocessing dataset...")
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    batch_size=100,  # Process in smaller batches
    remove_columns=dataset['train'].column_names,
    desc="Tokenizing"
)
print("✅ Preprocessing complete!")

# Clear memory
gc.collect()

# -------------------------------
# STEP 7: Training Arguments (FIXED FOR CPU)
# -------------------------------
training_args = TrainingArguments(
    # Output and logging
    output_dir=CONFIG['output_dir'],
    logging_dir=f"{CONFIG['output_dir']}/logs",
    logging_steps=CONFIG['logging_steps'],
    
    # Checkpointing
    save_strategy="steps",
    save_steps=CONFIG['save_steps'],
    save_total_limit=2,  # Keep fewer checkpoints
    
    # No evaluation for faster training
    eval_strategy="no",
    
    # Training hyperparameters
    num_train_epochs=CONFIG['num_epochs'],
    per_device_train_batch_size=CONFIG['batch_size'],
    per_device_eval_batch_size=CONFIG['batch_size'],
    gradient_accumulation_steps=CONFIG['gradient_accumulation_steps'],
    learning_rate=CONFIG['learning_rate'],
    warmup_steps=CONFIG['warmup_steps'],
    weight_decay=0.01,
    
    # CPU-specific settings - CRITICAL FIXES
    no_cuda=True,
    fp16=False,
    dataloader_num_workers=CONFIG['dataloader_num_workers'],  # 0 to prevent freezing
    dataloader_pin_memory=False,
    
    # Memory optimization
    gradient_checkpointing=False,  # Uses more memory but faster on CPU
    max_grad_norm=1.0,
    
    # Other
    load_best_model_at_end=False,
    report_to="none",
    push_to_hub=False,
    optim="adamw_torch",
    
    # Additional memory settings
    logging_first_step=True,
    disable_tqdm=False,  # Show progress bar
)

effective_batch = CONFIG['batch_size'] * CONFIG['gradient_accumulation_steps']
total_steps = len(tokenized_dataset['train']) // effective_batch * CONFIG['num_epochs']

print("\n📋 Training Configuration (CPU - Anti-Freeze Settings):")
print(f"   Device: CPU only")
print(f"   CPU cores available: {os.cpu_count()}")
print(f"   PyTorch threads: {torch.get_num_threads()}")
print(f"   Dataloader workers: {CONFIG['dataloader_num_workers']} (0 = no separate processes)")
print(f"   Batch size: {CONFIG['batch_size']}")
print(f"   Gradient accumulation: {CONFIG['gradient_accumulation_steps']}")
print(f"   Effective batch size: {effective_batch}")
print(f"   Epochs: {CONFIG['num_epochs']}")
print(f"   Total steps: ~{total_steps}")
print(f"   Training samples: {len(tokenized_dataset['train'])}")

# -------------------------------
# STEP 8: Data Collator
# -------------------------------
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

# -------------------------------
# STEP 9: Initialize Trainer
# -------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("✅ Trainer initialized for CPU training!")

# -------------------------------
# STEP 10: Find Last Checkpoint
# -------------------------------
def find_last_checkpoint(output_dir):
    if not os.path.isdir(output_dir):
        return None
    
    checkpoints = [
        os.path.join(output_dir, d)
        for d in os.listdir(output_dir)
        if d.startswith("checkpoint-")
    ]
    
    if not checkpoints:
        return None
    
    return sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]

last_checkpoint = find_last_checkpoint(CONFIG['output_dir'])

if last_checkpoint:
    print(f"\n🔁 Found checkpoint: {last_checkpoint}")
    print("   Training will resume from this checkpoint")
else:
    print("\n🆕 Starting fresh training")

# -------------------------------
# STEP 11: Start Training
# -------------------------------
print("\n" + "="*70)
print("🚀 STARTING CPU TRAINING (OPTIMIZED)")
print("="*70)
print("\n⚠️  WHAT TO EXPECT:")
print("   • Training will be SLOW (this is normal for CPU)")
print("   • Your laptop may run hot (this is normal)")
print("   • Each step takes 10-30 seconds (be patient!)")
print("   • Progress updates every 10 steps")
print("   • Checkpoints saved every 50 steps")
print("\n💡 IF IT STILL FREEZES:")
print("   • Close other applications")
print("   • Reduce batch_size to 1")
print("   • Reduce max_train_samples to 500")
print("   • Check Task Manager/Activity Monitor")
print("\n" + "="*70 + "\n")

try:
    trainer.train(resume_from_checkpoint=last_checkpoint)
    print("\n" + "="*70)
    print("✅ TRAINING COMPLETED SUCCESSFULLY!")
    print("="*70)
except KeyboardInterrupt:
    print("\n" + "="*70)
    print("⚠️ Training interrupted by user")
    print("   Progress saved. Re-run to resume from last checkpoint.")
    print("="*70)
except Exception as e:
    print("\n" + "="*70)
    print(f"❌ Training failed: {e}")
    print("="*70)
    raise

# -------------------------------
# STEP 12: Save Final Model
# -------------------------------
print("\n💾 Saving final model...")
trainer.save_model(CONFIG['output_dir'])
tokenizer.save_pretrained(CONFIG['output_dir'])
print(f"✅ Model saved to: {CONFIG['output_dir']}")

# Clear memory
del model, trainer
gc.collect()

# -------------------------------
# STEP 13: Test the Model
# -------------------------------
print("\n" + "="*70)
print("🧪 TESTING THE MODEL")
print("="*70)

print("\nLoading trained model for testing...")
trained_model = AutoModelForSeq2SeqLM.from_pretrained(CONFIG['output_dir'])
trained_tokenizer = AutoTokenizer.from_pretrained(CONFIG['output_dir'])

# Keep model on CPU
trained_model = trained_model.to(device)

def generate_flutter_code(prompt, max_length=512):
    """Generate Flutter code from a prompt"""
    inputs = trained_tokenizer(
        prompt,
        return_tensors="pt",
        max_length=CONFIG['max_input_length'],
        truncation=True
    )
    
    outputs = trained_model.generate(
        inputs['input_ids'],
        max_length=max_length,
        num_beams=3,  # Reduced from 5
        early_stopping=True,
        temperature=0.7,
    )
    
    code = trained_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return code

# Test prompts
test_prompts = [
    "Create a simple login screen with email and password fields",
    "Build a card widget with an image, title, and description",
]

print("\n📝 Generating Flutter code for test prompts:\n")
for i, prompt in enumerate(test_prompts, 1):
    print(f"\n{'='*70}")
    print(f"Test {i}: {prompt}")
    print('='*70)
    print(generate_flutter_code(prompt))
    print()

print("\n" + "="*70)
print("✅ ALL DONE!")
print("="*70)
print(f"\n📁 Model saved in: {CONFIG['output_dir']}")
print("\n💡 Next steps:")
print("   1. If training worked: Increase max_train_samples gradually")
print("   2. If still freezing: Reduce batch_size to 1")
print("   3. For full training: Consider using Google Colab with GPU")
print("\n" + "="*70)

✅ Libraries imported successfully!
🖥️  Training on: CPU
💾 Available CPU cores: 8
✅ Device set to: cpu
🔧 PyTorch threads limited to: 4

📋 Configuration (CPU Training - Optimized):
   data_dir: ./data
   output_dir: ./flutter_codegen_model
   model_name: Salesforce/codet5p-220m
   max_input_length: 512
   max_target_length: 2048
   batch_size: 2
   gradient_accumulation_steps: 4
   num_epochs: 1
   learning_rate: 5e-05
   save_steps: 50
   logging_steps: 10
   warmup_steps: 20
   dataloader_num_workers: 0
   max_train_samples: 1000

⚠️  IMPORTANT CHANGES TO PREVENT FREEZING:
   ✓ Batch size reduced to 2 (was 8)
   ✓ Dataloader workers set to 0 (was 4) - CRITICAL FIX
   ✓ Sequence lengths reduced
   ✓ Training on limited samples (1000)
   ✓ PyTorch threads limited

📂 Loading dataset from: ./data
   Found 0 JSON files
   ⚠️  Limiting to 1000 samples to prevent freezing
✅ Successfully loaded 0 examples

📊 Dataset split:
   Training: 0
   Validation: 0

🤖 Loading model: Salesforce/codet5p-22

  trainer = Trainer(


✅ Trainer initialized for CPU training!

🆕 Starting fresh training

🚀 STARTING CPU TRAINING (OPTIMIZED)

⚠️  WHAT TO EXPECT:
   • Training will be SLOW (this is normal for CPU)
   • Your laptop may run hot (this is normal)
   • Each step takes 10-30 seconds (be patient!)
   • Progress updates every 10 steps
   • Checkpoints saved every 50 steps

💡 IF IT STILL FREEZES:
   • Close other applications
   • Reduce batch_size to 1
   • Reduce max_train_samples to 500
   • Check Task Manager/Activity Monitor



❌ Training failed: No columns in the dataset match the model's forward method signature: (input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, labels, label, label_ids). The following columns have been ignored: []. Please check the dataset and model. You may n

ValueError: No columns in the dataset match the model's forward method signature: (input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, labels, label, label_ids). The following columns have been ignored: []. Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`.

In [None]:
# ========================================================================
# FLUTTER CODE GENERATION - CPU TRAINING (FIXED PREPROCESSING ERROR)
# ========================================================================
# This version fixes the "No columns in dataset" error
# Key improvements:
# - Fixed preprocessing function to properly return columns
# - Smaller batch sizes
# - Limited data loading
# - Reduced memory usage
# ========================================================================

# -------------------------------
# STEP 1: Install Required Packages
# -------------------------------
!pip install transformers datasets sentencepiece accelerate -q

# -------------------------------
# STEP 2: Import Libraries
# -------------------------------
import json
import os
import glob
import gc
from datasets import Dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM, 
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq
)
import torch

print("✅ Libraries imported successfully!")
print(f"🖥️  Training on: CPU")
print(f"💾 Available CPU cores: {os.cpu_count()}")

# Force CPU usage
os.environ["CUDA_VISIBLE_DEVICES"] = ""
device = torch.device("cpu")
print(f"✅ Device set to: {device}")

# Set thread limits to prevent CPU overload
torch.set_num_threads(max(1, os.cpu_count() // 2))
print(f"🔧 PyTorch threads limited to: {torch.get_num_threads()}")

# -------------------------------
# STEP 3: Configuration (FIXED FOR CPU)
# -------------------------------
CONFIG = {
    # Paths
    "data_dir": "./flutter_dataset",
    "output_dir": "./flutter_codegen_model",
    
    # Model settings
    "model_name": "Salesforce/codet5p-220m",
    "max_input_length": 512,
    "max_target_length": 2048,
    
    # Training settings - OPTIMIZED TO PREVENT FREEZING
    "batch_size": 2,                # REDUCED from 8
    "gradient_accumulation_steps": 4,  # INCREASED to maintain effective batch size
    "num_epochs": 1,                # REDUCED for initial test
    "learning_rate": 5e-5,
    "save_steps": 50,               # Save more frequently
    "logging_steps": 10,            # Log more frequently to see progress
    "warmup_steps": 20,
    
    # CPU optimization - CRITICAL FIXES
    "dataloader_num_workers": 0,    # CHANGED from 4 to 0 (prevents freezing!)
}

print("\n📋 Configuration (CPU Training - Optimized):")
for key, value in CONFIG.items():
    print(f"   {key}: {value}")

print("\n⚠️  IMPORTANT CHANGES TO PREVENT FREEZING:")
print("   ✓ Batch size reduced to 2 (was 8)")
print("   ✓ Dataloader workers set to 0 (was 4) - CRITICAL FIX")
print("   ✓ Training on limited samples (1000)")
print("   ✓ PyTorch threads limited")
print("   ✓ FIXED: Preprocessing to properly create columns")

# -------------------------------
# STEP 4: Load Dataset (WITH SIZE LIMIT)
# -------------------------------
def load_flutter_dataset(data_dir, max_samples=None):
    print(f"\n📂 Loading dataset from: {data_dir}")
    json_files = sorted(glob.glob(os.path.join(data_dir, "*.json")))
    
    print(f"   Found {len(json_files)} JSON files")
    
    if max_samples:
        print(f"   ⚠️  Limiting to {max_samples} samples to prevent freezing")
    
    data = []
    for i, file_path in enumerate(json_files):
        if max_samples and len(data) >= max_samples:
            break
            
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                item = json.load(f)
                data.append({
                    'prompt': item['prompt'],
                    'flutter_code': item['flutter_code']
                })
            
            if (i + 1) % 100 == 0:
                print(f"   Loaded {len(data)} examples...")
        except Exception as e:
            print(f"   ⚠️ Error loading {file_path}: {e}")
    
    print(f"✅ Successfully loaded {len(data)} examples")
    
    # Clear memory
    gc.collect()
    
    return Dataset.from_list(data)

dataset = load_flutter_dataset(CONFIG["data_dir"], max_samples=CONFIG.get("max_train_samples"))
dataset = dataset.train_test_split(test_size=0.1, seed=42)

print(f"\n📊 Dataset split:")
print(f"   Training: {len(dataset['train'])}")
print(f"   Validation: {len(dataset['test'])}")

# -------------------------------
# STEP 5: Load Model and Tokenizer
# -------------------------------
print(f"\n🤖 Loading model: {CONFIG['model_name']}")

tokenizer = AutoTokenizer.from_pretrained(CONFIG['model_name'])
model = AutoModelForSeq2SeqLM.from_pretrained(CONFIG['model_name'])

# Explicitly move model to CPU
model = model.to(device)

# Set pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

print(f"✅ Model loaded on CPU!")
print(f"   Model parameters: {model.num_parameters():,}")

# Clear memory
gc.collect()

# -------------------------------
# STEP 6: Preprocessing (FIXED VERSION)
# -------------------------------
def preprocess_function(examples):
    """Fixed preprocessing that properly returns all required columns"""
    # Tokenize inputs
    model_inputs = tokenizer(
        examples['prompt'],
        max_length=CONFIG['max_input_length'],
        padding='max_length',
        truncation=True,
        return_tensors=None
    )
    
    # Tokenize targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples['flutter_code'],
            max_length=CONFIG['max_target_length'],
            padding='max_length',
            truncation=True,
            return_tensors=None
        )
    
    # Replace padding token id with -100 so it's ignored by loss
    labels['input_ids'] = [
        [(label if label != tokenizer.pad_token_id else -100) for label in labels_example]
        for labels_example in labels['input_ids']
    ]
    
    # CRITICAL FIX: Add labels to model_inputs
    model_inputs['labels'] = labels['input_ids']
    
    return model_inputs

print("\n⚙️ Preprocessing dataset...")
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    batch_size=100,  # Process in smaller batches
    remove_columns=dataset['train'].column_names,
    desc="Tokenizing"
)

# VERIFICATION: Check that columns exist
print(f"\n✅ Preprocessing complete!")
print(f"   Train dataset columns: {tokenized_dataset['train'].column_names}")
print(f"   Sample shape - input_ids: {len(tokenized_dataset['train'][0]['input_ids'])}")
print(f"   Sample shape - labels: {len(tokenized_dataset['train'][0]['labels'])}")

# Clear memory
gc.collect()

# -------------------------------
# STEP 7: Training Arguments (FIXED FOR CPU)
# -------------------------------
training_args = TrainingArguments(
    # Output and logging
    output_dir=CONFIG['output_dir'],
    logging_dir=f"{CONFIG['output_dir']}/logs",
    logging_steps=CONFIG['logging_steps'],
    
    # Checkpointing
    save_strategy="steps",
    save_steps=CONFIG['save_steps'],
    save_total_limit=2,  # Keep fewer checkpoints
    
    # No evaluation for faster training
    eval_strategy="no",
    
    # Training hyperparameters
    num_train_epochs=CONFIG['num_epochs'],
    per_device_train_batch_size=CONFIG['batch_size'],
    per_device_eval_batch_size=CONFIG['batch_size'],
    gradient_accumulation_steps=CONFIG['gradient_accumulation_steps'],
    learning_rate=CONFIG['learning_rate'],
    warmup_steps=CONFIG['warmup_steps'],
    weight_decay=0.01,
    
    # CPU-specific settings - CRITICAL FIXES
    no_cuda=True,
    fp16=False,
    dataloader_num_workers=CONFIG['dataloader_num_workers'],  # 0 to prevent freezing
    dataloader_pin_memory=False,
    
    # IMPORTANT: Don't remove columns automatically
    remove_unused_columns=True,  # This is fine now since we have the right columns
    
    # Memory optimization
    gradient_checkpointing=False,  # Uses more memory but faster on CPU
    max_grad_norm=1.0,
    
    # Other
    load_best_model_at_end=False,
    report_to="none",
    push_to_hub=False,
    optim="adamw_torch",
    
    # Additional memory settings
    logging_first_step=True,
    disable_tqdm=False,  # Show progress bar
)

effective_batch = CONFIG['batch_size'] * CONFIG['gradient_accumulation_steps']
total_steps = len(tokenized_dataset['train']) // effective_batch * CONFIG['num_epochs']

print("\n📋 Training Configuration (CPU - Anti-Freeze Settings):")
print(f"   Device: CPU only")
print(f"   CPU cores available: {os.cpu_count()}")
print(f"   PyTorch threads: {torch.get_num_threads()}")
print(f"   Dataloader workers: {CONFIG['dataloader_num_workers']} (0 = no separate processes)")
print(f"   Batch size: {CONFIG['batch_size']}")
print(f"   Gradient accumulation: {CONFIG['gradient_accumulation_steps']}")
print(f"   Effective batch size: {effective_batch}")
print(f"   Epochs: {CONFIG['num_epochs']}")
print(f"   Total steps: ~{total_steps}")
print(f"   Training samples: {len(tokenized_dataset['train'])}")

# -------------------------------
# STEP 8: Data Collator
# -------------------------------
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

# -------------------------------
# STEP 9: Initialize Trainer
# -------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("✅ Trainer initialized for CPU training!")

# -------------------------------
# STEP 10: Find Last Checkpoint
# -------------------------------
def find_last_checkpoint(output_dir):
    if not os.path.isdir(output_dir):
        return None
    
    checkpoints = [
        os.path.join(output_dir, d)
        for d in os.listdir(output_dir)
        if d.startswith("checkpoint-")
    ]
    
    if not checkpoints:
        return None
    
    return sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]

last_checkpoint = find_last_checkpoint(CONFIG['output_dir'])

if last_checkpoint:
    print(f"\n🔁 Found checkpoint: {last_checkpoint}")
    print("   Training will resume from this checkpoint")
else:
    print("\n🆕 Starting fresh training")

# -------------------------------
# STEP 11: Start Training
# -------------------------------
print("\n" + "="*70)
print("🚀 STARTING CPU TRAINING (OPTIMIZED)")
print("="*70)
print("\n⚠️  WHAT TO EXPECT:")
print("   • Training will be SLOW (this is normal for CPU)")
print("   • Your laptop may run hot (this is normal)")
print("   • Each step takes 10-30 seconds (be patient!)")
print("   • Progress updates every 10 steps")
print("   • Checkpoints saved every 50 steps")
print("\n💡 IF IT STILL FREEZES:")
print("   • Close other applications")
print("   • Reduce batch_size to 1")
print("   • Reduce max_train_samples to 500")
print("   • Check Task Manager/Activity Monitor")
print("\n" + "="*70 + "\n")

try:
    trainer.train(resume_from_checkpoint=last_checkpoint)
    print("\n" + "="*70)
    print("✅ TRAINING COMPLETED SUCCESSFULLY!")
    print("="*70)
except KeyboardInterrupt:
    print("\n" + "="*70)
    print("⚠️ Training interrupted by user")
    print("   Progress saved. Re-run to resume from last checkpoint.")
    print("="*70)
except Exception as e:
    print("\n" + "="*70)
    print(f"❌ Training failed: {e}")
    print("="*70)
    import traceback
    traceback.print_exc()
    raise

# -------------------------------
# STEP 12: Save Final Model
# -------------------------------
print("\n💾 Saving final model...")
trainer.save_model(CONFIG['output_dir'])
tokenizer.save_pretrained(CONFIG['output_dir'])
print(f"✅ Model saved to: {CONFIG['output_dir']}")

# Clear memory
del model, trainer
gc.collect()

# -------------------------------
# STEP 13: Test the Model
# -------------------------------
print("\n" + "="*70)
print("🧪 TESTING THE MODEL")
print("="*70)

print("\nLoading trained model for testing...")
trained_model = AutoModelForSeq2SeqLM.from_pretrained(CONFIG['output_dir'])
trained_tokenizer = AutoTokenizer.from_pretrained(CONFIG['output_dir'])

# Keep model on CPU
trained_model = trained_model.to(device)

def generate_flutter_code(prompt, max_length=512):
    """Generate Flutter code from a prompt"""
    inputs = trained_tokenizer(
        prompt,
        return_tensors="pt",
        max_length=CONFIG['max_input_length'],
        truncation=True
    )
    
    outputs = trained_model.generate(
        inputs['input_ids'],
        max_length=max_length,
        num_beams=3,  # Reduced from 5
        early_stopping=True,
        temperature=0.7,
    )
    
    code = trained_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return code

# Test prompts
test_prompts = [
    "Create a simple login screen with email and password fields",
    "Build a card widget with an image, title, and description",
]

print("\n📝 Generating Flutter code for test prompts:\n")
for i, prompt in enumerate(test_prompts, 1):
    print(f"\n{'='*70}")
    print(f"Test {i}: {prompt}")
    print('='*70)
    print(generate_flutter_code(prompt))
    print()

print("\n" + "="*70)
print("✅ ALL DONE!")
print("="*70)
print(f"\n📁 Model saved in: {CONFIG['output_dir']}")
print("\n💡 Next steps:")
print("   1. If training worked: Increase max_train_samples gradually")
print("   2. If still freezing: Reduce batch_size to 1")
print("   3. For full training: Consider using Google Colab with GPU")
print("\n" + "="*70)

✅ Libraries imported successfully!
🖥️  Training on: CPU
💾 Available CPU cores: 8
✅ Device set to: cpu
🔧 PyTorch threads limited to: 4

📋 Configuration (CPU Training - Optimized):
   data_dir: ./flutter_dataset
   output_dir: ./flutter_codegen_model
   model_name: Salesforce/codet5p-220m
   max_input_length: 512
   max_target_length: 2048
   batch_size: 2
   gradient_accumulation_steps: 4
   num_epochs: 1
   learning_rate: 5e-05
   save_steps: 50
   logging_steps: 10
   warmup_steps: 20
   dataloader_num_workers: 0

⚠️  IMPORTANT CHANGES TO PREVENT FREEZING:
   ✓ Batch size reduced to 2 (was 8)
   ✓ Dataloader workers set to 0 (was 4) - CRITICAL FIX
   ✓ Training on limited samples (1000)
   ✓ PyTorch threads limited
   ✓ FIXED: Preprocessing to properly create columns

📂 Loading dataset from: ./flutter_dataset
   Found 10000 JSON files
   Loaded 100 examples...
   Loaded 200 examples...
   Loaded 300 examples...
   Loaded 400 examples...
   Loaded 500 examples...
   Loaded 600 examples

Tokenizing:   0%|          | 0/9000 [00:00<?, ? examples/s]



Tokenizing:   0%|          | 0/1000 [00:00<?, ? examples/s]


✅ Preprocessing complete!
   Train dataset columns: ['input_ids', 'attention_mask', 'labels']
   Sample shape - input_ids: 512
   Sample shape - labels: 2048

📋 Training Configuration (CPU - Anti-Freeze Settings):
   Device: CPU only
   CPU cores available: 8
   PyTorch threads: 4
   Dataloader workers: 0 (0 = no separate processes)
   Batch size: 2
   Gradient accumulation: 4
   Effective batch size: 8
   Epochs: 1
   Total steps: ~1125
   Training samples: 9000
✅ Trainer initialized for CPU training!

🆕 Starting fresh training

🚀 STARTING CPU TRAINING (OPTIMIZED)

⚠️  WHAT TO EXPECT:
   • Training will be SLOW (this is normal for CPU)
   • Your laptop may run hot (this is normal)
   • Each step takes 10-30 seconds (be patient!)
   • Progress updates every 10 steps
   • Checkpoints saved every 50 steps

💡 IF IT STILL FREEZES:
   • Close other applications
   • Reduce batch_size to 1
   • Reduce max_train_samples to 500
   • Check Task Manager/Activity Monitor




  trainer = Trainer(


In [None]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# -------------------------------
# 3️⃣ Model & tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # T5-based CodeT5
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 4️⃣ Dataset loading and preprocessing
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=2048,
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace pad token IDs with -100 to ignore them in loss computation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

tokenized = dataset.map(preprocess, batched=True)

# -------------------------------
# 5️⃣ Training arguments (compatible version)
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=5,
    logging_steps=50,
    save_steps=50,          # ✅ Save checkpoint every 50 steps
    save_strategy="steps",  # ✅ Save based on steps
    fp16=True               # Mixed precision (if GPU supports)
)

# -------------------------------
# 6️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# -------------------------------
# 7️⃣ Resume from last checkpoint if available
# -------------------------------
last_checkpoint = None
if os.path.isdir("flutter_codegen_model"):
    checkpoints = [os.path.join("flutter_codegen_model", d)
                   for d in os.listdir("flutter_codegen_model")
                   if d.startswith("checkpoint-")]
    if checkpoints:
        last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]
        print(f"🔁 Resuming from checkpoint: {last_checkpoint}")

# -------------------------------
# 8️⃣ Start / resume training
# -------------------------------
trainer.train(resume_from_checkpoint=last_checkpoint)

# -------------------------------
# 9️⃣ Save final model
# -------------------------------
trainer.save_model("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training complete! Checkpoints saved every 50 steps.")


Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(


OutOfMemoryError: CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 2.00 GiB of which 0 bytes is free. Of the allocated memory 1.71 GiB is allocated by PyTorch, and 15.94 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# -------------------------------
# 1️⃣ Install packages if needed
# -------------------------------
# !pip install transformers datasets sentencepiece
# !pip install torch --index-url https://download.pytorch.org/whl/cu118  # For CUDA support

# -------------------------------
# 2️⃣ Imports
# -------------------------------
import json
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# ✅ FORCE CPU TRAINING (no GPU)
device = torch.device("cpu")
print("🖥️ Using CPU for training (no GPU)")

# Clear CUDA cache if GPU was used before
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# -------------------------------
# 3️⃣ Model & tokenizer setup
# -------------------------------
model_name = "Salesforce/codet5p-220m"  # T5-based CodeT5
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Add pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Move model to CPU
model = model.to(device)

# -------------------------------
# 4️⃣ Dataset loading and preprocessing
# -------------------------------
dataset = load_dataset(
    "json",
    data_files="flutter_code_dataset.jsonl",
    split="train"
)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1)

def preprocess(examples):
    inputs = examples["input_text"]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=512,  # ⬇️ REDUCED from 512
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=2048,  # ⬇️ REDUCED from 2048
        padding="max_length",
        truncation=True
    )["input_ids"]
    
    # Replace pad token IDs with -100 to ignore them in loss computation
    labels = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels]
    model_inputs["labels"] = labels
    
    return model_inputs

tokenized = dataset.map(preprocess, batched=True, remove_columns=dataset["train"].column_names)

# -------------------------------
# 5️⃣ Training arguments (CPU OPTIMIZED)
# -------------------------------
args = TrainingArguments(
    output_dir="flutter_codegen_model",
    per_device_train_batch_size=2,  # Keep small for CPU
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,  # ⬇️ Reduced for CPU (less memory intensive)
    num_train_epochs=1,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=2,
    logging_steps=10,
    save_steps=50,
    save_strategy="steps",
    eval_steps=50,
    eval_strategy="steps",  # ✅ Fixed: was 'evaluation_strategy' in older versions
    fp16=False,  # ⬇️ Disable fp16 on CPU (not supported efficiently)
    no_cuda=True,  # ✅ FORCE CPU TRAINING
    use_cpu=True,  # ✅ Additional CPU flag
    dataloader_pin_memory=False,
    remove_unused_columns=True,
    report_to="none",
    max_grad_norm=1.0,
)

# -------------------------------
# 6️⃣ Trainer setup
# -------------------------------
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer
)

# Clear cache before training
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("⏳ Starting CPU training... (This will be slower than GPU but use system RAM)")
print("⚠️  CPU training is significantly slower. Expect 5-10x longer training times.")

# -------------------------------
# 7️⃣ Resume from last checkpoint if available
# -------------------------------
last_checkpoint = None
if os.path.isdir("flutter_codegen_model"):
    checkpoints = [os.path.join("flutter_codegen_model", d)
                   for d in os.listdir("flutter_codegen_model")
                   if d.startswith("checkpoint-")]
    if checkpoints:
        last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[-1]
        print(f"🔁 Resuming from checkpoint: {last_checkpoint}")

# -------------------------------
# 8️⃣ Start / resume training
# -------------------------------
try:
    trainer.train(resume_from_checkpoint=last_checkpoint)
except RuntimeError as e:
    if "out of memory" in str(e).lower():
        print("❌ Still running out of memory. Try one of these:")
        print("   1. Reduce gradient_accumulation_steps to 1")
        print("   2. Further reduce max_length values")
        print("   3. Switch to a smaller model (e.g., 'Salesforce/codet5-base')")
        print("   4. Use CPU training (slower but uses system RAM)")
    raise

# -------------------------------
# 9️⃣ Save final model
# -------------------------------
trainer.save_model("flutter_codegen_model")
tokenizer.save_pretrained("flutter_codegen_model")

print("✅ Training complete! Checkpoints saved every 50 steps.")
if torch.cuda.is_available():
    torch.cuda.empty_cache()

🖥️ Using CPU for training (no GPU)


Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  trainer = Trainer(


⏳ Starting CPU training... (This will be slower than GPU but use system RAM)
⚠️  CPU training is significantly slower. Expect 5-10x longer training times.


Step,Training Loss,Validation Loss
