# Telemetry Pipeline - Full Workflow
This notebook generates synthetic switch telemetry data, prepares it for GPT-2 fine-tuning using Hugging Face, and performs model compression using quantization, pruning, and knowledge distillation.

                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

Copyright (C) 2025 Shaji R. Nathan  
IP Infusion Inc.  
Email: shaji.nathan@ipinfusion.com  

This program is free software: you can redistribute it and/or modify  
it under the terms of the GNU Affero General Public License as  
published by the Free Software Foundation, either version 3 of the  
License, or (at your option) any later version.  

This program is distributed in the hope that it will be useful,  
but WITHOUT ANY WARRANTY; without even the implied warranty of  
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
GNU Affero General Public License for more details.  

You should have received a copy of the GNU Affero General Public License  
along with this program. If not, see <https://www.gnu.org/licenses/>.  

As per AGPLv3, if you modify this software and make it available over a  
network, you must provide the source code of your modifications under the  
same license.  

For inquiries, please contact:  
Shaji R. Nathan  
IP Infusion Inc.  
Email: shaji.nathan@ipinfusion.com  


# Safe Fine-Tuning and Model Saving 

In [None]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import load_dataset

# --- Debug GPU Information ---
# CUDA_LAUNCH_BLOCKING makes kernel launches synchronous so that errors are
# reported at the offending call. It has to be in the environment before CUDA
# is initialised, so export it before the first torch.cuda call below
# (get_device_name initialises the CUDA runtime).
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Force clearer error reporting from CUDA

print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"PyTorch Version: {torch.__version__}")

# --- Load Dataset ---
# train.jsonl is expected to hold one JSON object per line; the tokenization
# helper later reads its 'prompt' and 'response' fields.
dataset = load_dataset('json', data_files={'train': 'train.jsonl'})
# Hold out 20% for evaluation. A fixed seed keeps the split (and therefore the
# reported eval loss) comparable from one run to the next.
train_test_split = dataset['train'].train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# --- Load Tokenizer ---
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure tokenizer has padding token (GPT-2 does not have one by default)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- Load Model Safely ---
# Load the weights in float32. Trainer's fp16 mixed precision autocasts at run
# time and expects float32 parameters, so the checkpoint itself should not be
# loaded in float16 here.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)

# Move to the GPU only after the load succeeded, and only if one is present;
# on a CPU-only host the model stays on the CPU instead of raising.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# --- Tokenization Helper ---
def concatenate_prompt_response(examples):
    """Merge each prompt/response pair into one text and tokenize the batch.

    Args:
        examples: A batch mapping with parallel 'prompt' and 'response'
            sequences, as passed by ``Dataset.map(..., batched=True)``.

    Returns:
        The module-level tokenizer's output for the merged texts, truncated
        to at most 512 tokens per example.
    """
    texts = []
    for prompt, response in zip(examples['prompt'], examples['response']):
        # Same "prompt: ...\nresponse: ..." layout that is used at inference.
        texts.append(f"prompt: {prompt}\nresponse: {response}")
    return tokenizer(texts, truncation=True, max_length=512)

# Tokenize datasets
tokenized_train = train_dataset.map(concatenate_prompt_response, batched=True)
tokenized_eval = eval_dataset.map(concatenate_prompt_response, batched=True)

# --- Data Collator (dynamic padding) ---
# mlm=False selects causal language modelling (no masked-token objective);
# each batch is padded to its own longest sequence.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# --- Training Arguments ---
# Evaluate and checkpoint once per epoch, keeping only the two most recent
# checkpoints under ./results; log every 10 steps to TensorBoard in ./logs.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    # Mixed precision needs a CUDA device; requesting it on a CPU-only host
    # makes TrainingArguments raise, so enable it only when a GPU is present.
    fp16=torch.cuda.is_available(),
    logging_dir="./logs",
    logging_steps=10,
    report_to="tensorboard"
)

# --- Trainer Setup ---
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    tokenizer=tokenizer
)

# --- Train Model ---
trainer.train()

# --- Safe Save (CPU-based) ---
# Single source of truth for the output directory used by save and reload.
save_dir = "fine_tuned_gpt2_telemetry"

print("✅ Training complete. Saving model to CPU...")

# Move the weights to the CPU before serialising them.
model = model.to("cpu")
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"✅ Model and tokenizer saved safely to '{save_dir}'.")

# --- Post-save Reload Test ---
print("✅ Reloading saved model for sanity check...")

# Run the check on the GPU when there is one, otherwise on the CPU, so the
# sanity check itself cannot fail merely because CUDA is missing.
reload_device = "cuda" if torch.cuda.is_available() else "cpu"

reloaded_model = AutoModelForCausalLM.from_pretrained(save_dir, torch_dtype=torch.float32)
reloaded_model = reloaded_model.to(reload_device)

reloaded_tokenizer = AutoTokenizer.from_pretrained(save_dir)

# Quick inference test to confirm save/load worked: a single forward pass on
# a prompt in the training format; only the logits shape is inspected.
test_input = "prompt: What is knowledge distillation?\nresponse:"
inputs = reloaded_tokenizer(test_input, return_tensors="pt").to(reload_device)

with torch.no_grad():
    outputs = reloaded_model(**inputs)

print(f"✅ Reloaded model test passed. Output shape: {outputs.logits.shape}")


# Testing the Saved Model for Corruption (Load and GPU Transfer Check)

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Environment summary, so a failure below can be read against the exact
# CUDA / PyTorch combination in use.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if cuda_available:
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"PyTorch Version: {torch.__version__}")

# Load the fine-tuned checkpoint, then move it to the GPU as a separate step
# so the printed messages show which stage failed. Errors are reported rather
# than raised because this cell is a diagnostic.
try:
    teacher = AutoModelForCausalLM.from_pretrained("./fine_tuned_gpt2_telemetry", device_map=None)
    print("✅ Model loaded successfully to CPU.")

    if cuda_available:
        teacher = teacher.to("cuda")
        print("✅ Model moved to GPU successfully.")
    else:
        # A missing GPU is not a model problem; do not report it as one.
        print("⚠️ CUDA not available; skipping GPU move.")
except Exception as e:
    print(f"❌ Error during model load/move: {e}")


CUDA Available: True
CUDA Device: Quadro M1000M
CUDA Version: 11.7
PyTorch Version: 2.0.0+cu117




✅ Model loaded successfully to CPU.
✅ Model moved to GPU successfully.
