In [2]:
# 1. Install the training stack with --no-deps, so pip installs only the packages
#    named on this line and skips their dependency resolution.
# NOTE(review): the previous comment said this step force-installs a specific
# PyTorch version, but no torch package is installed here. Presumably the intent
# is to leave the runtime's preinstalled, CUDA-matched torch untouched and thereby
# avoid the "Wheel Build" crash — confirm.
!pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes

# 2. Install Unsloth cleanly (straight from the GitHub repository, "colab-new" extra)
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# 3. Fix the specific PyArrow crash you saw earlier:
#    remove whatever pyarrow is currently installed, then pin version 17.0.0.
!pip uninstall -y pyarrow
!pip install pyarrow==17.0.0

import os
# 4. Check if GPU is actually attached.
#    Prints the name and total memory of CUDA device 0 (bytes / 1e9, i.e. decimal
#    GB), or instructions for enabling a GPU runtime. This is only a report: the
#    cell does not stop when no GPU is found.
#    NOTE(review): the label says "Available" but the value is total_memory,
#    not the currently free amount.
import torch
if torch.cuda.is_available():
    print(f"✅ GPU Detected: {torch.cuda.get_device_name(0)}")
    print(f"✅ VRAM Available: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("❌ NO GPU DETECTED. Go to Runtime -> Change Runtime Type -> T4 GPU")

Collecting ninja
  Downloading ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (5.1 kB)
Collecting flash-attn
  Downloading flash_attn-2.8.3.tar.gz (8.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xformers
  Downloading xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.2 kB)
Collecting trl
  Downloading trl-0.25.1-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Using cached ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (180 kB)
Downloading xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl (122.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/122.9 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.25.1-py3-none-any.whl (46

In [3]:
import kagglehub
import pandas as pd
import json
import os
import glob
from datasets import load_dataset

# 1. Download Dataset
print("⬇️ Downloading dataset...")
path = kagglehub.dataset_download("gargmanas/sentimental-analysis-for-tweets")
csv_files = glob.glob(os.path.join(path, "*.csv"))
if not csv_files:
    # Fail with an actionable message instead of a bare IndexError on csv_files[0].
    raise FileNotFoundError(f"No CSV file found in downloaded dataset directory: {path}")
csv_filename = csv_files[0]

# 2. Process Data
df = pd.read_csv(csv_filename)
df.columns = df.columns.str.strip()  # tolerate stray whitespace in header names

# Downsample for speed (fixed seed so the same rows are picked on every run)
max_rows = 2000
sample_seed = 42
if len(df) > max_rows:
    df = df.sample(max_rows, random_state=sample_seed)

# Detect column names dynamically. This does not depend on the row, so it is
# resolved once here rather than on every loop iteration. When the preferred
# names are absent, fall back to the second and third columns by position.
txt_col = "message" if "message" in df.columns else df.columns[1]
lbl_col = "label" if "label" in df.columns else df.columns[2]

# Convert to Alpaca format: one {"instruction", "input", "output"} record per row.
# A label equal to 1 maps to "High"; every other value maps to "Low".
# NOTE(review): the reasoning text is a fixed sentence per label, not derived
# from the tweet itself.
training_data = []
for tweet, label_raw in zip(df[txt_col], df[lbl_col]):
    if label_raw == 1:
        risk_label = "High"
        reasoning = "User expresses negative affect or depressive symptoms."
    else:
        risk_label = "Low"
        reasoning = "User expresses neutral or positive sentiments."

    training_data.append({
        "instruction": "Analyze this tweet for signs of depression. Provide a risk label and reasoning.",
        "input": str(tweet),
        "output": f"Risk: {risk_label}\nReasoning: {reasoning}"
    })

# Save (json.dump escapes non-ASCII by default; the encoding is stated explicitly
# so the file does not depend on the platform default)
with open("depression_train_KAGGLE.json", "w", encoding="utf-8") as f:
    json.dump(training_data, f, indent=4)

print("✅ Data ready.")

⬇️ Downloading dataset...
Downloading from https://www.kaggle.com/api/v1/datasets/download/gargmanas/sentimental-analysis-for-tweets?dataset_version_number=1...


100%|██████████| 476k/476k [00:00<00:00, 106MB/s]

Extracting files...
✅ Data ready.





In [4]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# 1. Configuration
# These three names are module-level settings; max_seq_length is reused by the
# trainer further down, so the names are kept as-is.
max_seq_length = 2048
dtype = None          # forwarded unchanged to from_pretrained
load_in_4bit = True   # forwarded unchanged to from_pretrained

# 2. Load the Model
print("⏳ Loading Llama-3 Model... (This takes ~2 mins)")
base_model_kwargs = dict(
    model_name="unsloth/llama-3-8b-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# 3. Add Adapters (LoRA)
# The target modules are listed in two groups purely for readability; the
# concatenation yields the same seven names in the same order.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_kwargs = dict(
    r=16,
    target_modules=attention_projections + mlp_projections,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

# 4. Load Data & Format Prompts
# Read the JSON file "depression_train_KAGGLE.json" as the "train" split. The
# prompt formatter defined below reads the "instruction", "input" and "output"
# fields of each record.
dataset = load_dataset("json", data_files="depression_train_KAGGLE.json", split="train")

# Alpaca-style prompt template with three positional `{}` slots, filled in order
# via str.format: instruction, input, response. Training fills all three slots;
# inference passes "" for the response so the model continues after
# "### Response:". The "### Response:" marker is also what the inference code
# splits on to isolate the model's answer, so do not reword it.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def formatting_prompts_func(examples):
    """Render a batch of records into complete training prompts.

    Args:
        examples: a batch mapping with parallel "instruction", "input" and
            "output" sequences (one entry per record).

    Returns:
        A dict with a single key "text" holding one string per record: the
        module-level `alpaca_prompt` filled with that record's three fields,
        followed by `tokenizer.eos_token`.
    """
    columns = (examples["instruction"], examples["input"], examples["output"])
    rendered = [
        alpaca_prompt.format(instruction, context, response) + tokenizer.eos_token
        for instruction, context, response in zip(*columns)
    ]
    return {"text": rendered}

# Add the rendered "text" column to the dataset, processing records in batches.
dataset = dataset.map(formatting_prompts_func, batched=True)

# 5. Start Training
print("🚀 Starting Training... (This will take 10-15 minutes)")

# Mixed precision: bf16 when the GPU reports support for it, fp16 otherwise.
bf16_supported = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 examples per optimizer step
    warmup_steps=5,
    max_steps=60,  # Keep this small (60) for a quick student demo. Increase to 300 for real results.
    learning_rate=2e-4,
    fp16=not bf16_supported,
    bf16=bf16_supported,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",  # <--- PREVENTS THE WANDB LOGIN BLOCK
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

trainer.train()

# 6. Test It Immediately
# Smoke test: run one hand-written tweet through the freshly trained model and
# print the text generated after the "### Response:" marker.
print("\n" + "="*30)
print("✅ TRAINING COMPLETE! Testing Model...")
print("="*30)

FastLanguageModel.for_inference(model)
test_tweet = "I feel like I'm a burden to everyone around me."

# Same template as training, with an empty response slot for the model to fill.
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Analyze this tweet for signs of depression. Provide a risk label and reasoning.",
        test_tweet,
        "",
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
# The decoded text contains the prompt as well; keep only what follows the marker.
result = tokenizer.batch_decode(outputs)[0].split("### Response:")[-1].strip()

# Remove the end-of-turn token BEFORE printing. Previously the cleanup ran after
# the print and its result was never used, so "<|eot_id|>" leaked into the output.
final_output = result.replace("<|eot_id|>", "").strip()
print(f"Input: {test_tweet}\n\nAI Analysis:\n{final_output}")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
⏳ Loading Llama-3 Model... (This takes ~2 mins)
==((====))==  Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth 2025.11.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

🚀 Starting Training... (This will take 10-15 minutes)


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/2000 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,4.074
2,3.8849
3,4.1449
4,3.7115
5,3.4034
6,2.7591
7,2.5828
8,1.902
9,1.7754
10,1.4724



✅ TRAINING COMPLETE! Testing Model...
Input: I feel like I'm a burden to everyone around me.

AI Analysis:
Risk: High
Reasoning: User expresses negative affect or depressive symptoms.<|eot_id|>


In [5]:
from google.colab import drive
import shutil

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Save the model locally in Colab first
# Model and tokenizer are written into the same local folder, model first.
local_model_dir = "depression_lora_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(local_model_dir)

# 3. Copy it to your Google Drive
# This creates a folder "Depression_Project_Model" in your Drive;
# dirs_exist_ok=True lets a re-run copy into a folder that already exists.
destination_path = "/content/drive/MyDrive/Depression_Project_Model"
shutil.copytree(local_model_dir, destination_path, dirs_exist_ok=True)

print(f"✅ Model saved successfully to: {destination_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model saved successfully to: /content/drive/MyDrive/Depression_Project_Model


In [6]:
import gradio as gr

# 1. Define the Prediction Function
def predict_depression(text):
    """Generate the model's risk assessment for a single piece of text.

    Args:
        text: the tweet / message to analyze.

    Returns:
        The text the model generated after the "### Response:" marker, with the
        "<|eot_id|>" token removed and surrounding whitespace stripped. For
        empty, whitespace-only or None input, a short hint asking for text is
        returned and the model is not invoked.

    Relies on the notebook-level `tokenizer`, `model` and `alpaca_prompt`.
    """
    # Nothing to classify: skip the (slow) generation call entirely.
    if text is None or not str(text).strip():
        return "Please enter some text to analyze."

    # Same template as training, with an empty response slot for the model to fill.
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                "Analyze this tweet for signs of depression. Provide a risk label and reasoning.",
                text,
                "",
            )
        ], return_tensors = "pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
    result = tokenizer.batch_decode(outputs)[0]

    # The decoded text includes the prompt; keep only what follows the marker,
    # drop the end-of-turn token, then strip so no whitespace is left behind
    # where the token used to be.
    clean_result = result.split("### Response:")[-1]
    clean_result = clean_result.replace("<|eot_id|>", "")
    return clean_result.strip()

# 2. Build the Web App
# One-click sample inputs shown under the form (one single-item row per example).
example_tweets = [
    ["I feel like a burden to everyone around me."],
    ["Had a great time at the coffee shop today! #blessed"],
    ["I'm so tired of trying. It never gets better."],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Mental Health Risk Detector (Llama 3 Fine-Tune)")
    gr.Markdown("This AI model detects linguistic markers of depression in social media text.")

    # Two-column layout: input and button on the left, read-only result on the right.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Enter User Tweet", placeholder="Type something here...", lines=3)
            analyze_btn = gr.Button("Analyze Risk", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(label="AI Assessment", lines=4, interactive=False)

    # Clicking the button sends the input box's text through predict_depression
    # and writes the returned string into the result box.
    analyze_btn.click(fn=predict_depression, inputs=input_text, outputs=output_text)

    # Selecting an example fills the input box.
    gr.Examples(examples=example_tweets, inputs=input_text)

# 3. Launch with Public Link
# share=True requests a public URL; with debug=True the cell keeps running in
# Colab until it is interrupted (see the recorded output).
demo.launch(share=True, debug=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://1ffd2e0388d38f5cf6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://1ffd2e0388d38f5cf6.gradio.live


