In [1]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should return your GPU model


True
NVIDIA GeForce RTX 3050


In [2]:
#download the model and tokenizer

from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto")

print("Model and tokenizer downloaded successfully!")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Model and tokenizer downloaded successfully!


In [4]:
#donwload the data

import pandas as pd
import glob

# Define the path to all CSV files in the "labelled data" folder
csv_files = glob.glob("data/reddit-mental-health-dataset/Original Reddit Data/Labelled Data/*.csv")

# Read all CSVs and combine them
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Show basic dataset info
print(df.info())
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   score      800 non-null    float64
 1   selftext   800 non-null    object 
 2   subreddit  800 non-null    object 
 3   title      800 non-null    object 
 4   Label      800 non-null    object 
 5   CAT 1      200 non-null    object 
dtypes: float64(1), object(5)
memory usage: 38.7+ KB
None
   score                                           selftext subreddit  \
0    1.0  Tried to watch this documentary “anxious Ameri...   Anxiety   
1    1.0  i’m currently laying in bed wide awake, feelin...   Anxiety   
2    2.0  Second time trying weed. First time felt close...   Anxiety   
3    1.0  I am not posting this for me, but rather for m...   Anxiety   
4    1.0  21 year old male been dealing with anxiety eve...   Anxiety   

                                               title             Label CAT 1  


After downloading the data move the data to "data" folder and run the below code to get the data in the required format.

In [5]:
import pandas as pd
import glob

# Load all CSV files from the correct path
csv_files = glob.glob("data/reddit-mental-health-dataset/Original Reddit Data/Labelled Data/*.csv")

# Read and merge all CSVs
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Keep only relevant columns
df = df[['title', 'selftext', 'Label']]

# Drop missing values in 'selftext'
df = df.dropna(subset=['selftext'])

# Combine title and selftext into one column
df['text'] = df['title'].fillna('') + " " + df['selftext']

# Keep only the final processed text and label
df = df[['text', 'Label']]

# Show updated dataset info
print(df.info())
print(df.head())

# Save the cleaned dataset as a CSV file
df.to_csv("data/reddit-mental-health-dataset/cleaned_mental_health_data.csv", index=False)

print("✅ Cleaned dataset saved successfully!")


<class 'pandas.core.frame.DataFrame'>
Index: 800 entries, 0 to 822
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    800 non-null    object
 1   Label   800 non-null    object
dtypes: object(2)
memory usage: 18.8+ KB
None
                                                text             Label
0  Do people get over anxiety? Tried to watch thi...  Drug and Alcohol
1  does anyone else have this big fear of suddenl...  Drug and Alcohol
2  3 hour long panic attack after trying weed Sec...  Drug and Alcohol
3  Please leave in the comments ANYTHING that has...  Drug and Alcohol
4  Alcohol induced 21 year old male been dealing ...  Drug and Alcohol
✅ Cleaned dataset saved successfully!


In [6]:
from transformers import AutoTokenizer
import pandas as pd

# Load cleaned dataset
df = pd.read_csv("data/reddit-mental-health-dataset/cleaned_mental_health_data.csv")

# Load tokenizer for TinyLlama
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenization function
def tokenize_text(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=512)

# Apply tokenization
df["input_ids"] = df["text"].apply(lambda x: tokenize_text(str(x))["input_ids"])

# Keep only tokenized inputs and labels
df = df[["input_ids", "Label"]]

# Save tokenized data as Parquet for efficient processing
df.to_parquet("data/reddit-mental-health-dataset/tokenized_mental_health_data.parquet", engine="pyarrow")

print("✅ Tokenization complete! Data saved as Parquet.")


✅ Tokenization complete! Data saved as Parquet.


In [7]:
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM
from datasets import load_dataset
import torch
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model
from transformers import BitsAndBytesConfig

# Enable 8-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,  # Use 8-bit quantization
    llm_int8_enable_fp32_cpu_offload=True  # Offload CPU computations to save VRAM
)

# Load model and force it onto the current CUDA device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map={'': torch.cuda.current_device()},  # Assign model to GPU
    quantization_config=quantization_config  # Apply quantization
)
  # Move model to GPU




# Apply LoRA for efficient fine-tuning
lora_config = LoraConfig(
    r=8, 
    lora_alpha=32, 
    target_modules=["q_proj", "v_proj"],  
    lora_dropout=0.05, 
    bias="none"
)
model = get_peft_model(model, lora_config)

# Load tokenized dataset
from datasets import DatasetDict

# Load tokenized dataset
dataset = load_dataset("parquet", data_files={"data": "data/reddit-mental-health-dataset/tokenized_mental_health_data.parquet"})

# Convert dataset to dictionary format
dataset = dataset["data"].train_test_split(test_size=0.1)

# Ensure `labels` are the same as `input_ids`
dataset = DatasetDict({
    "train": dataset["train"].map(lambda x: {"labels": x["input_ids"]}),
    "eval": dataset["test"].map(lambda x: {"labels": x["input_ids"]})
})


# Define training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_results",
    per_device_train_batch_size=2,  # Lower batch size for 4GB GPU
    per_device_eval_batch_size=2,
    num_train_epochs=3,  # Train for 3 epochs
    save_total_limit=2,
    eval_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="epoch"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["eval"]  # Add evaluation dataset
)


# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_model")
print("✅ Fine-tuning complete! Model saved.")


Generating data split: 800 examples [00:00, 63402.36 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 1888.71 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 1386.03 examples/s]


Epoch,Training Loss,Validation Loss
1,1.4604,No log
2,1.4047,No log
3,1.3815,No log


✅ Fine-tuning complete! Model saved.


In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from peft import PeftModel
import torch

# Define paths
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Base model
FINETUNED_PATH = "./fine_tuned_model"  # Where fine-tuned model will be saved

# Load base model
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="cpu")

# Load LoRA adapters and merge them
model = PeftModel.from_pretrained(model, FINETUNED_PATH)
model = model.merge_and_unload()  # Merge LoRA weights

# Save the full fine-tuned model
model.save_pretrained(FINETUNED_PATH)

# Save tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(FINETUNED_PATH)

# Save configuration
config = AutoConfig.from_pretrained(BASE_MODEL)
config.save_pretrained(FINETUNED_PATH)

print("✅ Full fine-tuned model saved successfully!")


✅ Full fine-tuned model saved successfully!


In [11]:
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_model")

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Define model path
MODEL_PATH = "./fine_tuned_model"

# Ensure model is properly loaded
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    quantization_config=quantization_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Function to generate responses
def chat_with_model(prompt, max_length=150):
    formatted_prompt = f"User: {prompt}\nAI:"
    
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(formatted_prompt, "").strip()
    return response

# Test the model with sample inputs
sample_inputs = [
    "I have been feeling very anxious lately. What should I do?",
    "How can I manage my stress effectively?",
    "I feel lonely and isolated. Can you help?",
]

# Generate responses for each input
for query in sample_inputs:
    response = chat_with_model(query)
    print(f"🗣️ **User:** {query}\n🤖 **AI:** {response}\n")


  from .autonotebook import tqdm as notebook_tqdm
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🗣️ **User:** I have been feeling very anxious lately. What should I do?
🤖 **AI:** Please try to avoid any negative stimuli and try to stay in a positive environment as much as possible. It's important that you are present in the moment and focus on your breathing or some other activity that helps reduce anxiety symptoms. Avoid overthinking things, it just adds more pressure for yourself to feel anxious. Practice mindfulness exercises like meditation, yoga, or deep breathing techniques when feeling anxious. And finally, seek help from someone who can provide guidance or support if needed. Remember that everyone has their own unique struggles and feelings, so don’t be afraid of seeking professional advice

🗣️ **User:** How can I manage my stress effectively?
🤖 **AI:** It's common to feel stressed at times, but it is essential to understand that there are ways to manage your stress better. Here are some tips:

1. Practice mindfulness - Start by taking a few minutes each day to focus on yo

In [2]:
from huggingface_hub import HfApi

# Replace with your Hugging Face username and model name
USERNAME = "tezodipta"
MODEL_NAME = "MindEase-Assistant-v0.1"

api = HfApi()

# Create a repository on Hugging Face (skip if already created)
api.create_repo(repo_id=f"{USERNAME}/{MODEL_NAME}", private=False, exist_ok=True)

# Upload the entire **fine-tuned model folder** instead of a sharded model
api.upload_folder(
    folder_path="./fine_tuned_model",  # Make sure this folder contains model files
    repo_id=f"{USERNAME}/{MODEL_NAME}",
)

print(f"✅ Fine-Tuned Model uploaded successfully: https://huggingface.co/{USERNAME}/{MODEL_NAME}")


adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

[A[A
adapter_model.safetensors:   2%|▏         | 98.3k/4.52M [00:00<00:04, 935kB/s]

tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 527kB/s] 0, 22.9MB/s]
adapter_model.safetensors: 100%|██████████| 4.52M/4.52M [00:01<00:00, 2.45MB/s]

model.safetensors: 100%|██████████| 4.40G/4.40G [03:26<00:00, 21.3MB/s]

Upload 3 LFS files: 100%|██████████| 3/3 [03:27<00:00, 69.13s/it] 


✅ Fine-Tuned Model uploaded successfully: https://huggingface.co/tezodipta/MindEase-Assistant-v0.1


In [None]:
#accessing a model from huggingface hub using api call
from huggingface_hub import InferenceClient

client = InferenceClient(
	provider="together",
	api_key="Api key"
)

messages = [
	{
		"role": "user",
		"content": "I am getting anxity attach , what should i do ?"
	}
]

completion = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3", 
	messages=messages, 
	max_tokens=500,
)

print(completion.choices[0].message)

ChatCompletionOutputMessage(role='assistant', content=" I'm really sorry that you're feeling this way, but I'm unable to provide the help that you need. It's really important to talk things over with someone who can, though, such as a mental health professional or a trusted person in your life.\n\nAnxiety attacks can be overwhelming, but there are techniques that might help you manage them. Here are a few suggestions:\n\n1. Deep breathing: Breathe in for a count of four, hold for a count of seven, and exhale for a count of eight. This can help slow your heart rate and calm your mind.\n\n2. Grounding techniques: Focus on something in your immediate environment, such as the feeling of the ground beneath your feet, the texture of a nearby object, or the sound of traffic outside.\n\n3. Progressive muscle relaxation: Tense and then release each muscle group in your body, starting from your toes and working your way up to your head.\n\n4. Mindfulness: Try to stay in the present moment and av