In [2]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should return your GPU model


True
NVIDIA GeForce GTX 1650 with Max-Q Design


In [8]:
#download the model and tokenizer

from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto",torch_dtype=torch.float16,quantization_config=quant_config)


print("Model and tokenizer downloaded successfully!")


Model and tokenizer downloaded successfully!


In [6]:
#this block is for clearing cuda memory and clearing Ram
import torch
import gc

# Delete model & tokenizer to free RAM
# del model
# del tokenizer

# Force garbage collection to free memory
gc.collect()

# Clear CUDA cache (GPU memory)
torch.cuda.empty_cache()

# Restart Python process to fully clear RAM (Optional, but effective)
import os
os._exit(0)


: 

In [9]:
#testing the model witout fine tuning
def chat_with_model(prompt, max_length=150):
    # Format input prompt
    formatted_prompt = f"User: {prompt}\nAI:"  
    
    # Tokenize input
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)

    # Generate output
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=100,
            temperature=0.7,  # Adjust randomness
            top_p=0.9,  # Nucleus sampling
            do_sample=True,  # Enable sampling for diverse responses
            repetition_penalty=1.2,  # Reduce repetition
            use_cache=True,  # Enable KV cache
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id # Avoid padding errors
        )
    
    # Decode and clean response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(formatted_prompt, "").strip()
    return response


In [10]:
query = "Hey, I'm feeling stressed today. Any advice?"
response = chat_with_model(query)
print(f"🗣️ **User:** {query}\n🤖 **AI:** {response}")


🗣️ **User:** Hey, I'm feeling stressed today. Any advice?
🤖 **AI:** Sure thing! Here are a few things that can help you feel more relaxed and centered:
1) Practice mindfulness meditation - this involves focusing your attention on the present moment without judgment, allowing you to become more aware of your thoughts and emotions. 2) Take deep breaths or practice yoga inhalations and exhales - these exercises can calm


In [None]:
#donwload the data
#and making a dataframe of the data

import pandas as pd
import glob

# Define the path to all CSV files in the "labelled data" folder
csv_files = glob.glob("data/mental_health_counseling_conversations/Original Reddit Data/Labelled Data/*.csv")

# Read all CSVs and combine them
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Show basic dataset info
print(df.info())
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   score      800 non-null    float64
 1   selftext   800 non-null    object 
 2   subreddit  800 non-null    object 
 3   title      800 non-null    object 
 4   Label      800 non-null    object 
 5   CAT 1      200 non-null    object 
dtypes: float64(1), object(5)
memory usage: 38.7+ KB
None
   score                                           selftext subreddit  \
0    1.0  Tried to watch this documentary “anxious Ameri...   Anxiety   
1    1.0  i’m currently laying in bed wide awake, feelin...   Anxiety   
2    2.0  Second time trying weed. First time felt close...   Anxiety   
3    1.0  I am not posting this for me, but rather for m...   Anxiety   
4    1.0  21 year old male been dealing with anxiety eve...   Anxiety   

                                               title             Label CAT 1  


After downloading the data move the data to "data" folder and run the below code to get the data in the required format.

In [None]:
import pandas as pd
import glob

# Load all CSV files from the correct path
csv_files = glob.glob("data/mental_health_counseling_conversations/Original Reddit Data/Labelled Data/*.csv")

# Read and merge all CSVs
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Keep only relevant columns
df = df[['title', 'selftext', 'Label']]

# Drop missing values in 'selftext'
df = df.dropna(subset=['selftext'])

# Combine title and selftext into one column
df['text'] = df['title'].fillna('') + " " + df['selftext']

# Keep only the final processed text and label
df = df[['text', 'Label']]

# Show updated dataset info
print(df.info())
print(df.head())

# Save the cleaned dataset as a CSV file
df.to_csv("data/mental_health_counseling_conversations/cleaned_mental_health_data.csv", index=False)

print("✅ Cleaned dataset saved successfully!")


<class 'pandas.core.frame.DataFrame'>
Index: 800 entries, 0 to 822
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    800 non-null    object
 1   Label   800 non-null    object
dtypes: object(2)
memory usage: 18.8+ KB
None
                                                text             Label
0  Do people get over anxiety? Tried to watch thi...  Drug and Alcohol
1  does anyone else have this big fear of suddenl...  Drug and Alcohol
2  3 hour long panic attack after trying weed Sec...  Drug and Alcohol
3  Please leave in the comments ANYTHING that has...  Drug and Alcohol
4  Alcohol induced 21 year old male been dealing ...  Drug and Alcohol
✅ Cleaned dataset saved successfully!


In [None]:
from transformers import AutoTokenizer
import pandas as pd

# Load cleaned dataset
df = pd.read_csv("data/mental_health_counseling_conversations/cleaned_mental_health_data.csv")

# Load tokenizer for TinyLlama
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenization function
def tokenize_text(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=512)

# Apply tokenization
df["input_ids"] = df["text"].apply(lambda x: tokenize_text(str(x))["input_ids"])

# Keep only tokenized inputs and labels
df = df[["input_ids", "Label"]]

# Save tokenized data as Parquet for efficient processing
df.to_parquet("data/mental_health_counseling_conversations/tokenized_mental_health_data.parquet", engine="pyarrow")

print("✅ Tokenization complete! Data saved as Parquet.")


✅ Tokenization complete! Data saved as Parquet.


In [None]:
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM
from datasets import load_dataset
import torch
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model
from transformers import BitsAndBytesConfig

# Enable 8-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,  # Use 8-bit quantization
    llm_int8_enable_fp32_cpu_offload=True  # Offload CPU computations to save VRAM
)

# Load model and force it onto the current CUDA device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map={'': torch.cuda.current_device()},  # Assign model to GPU
    quantization_config=quantization_config  # Apply quantization
)
  # Move model to GPU




# Apply LoRA for efficient fine-tuning
lora_config = LoraConfig(
    r=8, 
    lora_alpha=32, 
    target_modules=["q_proj", "v_proj"],  
    lora_dropout=0.05, 
    bias="none"
)
model = get_peft_model(model, lora_config)

# Load tokenized dataset
from datasets import DatasetDict

# Load tokenized dataset
dataset = load_dataset("parquet", data_files={"data": "data/mental_health_counseling_conversations/tokenized_mental_health_data.parquet"})

# Convert dataset to dictionary format
dataset = dataset["data"].train_test_split(test_size=0.1)

# Ensure `labels` are the same as `input_ids`
dataset = DatasetDict({
    "train": dataset["train"].map(lambda x: {"labels": x["input_ids"]}),
    "eval": dataset["test"].map(lambda x: {"labels": x["input_ids"]})
})


# Define training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_results",
    per_device_train_batch_size=2,  # Lower batch size for 4GB GPU
    per_device_eval_batch_size=2,
    num_train_epochs=3,  # Train for 3 epochs
    save_total_limit=2,
    eval_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="epoch"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["eval"]  # Add evaluation dataset
)


# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_model")
print("✅ Fine-tuning complete! Model saved.")


Generating data split: 800 examples [00:00, 63402.36 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 1888.71 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 1386.03 examples/s]


Epoch,Training Loss,Validation Loss
1,1.4604,No log
2,1.4047,No log
3,1.3815,No log


✅ Fine-tuning complete! Model saved.


In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from peft import PeftModel
import torch

# Define paths
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Base model
FINETUNED_PATH = "./fine_tuned_model"  # Where fine-tuned model will be saved

# Load base model
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="cpu")

# Load LoRA adapters and merge them
model = PeftModel.from_pretrained(model, FINETUNED_PATH)
model = model.merge_and_unload()  # Merge LoRA weights

# Save the full fine-tuned model
model.save_pretrained(FINETUNED_PATH)

# Save tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(FINETUNED_PATH)

# Save configuration
config = AutoConfig.from_pretrained(BASE_MODEL)
config.save_pretrained(FINETUNED_PATH)

print("✅ Full fine-tuned model saved successfully!")


✅ Full fine-tuned model saved successfully!


In [26]:
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_model")

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import os
import gc
# Define model path
MODEL_PATH = "./fine_tuned_model"

# Ensure model is properly loaded
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit instead of 8-bit
    bnb_4bit_compute_dtype="float16"  # Keep precision
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    max_memory={0: "3GB", "cpu": "8GB"},  # Assign max GPU and CPU RAM
    quantization_config=quantization_config
)



tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Function to generate responses
def chat_with_model(prompt, max_length=150):
    formatted_prompt = f"User: {prompt}\nAI:"
    
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=100,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True
        )
    
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(formatted_prompt, "").strip()
    torch.cuda.empty_cache()
    gc.collect()
    return response

# Test the model with sample inputs
sample_inputs = [
    "hey feeling lonely today,what to do?",
]

# Generate responses for each input
for query in sample_inputs:
    response = chat_with_model(query)
    print(f"🗣️ **User:** {query}\n🤖 **AI:** {response}\n")


🗣️ **User:** hey feeling lonely today,what to do?
🤖 **AI:** There is no need for a special day to feel lonely. You can always talk to friends or family members in person and online. Or if you are alone and don't have any close people around, then go outside and enjoy the nature! Nature is often the best friend we have here on Earth.



In [7]:

#clearing all the memory of the gpu including the cache
import gc
torch.cuda.memory_summary(device=None, abbreviated=False)
torch.cuda.empty_cache()
gc.collect()




147

In [4]:
#accessing a model from huggingface hub using api call
from huggingface_hub import InferenceClient

# Fetch the API key from a file named api_key.txt
with open("api.txt", "r") as file:
	api_key = file.read().strip()

client = InferenceClient(
	provider="together",
	api_key=api_key
)

messages = [
	{
		"role": "user",
		"content": "What is the capital of France?"
	}
]

completion = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3", 
	messages=messages, 
	max_tokens=500,
)

print(completion.choices[0].message)

HfHubHTTPError: 402 Client Error: Payment Required for url: https://huggingface.co/api/inference-proxy/together/v1/chat/completions (Request ID: Root=1-67b42900-6d2a06a35b2b42a70a92e0fd;9ee54482-c3b7-40f2-8a7c-c308fa50d0c1)

You have exceeded your monthly included credits for Inference Endpoints. Subscribe to PRO to get 20x more monthly allowance.

In [18]:
!pip install groq

Collecting groq
  Downloading groq-0.18.0-py3-none-any.whl.metadata (14 kB)
Downloading groq-0.18.0-py3-none-any.whl (121 kB)
Installing collected packages: groq
Successfully installed groq-0.18.0


In [2]:
#gorq api

from groq import Groq

with open("gorq_key.txt", "r") as file:
	api_key = file.read().strip()

client = Groq(api_key=api_key)
completion = client.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=[
        {
            "role": "user",
            "content": "who is dhoni?"
        }
    ],
    temperature=1,
    max_completion_tokens=100,
    top_p=1,
    stream=True,
    stop=None,
)

for chunk in completion:
    print(chunk.choices[0].delta.content or "", end="")


MS Dhoni, also known as Mahendra Singh Dhoni, is a former Indian international cricketer who is widely regarded as one of the greatest wicket-keepers and captains in the history of the game. He was born on July 7, 1981, in Ranchi, Jharkhand, India.

Dhoni is known for his exceptional leadership skills, his ability to remain calm under pressure, and his impressive cricketing skills, which include:

1. **Wicket