In [2]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should return your GPU model


True
NVIDIA GeForce GTX 1650 with Max-Q Design


In [8]:
#download the model and tokenizer

from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto",torch_dtype=torch.float16,quantization_config=quant_config)


print("Model and tokenizer downloaded successfully!")


Model and tokenizer downloaded successfully!


In [6]:
#this block is for clearing cuda memory and clearing Ram
import torch
import gc

# Delete model & tokenizer to free RAM
# del model
# del tokenizer

# Force garbage collection to free memory
gc.collect()

# Clear CUDA cache (GPU memory)
torch.cuda.empty_cache()

# Restart Python process to fully clear RAM (Optional, but effective)
import os
os._exit(0)


: 

In [9]:
#testing the model witout fine tuning
def chat_with_model(prompt, max_length=150):
    # Format input prompt
    formatted_prompt = f"User: {prompt}\nAI:"  
    
    # Tokenize input
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)

    # Generate output
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=100,
            temperature=0.7,  # Adjust randomness
            top_p=0.9,  # Nucleus sampling
            do_sample=True,  # Enable sampling for diverse responses
            repetition_penalty=1.2,  # Reduce repetition
            use_cache=True,  # Enable KV cache
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id # Avoid padding errors
        )
    
    # Decode and clean response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(formatted_prompt, "").strip()
    return response


In [10]:
query = "Hey, I'm feeling stressed today. Any advice?"
response = chat_with_model(query)
print(f"🗣️ **User:** {query}\n🤖 **AI:** {response}")


🗣️ **User:** Hey, I'm feeling stressed today. Any advice?
🤖 **AI:** Sure thing! Here are a few things that can help you feel more relaxed and centered:
1) Practice mindfulness meditation - this involves focusing your attention on the present moment without judgment, allowing you to become more aware of your thoughts and emotions. 2) Take deep breaths or practice yoga inhalations and exhales - these exercises can calm


In [None]:
#donwload the data
#and making a dataframe of the data

import pandas as pd
import glob

# Define the path to all CSV files in the "labelled data" folder
csv_files = glob.glob("data/mental_health_counseling_conversations/Original Reddit Data/Labelled Data/*.csv")

# Read all CSVs and combine them
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Show basic dataset info
print(df.info())
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   score      800 non-null    float64
 1   selftext   800 non-null    object 
 2   subreddit  800 non-null    object 
 3   title      800 non-null    object 
 4   Label      800 non-null    object 
 5   CAT 1      200 non-null    object 
dtypes: float64(1), object(5)
memory usage: 38.7+ KB
None
   score                                           selftext subreddit  \
0    1.0  Tried to watch this documentary “anxious Ameri...   Anxiety   
1    1.0  i’m currently laying in bed wide awake, feelin...   Anxiety   
2    2.0  Second time trying weed. First time felt close...   Anxiety   
3    1.0  I am not posting this for me, but rather for m...   Anxiety   
4    1.0  21 year old male been dealing with anxiety eve...   Anxiety   

                                               title             Label CAT 1  


After downloading the data move the data to "data" folder and run the below code to get the data in the required format.

In [None]:
import pandas as pd
import glob

# Load all CSV files from the correct path
csv_files = glob.glob("data/mental_health_counseling_conversations/Original Reddit Data/Labelled Data/*.csv")

# Read and merge all CSVs
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Keep only relevant columns
df = df[['title', 'selftext', 'Label']]

# Drop missing values in 'selftext'
df = df.dropna(subset=['selftext'])

# Combine title and selftext into one column
df['text'] = df['title'].fillna('') + " " + df['selftext']

# Keep only the final processed text and label
df = df[['text', 'Label']]

# Show updated dataset info
print(df.info())
print(df.head())

# Save the cleaned dataset as a CSV file
df.to_csv("data/mental_health_counseling_conversations/cleaned_mental_health_data.csv", index=False)

print("✅ Cleaned dataset saved successfully!")


<class 'pandas.core.frame.DataFrame'>
Index: 800 entries, 0 to 822
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    800 non-null    object
 1   Label   800 non-null    object
dtypes: object(2)
memory usage: 18.8+ KB
None
                                                text             Label
0  Do people get over anxiety? Tried to watch thi...  Drug and Alcohol
1  does anyone else have this big fear of suddenl...  Drug and Alcohol
2  3 hour long panic attack after trying weed Sec...  Drug and Alcohol
3  Please leave in the comments ANYTHING that has...  Drug and Alcohol
4  Alcohol induced 21 year old male been dealing ...  Drug and Alcohol
✅ Cleaned dataset saved successfully!


In [None]:
from transformers import AutoTokenizer
import pandas as pd

# Load cleaned dataset
df = pd.read_csv("data/mental_health_counseling_conversations/cleaned_mental_health_data.csv")

# Load tokenizer for TinyLlama
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenization function
def tokenize_text(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=512)

# Apply tokenization
df["input_ids"] = df["text"].apply(lambda x: tokenize_text(str(x))["input_ids"])

# Keep only tokenized inputs and labels
df = df[["input_ids", "Label"]]

# Save tokenized data as Parquet for efficient processing
df.to_parquet("data/mental_health_counseling_conversations/tokenized_mental_health_data.parquet", engine="pyarrow")

print("✅ Tokenization complete! Data saved as Parquet.")


✅ Tokenization complete! Data saved as Parquet.


In [None]:
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM
from datasets import load_dataset
import torch
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model
from transformers import BitsAndBytesConfig

# Enable 8-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,  # Use 8-bit quantization
    llm_int8_enable_fp32_cpu_offload=True  # Offload CPU computations to save VRAM
)

# Load model and force it onto the current CUDA device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map={'': torch.cuda.current_device()},  # Assign model to GPU
    quantization_config=quantization_config  # Apply quantization
)
  # Move model to GPU




# Apply LoRA for efficient fine-tuning
lora_config = LoraConfig(
    r=8, 
    lora_alpha=32, 
    target_modules=["q_proj", "v_proj"],  
    lora_dropout=0.05, 
    bias="none"
)
model = get_peft_model(model, lora_config)

# Load tokenized dataset
from datasets import DatasetDict

# Load tokenized dataset
dataset = load_dataset("parquet", data_files={"data": "data/mental_health_counseling_conversations/tokenized_mental_health_data.parquet"})

# Convert dataset to dictionary format
dataset = dataset["data"].train_test_split(test_size=0.1)

# Ensure `labels` are the same as `input_ids`
dataset = DatasetDict({
    "train": dataset["train"].map(lambda x: {"labels": x["input_ids"]}),
    "eval": dataset["test"].map(lambda x: {"labels": x["input_ids"]})
})


# Define training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_results",
    per_device_train_batch_size=2,  # Lower batch size for 4GB GPU
    per_device_eval_batch_size=2,
    num_train_epochs=3,  # Train for 3 epochs
    save_total_limit=2,
    eval_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="epoch"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["eval"]  # Add evaluation dataset
)


# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_model")
print("✅ Fine-tuning complete! Model saved.")


Generating data split: 800 examples [00:00, 63402.36 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 1888.71 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 1386.03 examples/s]


Epoch,Training Loss,Validation Loss
1,1.4604,No log
2,1.4047,No log
3,1.3815,No log


✅ Fine-tuning complete! Model saved.


In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from peft import PeftModel
import torch

# Define paths
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Base model
FINETUNED_PATH = "./fine_tuned_model"  # Where fine-tuned model will be saved

# Load base model
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="cpu")

# Load LoRA adapters and merge them
model = PeftModel.from_pretrained(model, FINETUNED_PATH)
model = model.merge_and_unload()  # Merge LoRA weights

# Save the full fine-tuned model
model.save_pretrained(FINETUNED_PATH)

# Save tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(FINETUNED_PATH)

# Save configuration
config = AutoConfig.from_pretrained(BASE_MODEL)
config.save_pretrained(FINETUNED_PATH)

print("✅ Full fine-tuned model saved successfully!")


✅ Full fine-tuned model saved successfully!


In [26]:
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_model")

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import os
import gc
# Define model path
MODEL_PATH = "./fine_tuned_model"

# Ensure model is properly loaded
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit instead of 8-bit
    bnb_4bit_compute_dtype="float16"  # Keep precision
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    max_memory={0: "3GB", "cpu": "8GB"},  # Assign max GPU and CPU RAM
    quantization_config=quantization_config
)



tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Function to generate responses
def chat_with_model(prompt, max_length=150):
    formatted_prompt = f"User: {prompt}\nAI:"
    
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=100,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True
        )
    
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(formatted_prompt, "").strip()
    torch.cuda.empty_cache()
    gc.collect()
    return response

# Test the model with sample inputs
sample_inputs = [
    "hey feeling lonely today,what to do?",
]

# Generate responses for each input
for query in sample_inputs:
    response = chat_with_model(query)
    print(f"🗣️ **User:** {query}\n🤖 **AI:** {response}\n")


🗣️ **User:** hey feeling lonely today,what to do?
🤖 **AI:** There is no need for a special day to feel lonely. You can always talk to friends or family members in person and online. Or if you are alone and don't have any close people around, then go outside and enjoy the nature! Nature is often the best friend we have here on Earth.



In [12]:
%pip install nltk

Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 1.5/1.5 MB 11.4 MB/s eta 0:00:00
Installing collected packages: nltk
Successfully installed nltk-3.9.1
Note: you may need to restart the kernel to use updated packages.


In [13]:
%pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load your fine-tuned model
model_name = "./fine_tuned_model"  # Replace with TinyLlama or your fine-tuned model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

model.eval()

def calculate_perplexity(text):
    inputs = tokenizer(text, return_tensors="pt")
    input_ids = inputs.input_ids

    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        loss = outputs.loss

    perplexity = torch.exp(loss)
    return perplexity.item()

# Example usage
sample_text = "I feel anxious and overwhelmed."
print(f"Perplexity: {calculate_perplexity(sample_text)}")
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference, candidate):
    reference_tokens = [reference.split()]
    candidate_tokens = candidate.split()
    
    smoothie = SmoothingFunction().method4
    bleu_score = sentence_bleu(reference_tokens, candidate_tokens, smoothing_function=smoothie)
    
    return bleu_score

# Example usage
reference_response = "I understand how you feel. It’s okay to feel this way."
generated_response = "I get that you’re feeling overwhelmed. It’s normal to have these emotions."

print(f"BLEU Score: {calculate_bleu(reference_response, generated_response)}")
from rouge import Rouge 

rouge = Rouge()

def calculate_rouge(reference, candidate):
    scores = rouge.get_scores(candidate, reference)
    return scores

# Example usage
print(calculate_rouge(reference_response, generated_response))



KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import os

print("Starting model optimization process...")

# Define the model paths
FINE_TUNED_PATH = "./fine_tuned_model"
OPTIMIZED_PATH = "./optimized_model"

os.makedirs(OPTIMIZED_PATH, exist_ok=True)

# Load the fine-tuned model and tokenizer
print("\nStep 1: Loading the fine-tuned model...")
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_PATH)
model = AutoModelForCausalLM.from_pretrained(FINE_TUNED_PATH)
print(f"Loaded model: {model.__class__.__name__}")

# Basic test function to measure inference speed
def measure_inference_speed(model, tokenizer, prompt, n_runs=5):
    model.eval()
    
    # Warmup
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        _ = model.generate(**inputs, max_length=100)
    
    # Benchmark
    start_time = time.time()
    for _ in range(n_runs):
        with torch.no_grad():
            _ = model.generate(**inputs, max_length=100)
    avg_time = (time.time() - start_time) / n_runs
    
    return avg_time

# Test prompt
test_prompt = "I've been feeling really down lately and nothing seems to help."

# Measure speed of the original model
print("\nMeasuring original model inference speed...")
original_time = measure_inference_speed(model, tokenizer, test_prompt)
print(f"Original model average inference time: {original_time:.4f} seconds")

# Step 1: Export to TorchScript for faster CPU inference
print("\nStep 2: Converting model to TorchScript...")
try:
    # Set model to evaluation mode
    model.eval()
    
    # Create dummy input for tracing
    dummy_input = tokenizer(test_prompt, return_tensors="pt").input_ids
    
    # Export to TorchScript via tracing
    with torch.no_grad():
        traced_model = torch.jit.trace(
            model.forward, [dummy_input]
        )
        
    # Save the TorchScript model
    torch.jit.save(traced_model, os.path.join(OPTIMIZED_PATH, "model_torchscript.pt"))
    
    print("Model successfully converted to TorchScript!")
except Exception as e:
    print(f"Error during TorchScript conversion: {e}")
    print("Proceeding with standard optimization methods...")

# Step 2: Optimize with torch.compile if available (PyTorch 2.0+)
print("\nStep 3: Applying torch.compile optimization if available...")
try:
    if hasattr(torch, 'compile'):
        # Move model to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        
        # Apply torch.compile
        compiled_model = torch.compile(model)
        
        print("Model successfully optimized with torch.compile!")
        
        # Save the compiled model (will save the base model since compile is a runtime optimization)
        compiled_model.save_pretrained(os.path.join(OPTIMIZED_PATH, "compiled_model"))
    else:
        print("torch.compile not available in this PyTorch version")
except Exception as e:
    print(f"Error during torch.compile: {e}")

# Step 3: Save model with FP16 precision for smaller size and faster inference
print("\nStep 4: Creating half-precision (FP16) model...")
try:
    # Convert to half precision
    model_fp16 = model.half()
    
    # Save the half-precision model
    model_fp16.save_pretrained(os.path.join(OPTIMIZED_PATH, "fp16_model"))
    tokenizer.save_pretrained(os.path.join(OPTIMIZED_PATH, "fp16_model"))
    
    print("Half-precision model successfully saved!")
    
    # Measure speed with half-precision model
    model_fp16 = model_fp16.to("cuda" if torch.cuda.is_available() else "cpu")
    fp16_time = measure_inference_speed(model_fp16, tokenizer, test_prompt)
    print(f"FP16 model average inference time: {fp16_time:.4f} seconds")
    print(f"Speed improvement: {original_time/fp16_time:.2f}x faster")
except Exception as e:
    print(f"Error creating half-precision model: {e}")

# Save the tokenizer with the optimized models
tokenizer.save_pretrained(OPTIMIZED_PATH)

print("\nOptimization process complete!")
print(f"Optimized models saved to {OPTIMIZED_PATH}")

  from .autonotebook import tqdm as notebook_tqdm


Starting model optimization process...

Step 1: Loading the second fine-tuned model...
Loaded model: LlamaForCausalLM

Measuring original model inference speed...


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Original model average inference time: 24.8595 seconds

Step 2: Converting model to TorchScript...


  if sequence_length != 1:


Error during TorchScript conversion: Tracer cannot infer type of CausalLMOutputWithPast(loss=None, logits=tensor([[[ -5.0007,   0.7137,   6.5186,  ...,  -5.2262,  -2.2162,  -4.1483],
         [ -9.4134,  -9.5796,   4.5310,  ...,  -4.7552,  -7.9240,  -5.1685],
         [ -8.6430,  -8.7850,   5.0733,  ...,  -3.5022,  -6.3191,  -3.5752],
         ...,
         [ -7.3221,  -7.2797,   4.2244,  ...,  -5.3116,  -4.7137,  -1.1376],
         [-10.8785, -10.8925,   8.1621,  ...,  -5.5475,  -7.7020,  -4.5854],
         [-10.3107,  -9.9285,  10.7553,  ...,  -4.7242,  -8.0238,  -3.8785]]]), past_key_values=DynamicCache(), hidden_states=None, attentions=None)
:Cannot infer concrete type of torch.nn.Module
Proceeding with standard optimization methods...

Step 3: Applying torch.compile optimization if available...
Model successfully optimized with torch.compile!

Step 4: Creating half-precision (FP16) model...
Half-precision model successfully saved!
FP16 model average inference time: 10.7119 seconds

In [7]:

#clearing all the memory of the gpu including the cache
import gc
torch.cuda.memory_summary(device=None, abbreviated=False)
torch.cuda.empty_cache()
gc.collect()




147

In [28]:
#give me block of code for clearing cached ram memory and gpu memory
import torch
import gc

# Delete model & tokenizer to free RAM
# del model
# del tokenizer

#clearing all gpu memory


import gc
torch.cuda.memory_summary(device=None, abbreviated=False)
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

gc.collect()





127

In [4]:
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM

def generate_response(model_path, user_input, max_new_tokens=100):
    """Generate a single response from the model."""
    print("Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_path, torch_dtype=torch.bfloat16, device_map="auto", local_files_only=True
    )
    
    model.eval()
    
    inputs = tokenizer(f"User: {user_input}\nAI:", return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).replace(f"User: {user_input}\nAI:", "").strip()
    return response

if __name__ == "__main__":
    model_path = "./optimized_model/fp16_model"
    user_input = "How can I manage my anxiety symptoms?"
    response = generate_response(model_path, user_input)
    gc.collect()
    torch.cuda.empty_cache()
    print("AI:", response)


Loading model...
AI: You can start by acknowledging your symptoms and taking them seriously. If you find yourself feeling overwhelmed, take a deep breath and try to focus on one thing at a time. Practice mindfulness techniques like meditation, yoga or deep breathing. You can also try talking to a therapist or counselor who specializes in anxiety disorders. They can help you develop coping strategies and provide support as you work through your symptoms. Remember


In [2]:
from huggingface_hub import HfApi

# Replace with your Hugging Face username and model name
USERNAME = "tezodipta"
MODEL_NAME = "MindEase-Assistant-v0.1"

api = HfApi()

# Create a repository on Hugging Face (skip if already created)
api.create_repo(repo_id=f"{USERNAME}/{MODEL_NAME}", private=False, exist_ok=True)

# Upload the entire **fine-tuned model folder** instead of a sharded model
api.upload_folder(
    folder_path="./fine_tuned_model",  # Make sure this folder contains model files
    repo_id=f"{USERNAME}/{MODEL_NAME}",
)

print(f"✅ Fine-Tuned Model uploaded successfully: https://huggingface.co/{USERNAME}/{MODEL_NAME}")


adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

[A[A
adapter_model.safetensors:   2%|▏         | 98.3k/4.52M [00:00<00:04, 935kB/s]

tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 527kB/s] 0, 22.9MB/s]
adapter_model.safetensors: 100%|██████████| 4.52M/4.52M [00:01<00:00, 2.45MB/s]

model.safetensors: 100%|██████████| 4.40G/4.40G [03:26<00:00, 21.3MB/s]

Upload 3 LFS files: 100%|██████████| 3/3 [03:27<00:00, 69.13s/it] 


✅ Fine-Tuned Model uploaded successfully: https://huggingface.co/tezodipta/MindEase-Assistant-v0.1


In [4]:
#accessing a model from huggingface hub using api call
from huggingface_hub import InferenceClient

# Fetch the API key from a file named api_key.txt
with open("api.txt", "r") as file:
	api_key = file.read().strip()

client = InferenceClient(
	provider="together",
	api_key=api_key
)

messages = [
	{
		"role": "user",
		"content": "What is the capital of France?"
	}
]

completion = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3", 
	messages=messages, 
	max_tokens=500,
)

print(completion.choices[0].message)

HfHubHTTPError: 402 Client Error: Payment Required for url: https://huggingface.co/api/inference-proxy/together/v1/chat/completions (Request ID: Root=1-67b42900-6d2a06a35b2b42a70a92e0fd;9ee54482-c3b7-40f2-8a7c-c308fa50d0c1)

You have exceeded your monthly included credits for Inference Endpoints. Subscribe to PRO to get 20x more monthly allowance.

In [7]:
!pip install together

Collecting together
  Downloading together-1.4.1-py3-none-any.whl.metadata (12 kB)
Collecting eval-type-backport<0.3.0,>=0.1.3 (from together)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Collecting pillow<12.0.0,>=11.1.0 (from together)
  Downloading pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting rich<14.0.0,>=13.8.1 (from together)
  Using cached rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting tabulate<0.10.0,>=0.9.0 (from together)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting typer<0.16,>=0.9 (from together)
  Downloading typer-0.15.1-py3-none-any.whl.metadata (15 kB)
Collecting markdown-it-py>=2.2.0 (from rich<14.0.0,>=13.8.1->together)
  Using cached markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)
Collecting shellingham>=1.3.0 (from typer<0.16,>=0.9->together)
  Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich<

In [27]:
from together import Together

with open("together_key.txt", "r") as file:
	api_key = file.read().strip()
# Initialize the client with API key
client = Together(api_key=api_key)

response = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    messages=[{"role": "user", "content": "What are some fun things to do in New York?"}],
    max_tokens=200,
)

print(response.choices[0].message.content)


The city that never sleeps! New York has endless options for entertainment, culture, and adventure. Here are some fun things to do in New York:

**Iconic Landmarks:**

1. Statue of Liberty and Ellis Island: Take a ferry to Liberty Island to see the iconic statue up close and visit the Ellis Island Immigration Museum.
2. Central Park: Explore the park's many walking paths, lakes, and landmarks like the Bethesda Fountain and Loeb Boathouse.
3. Times Square: Experience the bright lights and energy of the "Crossroads of the World."
4. Empire State Building: Enjoy panoramic views of the city from the observation deck on the 86th floor.
5. Brooklyn Bridge: Walk or bike across the iconic bridge for spectacular city views.

**Museums and Galleries:**

1. The Metropolitan Museum of Art: One of the world's largest and most renowned museums, with a collection that spans over 5,000 years of human history.
2.


In [18]:
!pip install groq

Collecting groq
  Downloading groq-0.18.0-py3-none-any.whl.metadata (14 kB)
Downloading groq-0.18.0-py3-none-any.whl (121 kB)
Installing collected packages: groq
Successfully installed groq-0.18.0


In [2]:
#gorq api

from groq import Groq

with open("gorq_key.txt", "r") as file:
	api_key = file.read().strip()

client = Groq(api_key=api_key)
completion = client.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=[
        {
            "role": "user",
            "content": "who is dhoni?"
        }
    ],
    temperature=1,
    max_completion_tokens=100,
    top_p=1,
    stream=True,
    stop=None,
)

for chunk in completion:
    print(chunk.choices[0].delta.content or "", end="")


MS Dhoni, also known as Mahendra Singh Dhoni, is a former Indian international cricketer who is widely regarded as one of the greatest wicket-keepers and captains in the history of the game. He was born on July 7, 1981, in Ranchi, Jharkhand, India.

Dhoni is known for his exceptional leadership skills, his ability to remain calm under pressure, and his impressive cricketing skills, which include:

1. **Wicket

In [None]:
#open router api
from openai import OpenAI
with open("openrouter_key.txt", "r") as file:
	api_key = file.read().strip()

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=api_key,
)

completion = client.chat.completions.create(
  extra_body={},
  model="mistralai/mistral-saba",
  messages=[
    {
      "role": "user",
      "content": "What is the meaning of life?"
    }
  ],
  max_tokens=100,
)
print(completion.choices[0].message.content)

The question "What is the meaning of life?" is one of the most profound and enduring philosophical inquiries. Different cultures, religions, and philosophical traditions offer a variety of answers. Here are a few perspectives:

1. **Existentialism**: Existentialists like Jean-Paul Sartre argued that life has no inherent meaning, and it is up to each individual to create their own purpose.

2. **Religious and Spiritual Views**: Many religions provide their own interpretations. For example:
   - In Christianity
