<a href="https://colab.research.google.com/github/komaravallibhavya/chatbot_llm/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
import torch
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps trl xformers peft accelerate bitsandbytes


In [2]:
from unsloth import FastLanguageModel

max_seq_length = 2048
load_in_4bit = True

# Choosing gemma-7b-bnb-4bit for efficient tuning
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gemma-7b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=load_in_4bit
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.12: Fast Gemma patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.57G [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

In [3]:
import pandas as pd
from datasets import Dataset

# Load the provided fashion dataset
fashion_dataset_path = "/content/fashion_advisor_transformed_dataset.csv"
fashion_data = pd.read_csv(fashion_dataset_path)

# Convert to HuggingFace dataset format
fashion_dataset = Dataset.from_pandas(fashion_data)


In [4]:
import torch
from unsloth import FastLanguageModel
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset, Dataset
import pandas as pd

# Load the fashion dataset
file_path = "/content/fashion_advisor_transformed_dataset.csv"
data = pd.read_csv(file_path)

# Prepare the dataset for training
def prepare_fashion_prompts(data):
    """Prepares dataset into prompt-response format."""
    prompts = []
    for _, row in data.iterrows():
        instruction = row.get("instruction", "Provide fashion advice.")
        input_context = row.get("input", "")
        response = row.get("response", "")
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_context}\n\n### Response:\n{response}\n"
        prompts.append({"text": prompt})
    return prompts

dataset = Dataset.from_pandas(pd.DataFrame(prepare_fashion_prompts(data)))

# Load the model
max_seq_length = 2048
load_in_4bit = True
model_name = "unsloth/llama-3-8b-bnb-4bit"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=load_in_4bit
)

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    max_seq_length=max_seq_length,
    use_rslora=False,
    loftq_config=None
)

# Training arguments
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        max_steps=60,
        num_train_epochs=4,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs"
    )
)

# Train the model
trainer_stats = trainer.train()

# Save the trained model
model.save_pretrained("fashion_advisor_model")

# Create a chatbot interface
def fashion_advisor_chatbot():
    FastLanguageModel.for_inference(model)

    print("Fashion Advisor Chatbot is ready! Type 'exit' to end the chat.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        prompt = f"### Instruction:\nProvide fashion advice.\n\n### Input:\n{user_input}\n\n### Response:\n"
        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

        outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)
        response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

        print(f"Fashion Advisor: {response.strip()}")

# Run the chatbot
if __name__ == "__main__":
    fashion_advisor_chatbot()


==((====))==  Unsloth 2024.12.12: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Unsloth 2024.12.12 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Map (num_proc=2):   0%|          | 0/50 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 50 | Num Epochs = 10
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
1,2.6719
2,2.6802
3,2.6335
4,2.4976
5,2.4098
6,2.1909
7,1.867
8,1.5327
9,1.2107
10,0.9792


Fashion Advisor Chatbot is ready! Type 'exit' to end the chat.
You: dress code for interview 
Fashion Advisor: ### Instruction:
Provide fashion advice.

### Input:
dress code for interview 

### Response:
For a interview, consider wearing clothing styles like Formal in fabrics such as Silk, Wool. Recommended colors are Navy, Burgundy, while avoiding Red. Accessorize with Diamond, Silver.
You: exit
Goodbye!


In [5]:
!pip install streamlit pyngrok transformers torch unsloth


Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.2-py3-none-any.whl (22 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m79.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64

In [6]:
%%writefile app.py
import streamlit as st
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch

# Load the model and tokenizer
@st.cache_resource
def load_model_and_tokenizer():
    model_path = "fashion_advisor_model"  # Path to your saved model
    model = FastLanguageModel.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

# Streamlit App
st.title("Fashion Advisor Chatbot")
st.write("Ask me for fashion advice!")

# User input
user_input = st.text_input("Enter your question:")

if user_input:
    # Prepare the prompt
    prompt = f"### Instruction:\nProvide fashion advice.\n\n### Input:\n{user_input}\n\n### Response:\n"
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    # Generate response
    outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Display response
    st.write("**Fashion Advisor:**", response.strip())


Writing app.py


In [None]:
from pyngrok import ngrok

# Start Streamlit app
!streamlit run app.py &

# Expose the app using ngrok
public_url = ngrok.connect(port=8501)
print(f"Streamlit app is live at: {public_url}")



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.34.35.46:8501[0m
[0m
[34m  Stopping...[0m


KeyboardInterrupt: 

In [None]:
!touch requirements.txt

In [None]:
!cat requirements.txt

In [None]:
!pip freeze > requirements.txt

In [None]:
!cat requirements.txt

absl-py==1.4.0
accelerate==1.2.1
aiohappyeyeballs==2.4.4
aiohttp==3.11.10
aiosignal==1.3.2
alabaster==1.0.0
albucore==0.0.19
albumentations==1.4.20
altair==5.5.0
annotated-types==0.7.0
anyio==3.7.1
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
array_record==0.5.1
arviz==0.20.0
astropy==6.1.7
astropy-iers-data==0.2024.12.16.0.35.48
astunparse==1.6.3
async-timeout==4.0.3
atpublic==4.1.0
attrs==24.3.0
audioread==3.0.1
autograd==1.7.0
babel==2.16.0
backcall==0.2.0
beautifulsoup4==4.12.3
bigframes==1.29.0
bigquery-magics==0.4.0
bitsandbytes==0.45.0
bleach==6.2.0
blinker==1.9.0
blis==0.7.11
blosc2==2.7.1
bokeh==3.6.2
Bottleneck==1.4.2
bqplot==0.12.43
branca==0.8.1
CacheControl==0.14.1
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.12.14
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
chex==0.1.88
clarabel==0.9.0
click==8.1.7
cloudpathlib==0.20.0
cloudpickle==3.1.0
cmake==3.31.2
cmdstanpy==1.2.5
colorcet==3.1.0
colorlover==0.3.0
colour==0.1.5
community==1.0.0b1
confection==0.1.5
c

In [None]:
pip install huggingface-hub




In [None]:
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineG

In [None]:
model.push_to_hub("GBSP/fashion_advisor_model")

HfHubHTTPError: (Request ID: Root=1-677af656-20eebe2018da7a4c188b6241;5ec9a2a2-6b6b-4fb8-b31a-93c4fd33d2bf)

403 Forbidden: You don't have the rights to create a model under the namespace "GBSP".
Cannot access content at: https://huggingface.co/api/repos/create.
Make sure your token has the correct permissions.