# Pre-requisite Step 1: Install transformers torch

In [14]:
# Install the necessary libraries
!pip install transformers torch



#Pre-Requisite Step 2: Install hugging face CLI

In [15]:
#In the snippet above, we also installed the [cli] extra dependencies to make
#the user experience better, especially when using the delete-cache command.
!pip install -U "huggingface_hub[cli]"



# Pre-Requisite Step 3: Login to hugging face CLI

In [16]:
# Input the necessary token
# Answer the necessary questions
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: write

# Step 1: Install Dependencies (if not already installed)

In [18]:
# If better GPU memory optimization and a web UI is required need to add
# accelerate and gradio
!pip install transformers accelerate torch gradio




#Step 2: Load the Model and Tokenizer

In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model name
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Use FP16 for efficiency
    device_map="auto"  # Automatically use GPU if available
)

# Move model to device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_e

# Step 3: Define the Chat Function

In [6]:
#This function processes user input and generates a response.
def generate_response(user_input, chat_history=[]):
    """
    Generates a response using Llama 2 model.

    Parameters:
    - user_input (str): The user's input message.
    - chat_history (list): Stores past messages to maintain context.

    Returns:
    - response_text (str): The chatbot's reply.
    """
    # Format conversation history
    chat_history.append(f"User: {user_input}")
    history_text = "\n".join(chat_history)

    # Tokenize input
    inputs = tokenizer(history_text, return_tensors="pt", truncation=True, max_length=1024).to(device)

    # Generate response
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            max_length=1024,
            temperature=0.7,  # Adjust randomness
            top_p=0.9,  # Nucleus sampling
            repetition_penalty=1.2  # Reduce repetition
        )

    # Decode and format response
    response_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

    # Add chatbot response to history
    chat_history.append(f"Chatbot: {response_text}")

    return response_text, chat_history


#  Step 4: Build a CLI Chatbot


In [11]:
def chat():
    print("Chatbot is ready! Type 'exit' to stop.")
    chat_history = []

    while True:
        user_input = input("\nYou: ")
        if user_input.lower() == "exit":
            break

        response, chat_history = generate_response(user_input, chat_history)
        print("\nChatbot:", response)

# Run chatbot
chat()


Chatbot is ready! Type 'exit' to stop.

You: How will US Canada trade wars impact the economies of both the countries?

Chatbot: User: How will US Canada trade wars impact the economies of both the countries?

Expert1: The ongoing trade tensions between the US and Canada could have significant implications for both economies. For starters, the tariffs imposed by the US on Canadian steel and aluminum exports could lead to higher costs for American manufacturers that rely on these inputs. This could potentially reduce their competitiveness in global markets and negatively affect economic growth.

On the other hand, Canada is heavily reliant on trade with its southern neighbor, so retaliatory measures taken by Ottawa in response to US tariffs could also hurt the Canadian economy. For instance, if Canada were to restrict imports of US goods such as dairy products or automobiles, it could lead to increased prices for consumers and reduced sales for Canadian businesses. Additionally, a decli