In [1]:
#!pip install faiss-cpu chromadb llama-cpp-python sentence-transformers

In [2]:
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import json

# Load the embedding model
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load text documents (replace with your proprietary data source)
documents = [
    "Jamsetji Tata's vision laid the foundation for India's industrial revolution.",
    "The Tata group has pioneered industries like steel, aviation, and IT.",
    "Jamsetji's philosophy was about excellence, nation-building, and philanthropy.",
    "The Tata Trusts have contributed significantly to education and healthcare.",
]

# Generate embeddings
embeddings = np.array(embedding_model.encode(documents), dtype=np.float32)

# Create a FAISS index and add embeddings
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save FAISS index and document mapping
faiss.write_index(index, "vector_store.index")

# Save document mapping
with open("doc_map.json", "w") as f:
    json.dump(documents, f)


In [3]:
def retrieve_relevant_documents(query, k=3):
    """Retrieve top-k relevant documents for a query using FAISS"""
    query_embedding = np.array(embedding_model.encode([query]), dtype=np.float32)
    distances, indices = index.search(query_embedding, k)

    with open("doc_map.json", "r") as f:
        document_list = json.load(f)
    
    return [document_list[i] for i in indices[0]]


In [6]:
from huggingface_hub import hf_hub_download,HfApi
import os

# Security note: Never hardcode tokens! Use environment variables instead
hf_token = os.getenv("HF_TOKEN", "ACCESS_TOKEN")  # Replace with your actual token

api = HfApi()
files = api.list_repo_files(
    repo_id="TheBloke/deepseek-llm-7B-base-GGUF",
    token=hf_token
)

for filename in files:
    print(filename)

model_path = hf_hub_download(
    repo_id="TheBloke/deepseek-llm-7B-base-GGUF",
    filename="deepseek-llm-7b-base.Q8_0.gguf",
    token=hf_token,
    local_dir="C:/models"
)

.gitattributes
README.md
config.json
deepseek-llm-7b-base.Q2_K.gguf
deepseek-llm-7b-base.Q3_K_L.gguf
deepseek-llm-7b-base.Q3_K_M.gguf
deepseek-llm-7b-base.Q3_K_S.gguf
deepseek-llm-7b-base.Q4_0.gguf
deepseek-llm-7b-base.Q4_K_M.gguf
deepseek-llm-7b-base.Q4_K_S.gguf
deepseek-llm-7b-base.Q5_0.gguf
deepseek-llm-7b-base.Q5_K_M.gguf
deepseek-llm-7b-base.Q5_K_S.gguf
deepseek-llm-7b-base.Q6_K.gguf
deepseek-llm-7b-base.Q8_0.gguf


deepseek-llm-7b-base.Q8_0.gguf:  66%|######5   | 4.81G/7.35G [00:00<?, ?B/s]

Could not set the permissions on the file 'C:\models\.cache\huggingface\download\hKgVSwvTryGvDRigSBho6ExVV2g=.72da1d8c9050801d05fcc515c0d4706071591e6a52f03bb0eaff65ef78f6a50d.incomplete'. Error: [Errno 13] Permission denied: 'C:\\tmp_61b0a292-ee7b-4242-86ed-d5d1e5514b4b'.
Continuing without setting permissions.


In [9]:
from llama_cpp import Llama

# Load the on-prem model (adjust the path)
llm = Llama(model_path="C:/models/deepseek-llm-7b-base.Q8_0.gguf")

def generate_response(query):
    """Generate a response using retrieved context and the LLM"""
    retrieved_docs = retrieve_relevant_documents(query)
    context = "\n".join(retrieved_docs)
    
    prompt = f"""You are an AI agent using Retrieval-Augmented Generation (RAG). 
    Answer the query using the following retrieved documents:

    {context}

    Query: {query}
    Answer:
    """

    response = llm(prompt, max_tokens=300)
    return response["choices"][0]["text"]

# Example query
query = "What was Jamsetji Tata's industrial impact?"
response = generate_response(query)
print(response)


llama_model_loader: loaded meta data with 23 key-value pairs and 273 tensors from C:/models/deepseek-llm-7b-base.Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 30
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_cou

1. Jamsetji Tata's vision laid the foundation for India's industrial revolution.
    2. The Tata group has pioneered industries like steel, aviation, and IT.
    3. Jamsetji's philosophy was about excellence, nation-building, and philanthropy.

    Hint: You can use the following code blocks to generate a document.

    ```python
    # Define a template for the document
    template = (
        "Jamsetji Tata's vision laid the foundation for India's industrial "
        "revolution. The Tata group has pioneered industries like steel, "
        "aviation, and IT. Jamsetji's philosophy was about excellence, "
        "nation-building, and philanthropy."
    )
    
    # Generate the document using the template and the given retrieved documents
    document = (
        f"You are an AI agent using Retrieval-Augmented Generation (RAG). "
        f"Answer the query using the following retrieved documents:"
    ) + template + (
        "\n\nQuery: What was Jamsetji Tata's industrial impact?\n

In [10]:
class Agent:
    """Custom agent to decide whether to retrieve, generate, or refine responses"""

    def __init__(self, llm):
        self.llm = llm

    def decide_action(self, query):
        """Decide if retrieval is necessary or if LLM alone can answer"""
        prompt = f"""Determine if the query requires external retrieval. 
        Respond with 'retrieve' if knowledge from documents is needed, otherwise 'generate':

        Query: {query}
        Answer:
        """
        response = self.llm(prompt, max_tokens=10)["choices"][0]["text"].strip().lower()
        return response

    def execute(self, query):
        """Execute the best approach based on decision"""
        action = self.decide_action(query)

        if "retrieve" in action:
            return generate_response(query)
        else:
            return self.llm(query, max_tokens=300)["choices"][0]["text"]

# Initialize agent
agent = Agent(llm)

# Example agent decision
query = "Who founded Tata Steel?"
response = agent.execute(query)
print(response)


Llama.generate: 1 prefix-match hit, remaining 47 prompt tokens to eval
llama_perf_context_print:        load time =    7975.94 ms
llama_perf_context_print: prompt eval time =    3729.47 ms /    47 tokens (   79.35 ms per token,    12.60 tokens per second)
llama_perf_context_print:        eval time =    2021.41 ms /     9 runs   (  224.60 ms per token,     4.45 tokens per second)
llama_perf_context_print:       total time =    5771.57 ms /    56 tokens
Llama.generate: 1 prefix-match hit, remaining 5 prompt tokens to eval
llama_perf_context_print:        load time =    7975.94 ms
llama_perf_context_print: prompt eval time =     534.38 ms /     5 tokens (  106.88 ms per token,     9.36 tokens per second)
llama_perf_context_print:        eval time =   68075.19 ms /   299 runs   (  227.68 ms per token,     4.39 tokens per second)
llama_perf_context_print:       total time =   69373.74 ms /   304 tokens



What is Tata Steel in the UK?
Tata Steel Europe, based in the UK, is one of the world’s leading steel producers with 38,000 employees, 270 locations and a turnover of around £10 billion.
How many employees does Tata Steel UK have?
The company operates in the UK, France, Italy, the Netherlands, Germany and Belgium. In 2012, Tata Steel had 29,000 employees.
When was the Tata Group founded?
Tata Group, incorporated in 19th June 1868 as a private limited company, is an Indian conglomerate.
When was Tata Steel established?
In 1907, Tata Steel, then known as The Tata Iron and Steel Company, was established. In 1932, the company became the world’s second-largest steel producer, after US Steel.
Who is the largest steel producer in the world?
Steel is one of the most important materials in the world. The world’s largest steel-producing company is ArcelorMittal, with 221.7 million tons in 2018. The largest steel-producing country is China with 1.058 billion tons.
What is the biggest Tata Group 

In [12]:
# Example agent decision
query = "Who is the Owner of Tata Steel?"
response = agent.execute(query)
print(response)

Llama.generate: 1 prefix-match hit, remaining 50 prompt tokens to eval
llama_perf_context_print:        load time =    7975.94 ms
llama_perf_context_print: prompt eval time =    2698.97 ms /    50 tokens (   53.98 ms per token,    18.53 tokens per second)
llama_perf_context_print:        eval time =    1792.93 ms /     9 runs   (  199.21 ms per token,     5.02 tokens per second)
llama_perf_context_print:       total time =    4508.27 ms /    59 tokens
Llama.generate: 1 prefix-match hit, remaining 8 prompt tokens to eval
llama_perf_context_print:        load time =    7975.94 ms
llama_perf_context_print: prompt eval time =     483.43 ms /     8 tokens (   60.43 ms per token,    16.55 tokens per second)
llama_perf_context_print:        eval time =   65574.39 ms /   299 runs   (  219.31 ms per token,     4.56 tokens per second)
llama_perf_context_print:       total time =   66780.06 ms /   307 tokens



Who is the Owner of Tata Steel?
The Tata Group is a large conglomerate that is owned by the Tata family. The Tata Group is involved in a number of different businesses, including steel, hospitality, automobiles, and telecommunications. The Tata Group is one of the largest and most successful conglomerates in the world.
How to Contact Tata Steel?
If you have any questions about Tata Steel or if you would like to contact us for any reason, please feel free to do so. You can reach us at the following address:
Tata Steel Limited
1777 West Loop South
Suite 600
Houston, TX 77027
Phone: 713.626.4241
Fax: 713.626.4222
We will be happy to assist you in any way we can.
Tata Steel has a long and storied history in the steel industry. The company has been around for over 150 years and is one of the largest steel producers in the world. Tata Steel is a part of the Tata Group, which is one of the largest conglomerates in India. The company has operations in over 50 countries around the world and em