<a href="https://colab.research.google.com/github/sandeepangh782/RAG---High-Entropy-alloys/blob/main/Flan_t5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sentence-transformers faiss-cpu transformers torch accelerate



In [None]:
import pickle
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Seq2seq model used to generate the final answer from the retrieved context.
MODEL_NAME = "google/flan-t5-small"  # Smaller, more compatible model
# Must name the model the FAISS index was built with. The index used in this
# notebook is 1024-dimensional, which matches e5-large-v2; e5-small-v2 produces
# smaller vectors and cannot be searched against that index.
EMBEDDING_MODEL = 'intfloat/e5-large-v2'
# Locations of the prebuilt FAISS index and its pickled metadata.
INDEX_PATH = "/content/Metallurgy_FAISS_Index.bin"
METADATA_PATH = "/content/Metallurgy_Metadata.pkl"
# Both the embedding model and the generation pipeline are placed on this device.
DEVICE = "cpu"  # No GPU support for bitsandbytes, so use CPU

print(f"Using device: {DEVICE}")


def load_data(metadata_path, index_path):
    """Load the pickled metadata and the FAISS index from disk.

    Args:
        metadata_path: Path to the pickle file holding the metadata object.
        index_path: Path to the serialized FAISS index.

    Returns:
        A ``(metadata, index)`` tuple.

    Raises:
        FileNotFoundError: If either file does not exist.
    """
    import os  # local import so this block does not depend on the file's import list

    # faiss.read_index does not raise FileNotFoundError for a missing file
    # (it surfaces a generic error from the native layer), so check up front
    # to give callers one exception type for both missing-file cases.
    if not os.path.isfile(index_path):
        raise FileNotFoundError(f"FAISS index file not found: {index_path}")

    # NOTE: pickle.load can execute arbitrary code while deserializing; only
    # point this at metadata files that come from a trusted source.
    with open(metadata_path, "rb") as f:
        metadata = pickle.load(f)

    index = faiss.read_index(index_path)
    return metadata, index


# Loaded SentenceTransformer instances keyed by model name, so the encoder
# weights are loaded once per session instead of once per query.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name):
    """Return the cached SentenceTransformer for ``model_name``, loading it on first use."""
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name).to(DEVICE)
        _EMBEDDING_MODELS[model_name] = model
    return model


def get_relevant_chunks(query, index, metadata, top_k=3,
                        embedding_model_name='intfloat/e5-large-v2'):
    """Retrieve the text chunks most similar to ``query`` using FAISS.

    Args:
        query: The user's question as a string.
        index: FAISS index to search.
        metadata: Mapping whose ``"alloy_descriptions"`` entry is indexable by
            the ids the FAISS index returns.
        top_k: Maximum number of chunks to return.
        embedding_model_name: SentenceTransformer model used to embed the
            query. It must be the model the index was built with, otherwise
            the vector dimensions will not match.

    Returns:
        A list of up to ``top_k`` descriptions, in the order FAISS returned them.
    """
    embedding_model = _get_embedding_model(embedding_model_name)
    # NOTE(review): the E5 model card recommends prefixing queries with
    # "query: ". It is not added here because it has to match how the indexed
    # passages were encoded -- confirm against the index-building code.
    query_embedding = embedding_model.encode(query)

    # FAISS expects a 2D float32 array of shape (n_queries, dim).
    query_embedding = query_embedding.astype('float32').reshape(1, -1)

    # Before searching, check the dimensions:
    print(f"Index dimension: {index.d}, Query embedding dimension: {query_embedding.shape[1]}")

    _distances, neighbor_ids = index.search(query_embedding, top_k)
    descriptions = metadata["alloy_descriptions"]
    # FAISS pads the result with -1 when fewer than top_k neighbours exist;
    # used as a Python index, -1 would silently return the last description.
    return [descriptions[i] for i in neighbor_ids[0] if i >= 0]


# Text2text pipelines keyed by model name, so the language model is loaded
# once per session instead of on every question.
_QA_PIPELINES = {}


def generate_response(query, context, model_name=MODEL_NAME):
    """Generate an answer to ``query`` from the retrieved ``context`` chunks.

    Args:
        query: The user's question.
        context: Iterable of text chunks; they are joined with spaces and
            placed in the prompt ahead of the question.
        model_name: Hugging Face model id for the text2text-generation pipeline.

    Returns:
        The generated answer with surrounding whitespace stripped. If the
        model cannot be loaded or generation fails, the error is printed and
        a fixed explanatory string is returned instead of raising.
    """
    qa_pipeline = _QA_PIPELINES.get(model_name)
    if qa_pipeline is None:
        try:
            qa_pipeline = pipeline(
                "text2text-generation",
                model=model_name,
                device=DEVICE
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            return "Error loading the model. Try a smaller model or restart the runtime."
        # Only cache successful loads so a failed load is retried next call.
        _QA_PIPELINES[model_name] = qa_pipeline

    prompt = f"""Use the following context to answer the question at the end. If the context doesn't contain the answer, say that you don't know.

    Context:
    {' '.join(context)}

    Question: {query}
    Answer:"""

    try:
        result = qa_pipeline(prompt, max_length=512)
        return result[0]["generated_text"].strip()
    except Exception as e:
        print(f"Error during generation: {e}")
        return "An error occurred during response generation."

if __name__ == "__main__":
    # Load the metadata and FAISS index once before entering the query loop.
    try:
        metadata, index = load_data(METADATA_PATH, INDEX_PATH)
    except FileNotFoundError:
        # Name the configured paths so the user can see which files are expected.
        print(
            f"Error: index or metadata files not found at {INDEX_PATH} and "
            f"{METADATA_PATH}. Please ensure both files have been uploaded "
            "to those locations."
        )
        # SystemExit does not depend on the interactive exit() helper being available.
        raise SystemExit(1) from None

    # Interactive loop: retrieve context for each query, then generate an answer.
    while True:
        query = input("Enter your query (or type 'exit'): ").strip()
        if query.lower() == "exit":
            break
        if not query:
            # Nothing to search for; prompt again rather than embedding an empty string.
            continue

        relevant_chunks = get_relevant_chunks(query, index, metadata)
        response = generate_response(query, relevant_chunks)
        print("Response:", response)


Using device: cpu
Enter your query (or type 'exit'): What are the element compositions of the alloy: Al0.5NbTaTiV?


modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

Index dimension: 1024, Query embedding dimension: 1024


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cpu


Response: iv.
Enter your query (or type 'exit'): Name the element compositions of the alloy Al0.5NbTaTiV
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: Al (0.2), Ti (0.2), V (0.2), Nb (0.2), Ta (0.2) and without heat treatment,showing a BCC microstructure and BCC_SS phases
Enter your query (or type 'exit'): What is the microstructure of Al0.5NbTaTiV
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: BCC microstructure and BCC_SS phases
Enter your query (or type 'exit'): Does Al0.5NbTaTiV undergoes heat treatment?
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: No
Enter your query (or type 'exit'): Name all the elements that composes the alloy Hf0.75NbTa0.5Ti1.5Zr1.25
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: 4
Enter your query (or type 'exit'): exit


In [None]:
import pickle
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Seq2seq model used to generate the final answer from the retrieved context.
MODEL_NAME = "google/flan-t5-small"  # Smaller, more compatible model
# Must name the model the FAISS index was built with. The index used in this
# notebook is 1024-dimensional, which matches e5-large-v2; e5-small-v2 produces
# smaller vectors and cannot be searched against that index.
EMBEDDING_MODEL = 'intfloat/e5-large-v2'
# Locations of the prebuilt FAISS index and its pickled metadata.
INDEX_PATH = "/content/Metallurgy_FAISS_Index.bin"
METADATA_PATH = "/content/Metallurgy_Metadata.pkl"
# Both the embedding model and the generation pipeline are placed on this device.
DEVICE = "cpu"  # No GPU support for bitsandbytes, so use CPU

print(f"Using device: {DEVICE}")


def load_data(metadata_path, index_path):
    """Load the pickled metadata and the FAISS index from disk.

    Args:
        metadata_path: Path to the pickle file holding the metadata object.
        index_path: Path to the serialized FAISS index.

    Returns:
        A ``(metadata, index)`` tuple.

    Raises:
        FileNotFoundError: If either file does not exist.
    """
    import os  # local import so this block does not depend on the file's import list

    # faiss.read_index does not raise FileNotFoundError for a missing file
    # (it surfaces a generic error from the native layer), so check up front
    # to give callers one exception type for both missing-file cases.
    if not os.path.isfile(index_path):
        raise FileNotFoundError(f"FAISS index file not found: {index_path}")

    # NOTE: pickle.load can execute arbitrary code while deserializing; only
    # point this at metadata files that come from a trusted source.
    with open(metadata_path, "rb") as f:
        metadata = pickle.load(f)

    index = faiss.read_index(index_path)
    return metadata, index


# Loaded SentenceTransformer instances keyed by model name, so the encoder
# weights are loaded once per session instead of once per query.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name):
    """Return the cached SentenceTransformer for ``model_name``, loading it on first use."""
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name).to(DEVICE)
        _EMBEDDING_MODELS[model_name] = model
    return model


def get_relevant_chunks(query, index, metadata, top_k=3,
                        embedding_model_name='intfloat/e5-large-v2'):
    """Retrieve the text chunks most similar to ``query`` using FAISS.

    Args:
        query: The user's question as a string.
        index: FAISS index to search.
        metadata: Mapping whose ``"alloy_descriptions"`` entry is indexable by
            the ids the FAISS index returns.
        top_k: Maximum number of chunks to return.
        embedding_model_name: SentenceTransformer model used to embed the
            query. It must be the model the index was built with, otherwise
            the vector dimensions will not match.

    Returns:
        A list of up to ``top_k`` descriptions, in the order FAISS returned them.
    """
    embedding_model = _get_embedding_model(embedding_model_name)
    # NOTE(review): the E5 model card recommends prefixing queries with
    # "query: ". It is not added here because it has to match how the indexed
    # passages were encoded -- confirm against the index-building code.
    query_embedding = embedding_model.encode(query)

    # FAISS expects a 2D float32 array of shape (n_queries, dim).
    query_embedding = query_embedding.astype('float32').reshape(1, -1)

    # Before searching, check the dimensions:
    print(f"Index dimension: {index.d}, Query embedding dimension: {query_embedding.shape[1]}")

    _distances, neighbor_ids = index.search(query_embedding, top_k)
    descriptions = metadata["alloy_descriptions"]
    # FAISS pads the result with -1 when fewer than top_k neighbours exist;
    # used as a Python index, -1 would silently return the last description.
    return [descriptions[i] for i in neighbor_ids[0] if i >= 0]


# Text2text pipelines keyed by model name, so the language model is loaded
# once per session instead of on every question.
_QA_PIPELINES = {}


def generate_response(query, context, model_name=MODEL_NAME):
    """Generate an answer to ``query`` from the retrieved ``context`` chunks.

    Args:
        query: The user's question.
        context: Iterable of text chunks; they are joined with spaces and
            placed in the prompt ahead of the question.
        model_name: Hugging Face model id for the text2text-generation pipeline.

    Returns:
        The generated answer with surrounding whitespace stripped. If the
        model cannot be loaded or generation fails, the error is printed and
        a fixed explanatory string is returned instead of raising.
    """
    qa_pipeline = _QA_PIPELINES.get(model_name)
    if qa_pipeline is None:
        try:
            qa_pipeline = pipeline(
                "text2text-generation",
                model=model_name,
                device=DEVICE
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            return "Error loading the model. Try a smaller model or restart the runtime."
        # Only cache successful loads so a failed load is retried next call.
        _QA_PIPELINES[model_name] = qa_pipeline

    prompt = f"""Use the following context to answer the question at the end. If the context doesn't contain the answer, say that you don't know.

    Context:
    {' '.join(context)}

    Question: {query}
    Answer:"""

    try:
        result = qa_pipeline(prompt, max_length=512)
        return result[0]["generated_text"].strip()
    except Exception as e:
        print(f"Error during generation: {e}")
        return "An error occurred during response generation."

if __name__ == "__main__":
    # Load the metadata and FAISS index once before entering the query loop.
    try:
        metadata, index = load_data(METADATA_PATH, INDEX_PATH)
    except FileNotFoundError:
        # Name the configured paths so the user can see which files are expected.
        print(
            f"Error: index or metadata files not found at {INDEX_PATH} and "
            f"{METADATA_PATH}. Please ensure both files have been uploaded "
            "to those locations."
        )
        # SystemExit does not depend on the interactive exit() helper being available.
        raise SystemExit(1) from None

    # Interactive loop: retrieve context for each query, then generate an answer.
    while True:
        query = input("Enter your query (or type 'exit'): ").strip()
        if query.lower() == "exit":
            break
        if not query:
            # Nothing to search for; prompt again rather than embedding an empty string.
            continue

        relevant_chunks = get_relevant_chunks(query, index, metadata)
        response = generate_response(query, relevant_chunks)
        print("Response:", response)


Using device: cpu
Enter your query (or type 'exit'): Does the alloy Al0.5NbTaTiV contain five elemental compositions?
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: yes
Enter your query (or type 'exit'): Is the aluminum (Al) composition in Al0.5NbTaTiV equal to 0.111111111?
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: no
Enter your query (or type 'exit'): What is the composition of the alloy  Al0.5NbTaTiV
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: 0.2
Enter your query (or type 'exit'): What elements are present in Al0.5NbTaTiV
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: iv.
Enter your query (or type 'exit'): Name the elements that are present in Al0.5NbTaTiV
Index dimension: 1024, Query embedding dimension: 1024


Device set to use cpu


Response: iv.
Enter your query (or type 'exit'): exit
