<a href="https://colab.research.google.com/github/sanika2234/Prompt-Engineering-Class/blob/main/assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers torch gensim numpy scipy matplotlib



In [2]:
import torch
from transformers import AutoTokenizer, AutoModel
import gensim.downloader as api
import numpy as np
from scipy.spatial.distance import cosine
import matplotlib.pyplot as plt

In [5]:
# The tokenizer and the encoder are both loaded from the same pre-trained
# checkpoint via the Auto classes, so the checkpoint name is written once.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
model = AutoModel.from_pretrained(BERT_CHECKPOINT)

# Function to get the word embedding from BERT
def get_word_embedding(word):
    """Embed ``word`` by averaging BERT's final-layer token vectors.

    The text is encoded with the module-level ``tokenizer`` and passed through
    the module-level ``model`` with gradient tracking disabled. The last hidden
    state is averaged over dimension 1 and squeezed before being returned.

    NOTE(review): the tokenizer is called with its default settings, which for
    BERT presumably add [CLS]/[SEP]; if so, those vectors are part of the
    average — confirm this is intended.

    Args:
        word: Text to embed.

    Returns:
        A torch tensor holding the averaged embedding.
    """
    encoded = tokenizer(word, return_tensors='pt')
    # Inference only: no gradients are needed for a lookup.
    with torch.no_grad():
        last_hidden = model(**encoded).last_hidden_state
    token_mean = last_hidden.mean(dim=1)
    return token_mean.squeeze()

# Function to perform word arithmetic
def word_arithmetic(word1, word2, word3):
    """Return the vector for ``word1 - word2 + word3``.

    Each word is embedded with ``get_word_embedding`` (in argument order) and
    the three embeddings are combined element-wise.

    Args:
        word1: Word whose embedding is the starting point.
        word2: Word whose embedding is subtracted.
        word3: Word whose embedding is added.

    Returns:
        The combined embedding.
    """
    start_vec, removed_vec, added_vec = (
        get_word_embedding(term) for term in (word1, word2, word3)
    )
    return start_vec - removed_vec + added_vec
# Function to find the most similar word
def find_most_similar(target_vec, word_list):
    """Pick the candidate word whose embedding is closest to ``target_vec``.

    Every word in ``word_list`` is embedded with ``get_word_embedding`` and
    scored as ``1 - cosine(target_vec, word_vec)`` (cosine similarity).

    Args:
        target_vec: Vector to compare the candidates against.
        word_list: Iterable of candidate words.

    Returns:
        A ``(word, similarity)`` tuple for the highest-scoring candidate. When
        several candidates tie, the one that appears first in ``word_list``
        is returned.

    Raises:
        ValueError: If ``word_list`` contains no candidates.
    """
    scored = [
        (word, 1 - cosine(target_vec, get_word_embedding(word)))
        for word in word_list
    ]
    if not scored:
        # Fail with a clear message instead of an IndexError on an empty list.
        raise ValueError("word_list must contain at least one candidate word")
    # Only the best candidate is needed, so a full sort is unnecessary.
    return max(scored, key=lambda pair: pair[1])

# Word-arithmetic test cases: (word1, word2, word3, candidate answers)
examples = [
    ('chef', 'kitchen', 'classroom', ['teacher', 'student', 'principal', 'blackboard', 'desk']),
    ('actor', 'stage', 'court', ['judge', 'lawyer', 'witness', 'gavel', 'jury']),
    ('doctor', 'hospital', 'office', ['worker', 'manager', 'desk', 'meeting', 'computer']),
    ('painter', 'canvas', 'writer', ['book', 'pen', 'paper', 'novel', 'editor']),
    ('pilot', 'airplane', 'ship', ['captain', 'deck', 'crew', 'navigation', 'port']),
]

# For every case, compute word1 - word2 + word3 and report the closest candidate
for base, removed, added, candidates in examples:
    analogy_vec = word_arithmetic(base, removed, added)
    best_word, best_score = find_most_similar(analogy_vec, candidates)
    print(f"{base} - {removed} + {added} is most similar to: {best_word} (similarity: {best_score:.4f})")



chef - kitchen + classroom is most similar to: student (similarity: 0.8677)
actor - stage + court is most similar to: lawyer (similarity: 0.7677)
doctor - hospital + office is most similar to: worker (similarity: 0.7979)
painter - canvas + writer is most similar to: editor (similarity: 0.7736)
pilot - airplane + ship is most similar to: crew (similarity: 0.7721)


In [6]:
!pip install langchain groq
!pip install langchain-groq groq
!pip install -U langchain-community
!pip install langchain langchain-community huggingface_hub faiss-cpu
!pip install sentence-transformers

Collecting langchain
  Downloading langchain-0.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groq
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Collecting langchain-core<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_core-0.3.0-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.121-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Downloading h11-0.14.0-py3-n

In [7]:
pip install langchain faiss-cpu transformers wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... done
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11679 sha256=e228de5276b1c965a42e2cdb1253b577ffa310eb7ba7ada011c95fd0faaed4c4
  Stored in directory: /root/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [13]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI  # Assuming `ChatGroq` works similarly to OpenAI models
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WikipediaLoader
import os

# Groq API key: never store the secret in the notebook. Use the value already
# present in the environment, and only prompt (without echoing) if it is missing.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

# Tunable settings, named here so they are not buried in the calls below
CHUNK_SIZE = 1000      # chunk_size given to the text splitter
CHUNK_OVERLAP = 200    # chunk_overlap given to the text splitter
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
GROQ_MODEL_NAME = "mixtral-8x7b-32768"

# Step 1: Choose 5 articles (article titles are just examples, adjust as needed)
articles = [
    "Artificial Intelligence",
    "Quantum Computing",
    "Climate Change",
    "Ancient Civilizations",
    "Space Exploration",
]

# Step 2: Load the Wikipedia content for each title
# NOTE(review): each title is handed to WikipediaLoader as its first argument
# and load() returns a list, so one title may contribute several documents —
# confirm whether exactly one page per title is intended.
documents = []
for article_title in articles:
    loader = WikipediaLoader(article_title)
    documents.extend(loader.load())

# Step 3: Split the documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs_chunks = text_splitter.split_documents(documents)

# Step 4: Embed the chunks and index them in a FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vector_db = FAISS.from_documents(docs_chunks, embedding_model)

# Step 5: Initialize the Groq chat model (no key is passed here; the notebook
# supplies it through the GROQ_API_KEY environment variable)
llm = ChatGroq(model_name=GROQ_MODEL_NAME)

# Step 6: Build a "stuff" question-answering chain around the LLM
qa_chain = load_qa_chain(llm, chain_type="stuff")

# Step 7: Define the query function using the chain and retriever
def run_query(query, k=4):
    """Answer ``query`` using chunks retrieved from the vector store.

    Args:
        query: Question text. It is used both for the similarity search and
            as the ``question`` passed to the QA chain.
        k: Number of chunks to retrieve from ``vector_db``. The default of 4
            is the vector store's own default, so ``run_query(query)`` keeps
            retrieving the same number of chunks as before.

    Returns:
        The answer text produced by ``qa_chain``.
    """
    docs = vector_db.similarity_search(query, k=k)
    # Chain.run is deprecated in LangChain; invoke() is the supported call and
    # returns a dict whose "output_text" entry is the value run() returned.
    result = qa_chain.invoke({"input_documents": docs, "question": query})
    return result["output_text"]

# Step 8: Ten diverse queries for the RAG system
queries = [
    # Leading space removed from the first query so it is not embedded/printed
    "What are the benefits of renewable energy sources?",
    "How does blockchain technology work?",
    "What are the leading causes of deforestation?",
    "Which historical figure is known for developing the theory of relativity?",
    "What role does the internet play in modern education?",
    "How are autonomous vehicles expected to impact transportation?",
    "What is CRISPR and how does it relate to genetic engineering?",
    "What are the primary health risks associated with air pollution?",
    "Which ancient culture is credited with creating the first written language?",
    "What are the current challenges in exploring the deep ocean?",
]

# Step 9: Run each query (numbered from 1) and print the question and response
for i, query in enumerate(queries, 1):
    response = run_query(query)
    print(f"Query {i}: {query}")
    print(f"Response: {response}\n")




Query 1:  What are the benefits of renewable energy sources?
Response: Renewable energy sources, such as solar, wind, hydro, and nuclear power, offer several benefits. They can replace fossil fuels for powering transportation, heating buildings, and running industrial processes, which helps reduce carbon emissions and mitigate climate change. These energy sources are cleaner and produce less pollution, improving air quality and public health.

Additionally, renewable energy sources are sustainable and virtually inexhaustible. They reduce dependence on fossil fuels, which are finite resources and subject to geopolitical tensions and price volatility.

Informing decisions about global climate management strategy or policy decisions in some countries, renewable energy sources can contribute positively towards other sustainable development objectives. For example, they can create local jobs in manufacturing, installation, and maintenance, stimulate economic growth, and improve energy secur