In [18]:
import os
# import json
import math
# import ollama
# import requests

from dotenv import load_dotenv
from collections import Counter

In [19]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# os.environ is a mapping: entries must be set by key and values must be
# strings. Attribute assignment (os.environ.NAME = ...) only sets a Python
# attribute on the mapping object and never reaches the process environment.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com/"

# Export the API key only when it is configured: os.environ rejects None.
langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
if langsmith_api_key:
    os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key

In [20]:
# Toy corpus: ten one-sentence "documents" used as retrieval candidates.
document_body = [
    "Quantum computing promises to revolutionize data encryption by making previously unsolvable problems computationally feasible.",
    "Regular exercise, combined with a balanced diet, is key to maintaining both physical and mental well-being.",
    "The vibrant markets of Marrakech offer a sensory overload, from the scent of spices to the vivid colors of handcrafted textiles.",
    "Natural Language Processing (NLP) is enabling machines to understand and respond to human language more accurately than ever before.",
    "Inflation rates can have a profound impact on consumer spending, leading to changes in overall economic growth.",
    "The concept of cognitive dissonance explains why people sometimes hold onto beliefs even when confronted with contradictory evidence.",
    "NASA’s Artemis program aims to establish a sustainable human presence on the Moon by the end of the decade.",
    "The Renaissance was a period of immense cultural, artistic, and intellectual revival that began in Italy in the 14th century.",
    "Integrating hands-on learning experiences into the classroom can significantly enhance students’ engagement and retention of knowledge.",
    "Deforestation in the Amazon rainforest not only threatens biodiversity but also accelerates climate change by releasing stored carbon dioxide.",
]

**Embedding the document body as bag-of-words (word-frequency) vectors**

In [21]:
# Toy example: turn a query and a document into word-count vectors.
user_query = "Sample text"
doc_query = "Sample document"

# Lower-case and split on single spaces, then count each token.
query_tokens = user_query.lower().split(" ")
query_counter = Counter(query_tokens)

# Tokenize the document text itself. Building this from `user_query` would
# make the document vector an exact copy of the query vector.
doc_tokens = doc_query.lower().split(" ")
doc_counter = Counter(doc_tokens)

In [23]:
# Collect the per-token counts of the query vector, echoing each one.
query_list = list(query_counter.values())
for tk in query_list:
    print(tk)

1
1


In [24]:
# Per-token products for the words both vectors share; summing these gives
# the dot product of the two word-count vectors.
shared_tokens = query_counter.keys() & doc_counter.keys()
result_list = [query_counter[token] * doc_counter[token] for token in shared_tokens]

In [26]:

def text_to_vector(text):
    """Return a bag-of-words ``Counter`` for ``text``.

    The text is lower-cased and split on runs of whitespace; every distinct
    token maps to the number of times it occurs. Punctuation is not removed,
    so ``"computing."`` and ``"computing"`` count as different tokens.
    """
    frequencies = Counter()
    frequencies.update(text.lower().split())
    return frequencies

def cosine_similarity_sentence(sentence1, sentence2):
    """Return the cosine similarity of two sentences' word-count vectors.

    Each sentence is converted with ``text_to_vector``. The result is the dot
    product over the shared words divided by the product of the two vector
    magnitudes. If either sentence yields an empty vector, ``0.0`` is
    returned instead of dividing by zero.
    """
    counts_a = text_to_vector(sentence1)
    counts_b = text_to_vector(sentence2)

    # Euclidean length of each count vector.
    norm_a = math.sqrt(sum(count ** 2 for count in counts_a.values()))
    norm_b = math.sqrt(sum(count ** 2 for count in counts_b.values()))

    # Guard clause: a zero-length vector has no direction to compare.
    if not (norm_a and norm_b):
        return 0.0

    # Only words present in both sentences contribute to the dot product.
    shared_words = counts_a.keys() & counts_b.keys()
    dot = sum(counts_a[word] * counts_b[word] for word in shared_words)
    return dot / (norm_a * norm_b)


def document_similarity(query, sentences):
    """Find the sentence most similar to ``query``.

    Scores every entry of ``sentences`` with ``cosine_similarity_sentence``
    and returns ``(best_score, best_sentence)``. On ties the earliest
    sentence wins. An empty ``sentences`` makes ``max`` raise ``ValueError``.
    """
    scores = []
    for candidate in sentences:
        scores.append(cosine_similarity_sentence(query, candidate))

    best_score = max(scores)
    best_sentence = sentences[scores.index(best_score)]
    return best_score, best_sentence

In [27]:
# Demo query for the retrieval step ("quantum" was previously misspelled as
# "uantum", so the key term could never match the corpus).
user_input = "What do you think about quantum computing"

# Best-matching sentence from the corpus and its cosine similarity score.
sim_value, relevant_document = document_similarity(user_input, document_body)

In [28]:
# Show the corpus sentence that scored highest against the query.
print(relevant_document)

Quantum computing promises to revolutionize data encryption by making previously unsolvable problems computationally feasible.


**Augmentation using a Groq-hosted model**

In [30]:
import streamlit as st
import math
from collections import Counter
from dotenv import load_dotenv
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

# Pull settings from a local .env file, then look up the Groq credential
# (None when the variable is not set).
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")

def text_to_vector(text):
    """Map ``text`` to a ``Counter`` of its lower-cased, whitespace-split tokens."""
    return Counter(text.lower().split())

def cosine_similarity_sentence(sentence1, sentence2):
    """Cosine similarity between the word-count vectors of two sentences.

    Returns ``0.0`` when either sentence produces an empty vector, otherwise
    the dot product over shared words divided by the product of magnitudes.
    """
    freq1, freq2 = text_to_vector(sentence1), text_to_vector(sentence2)

    # Vector magnitudes (square root of the sum of squared counts).
    length1 = math.sqrt(sum(n * n for n in freq1.values()))
    length2 = math.sqrt(sum(n * n for n in freq2.values()))
    if length1 == 0 or length2 == 0:
        return 0.0

    # Words missing from either sentence contribute nothing, so skip them.
    overlap = sum(freq1[word] * freq2[word] for word in freq1 if word in freq2)
    return overlap / (length1 * length2)

def document_similarity(query, sentences):
    """Return ``(score, sentence)`` for the entry of ``sentences`` closest to ``query``.

    Closeness is measured with ``cosine_similarity_sentence``; when several
    sentences share the top score, the first one is returned.
    """
    similarities = list(map(lambda s: cosine_similarity_sentence(query, s), sentences))
    top_score = max(similarities)
    return top_score, sentences[similarities.index(top_score)]

# Streamlit App UI
st.title("Document Similarity Checker with Study Recommendation")

# Text input or file upload. The raw text gets its own name so this cell does
# not overwrite the notebook's `document_body` list of sentences.
document_text = st.text_area("Enter document text or paste multiple sentences (one per line):")
uploaded_file = st.file_uploader("Or upload a text file", type="txt")

# User input for query
user_query = st.text_input("Enter a query to compare against the document:")

# Everything below runs only on the script pass triggered by the button.
if st.button("Submit"):
    # An uploaded file takes precedence over the text area.
    if uploaded_file:
        document_text = uploaded_file.read().decode("utf-8")

    # One candidate sentence per non-blank line. splitlines() also copes with
    # Windows "\r\n" endings, and blank lines are dropped so they are never
    # offered as the "most relevant document".
    sentences = [line.strip() for line in document_text.splitlines() if line.strip()]

    if sentences and user_query.strip():
        sim_value, relevant_document = document_similarity(user_query, sentences)

        st.write(f"Most relevant document: {relevant_document}")
        st.write(f"Cosine similarity score: {sim_value:.4f}")

        if not groq_api_key:
            # Without a key the Groq client cannot authenticate; say so
            # instead of surfacing a raw client error.
            st.write("GROQ_API_KEY is not set; cannot request a recommendation.")
        else:
            # ChatGroq model setup for recommendations.
            # NOTE(review): hosted model ids get retired over time; confirm
            # this one is still served by Groq before relying on it.
            llm = ChatGroq(
                model="llama-3.1-70b-versatile",
                temperature=0,
                groq_api_key=groq_api_key,
                max_tokens=None,
                timeout=None,
                max_retries=2,
            )

            # The template text is kept verbatim (including its indentation),
            # since it is sent to the model as-is.
            prompt_extract = PromptTemplate.from_template(
           """
           You are a chat bot making recommendation for studies. A recommended subject course is: {sim}.
           Given a query: {query}
           Compile a recommended subject course for the user, which is based on the user's query.  
           """
            )

            # Pipe the filled-in prompt into the chat model.
            chain = prompt_extract | llm
            response_chain = chain.invoke(input={"sim": relevant_document, "query": user_query})

            # Display the LLM response
            st.write("Recommended study course:")
            st.write(response_chain.content)
    else:
        st.write("Please provide both document text and a query.")


Based on your query, I recommend the following subject course:

**Course Title:** Quantum Computing and Cryptography

**Course Description:** This course explores the principles of quantum computing and its applications in cryptography. Students will learn how quantum computers can solve complex problems that are currently unsolvable with classical computers, and how this can be used to break certain types of encryption. The course will also cover the basics of quantum mechanics, quantum algorithms, and quantum cryptography.

**Course Outline:**

1. Introduction to Quantum Computing
	* Overview of quantum computing and its history
	* Quantum bits and quantum gates
	* Quantum algorithms and their applications
2. Quantum Mechanics and Quantum Information
	* Principles of quantum mechanics
	* Quantum states and entanglement
	* Quantum measurement and decoherence
3. Quantum Cryptography
	* Introduction to cryptography and encryption
	* Quantum key distribution and secure communication
	* Q

**Augmentation using Llama 2**

In [31]:
# prompt = """
# You are a chat bot making recommendation for studies. A recommended subject course is: {relevant_document}.
# Given a query: {user_input}
# Compile a recommended subject course for the user, which is based on the user's query.  
# """


# url = "http://localhost:11434/api/generate"

# data = {
#         "model": "llama2",
#         "prompt": prompt.format(user_input=user_input, relevant_document=relevant_document)
# }

# headers = {"Content-Type": "application/json"}

# response = requests.post(url, data=json.dumps(data), headers=headers, stream=True)

# full_response = []
# try:
#   for line in response.iter_lines():
#     if line:
#       decoded_line = json.loads(line.decode("utf-8"))
#       print(decoded_line)
#       # full_response.append(decoded_line["response"])
# finally:
#   response.close()
      

# curl http://localhost:11434/api/chat -d '{
#   "model": "llama3.2",
#   "messages": [
#     { "role": "user", "content": "why is the sky blue?" }
#   ]
# }'

# response = ollama.chat(model='llama2', messages=[
#   {
#     'recommended_course': sim,
#     'user_query': my_query,
#   },
# ])
# print(response['message']['content'])

In [14]:
# for idx, score in enumerate(sims):
#     print(f"Sentence {idx + 1}: {document_body[idx]}")
#     print(f"Similarity Score: {score:.4f}\n")

In [32]:
# prompt = """
# You are a chat bot making recommendation for studies. A recommended subject course is: {sim}.
# Given a query: {query}
# Compile a recommended subject course for the user, which is based on the user's query.  
# """


# url = "http://localhost:11434/api/generate"

# data = {
#         "model": llm,
#         "prompt": prompt.format(query=user_input, sim=relevant_document)
# }

# # generate a response
# curl http://localhost:11434/api/generate -d '{
#   "model": "llama3.2",
#   "prompt":"Why is the sky blue?"
# }'

# # chat with a model
# curl http://localhost:11434/api/chat -d '{
#   "model": "llama3.2",
#   "messages": [
#     { "role": "user", "content": "why is the sky blue?" }
#   ]
# }'