In [None]:
# Install the pinned langchain-together release that provides ChatTogether (imported below).
%pip install --upgrade langchain-together==0.2.0

In [None]:
# Install the `markdown` package, imported later to convert model responses to HTML.
# NOTE(review): unlike the langchain-together install, this one is unpinned; consider pinning a version.
%pip install markdown

In [None]:
import os
import textwrap
import openai
import time
from dotenv import load_dotenv

from langchain.vectorstores import DeepLake
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_together import ChatTogether


In [None]:
# Load variables from a local .env file (if one exists) into os.environ.
# load_dotenv is imported at the top of the notebook but was never called,
# so values kept in .env would not be visible to the lookups below.
load_dotenv()

# Path to the raw text corpus.
# NOTE(review): not read by any cell visible here; presumably used when the
# dataset is built. Verify before removing.
source_text = './contents/llm.txt'

# Location of the Deep Lake dataset; must be provided through the environment.
dataset_path = os.environ['ACTIVELOOP_DATASET']

# Chunking parameters.
# NOTE(review): also unused in the visible cells; presumably they describe how
# the stored chunks were produced. Confirm against the ingestion notebook.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

In [4]:
# Embedding model handed to the vector store so queries can be embedded.
gemini_embedding = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

# No cell in this notebook makes a live OpenAI request, so a missing key must
# not abort the run with KeyError. The key is still picked up when it is set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
# Open the existing Deep Lake dataset in read-only mode, using the Gemini
# embedding object for query embedding.
db = DeepLake(
    dataset_path=dataset_path,
    embedding=gemini_embedding,
    read_only=True,
)

In [9]:
def search(query, k=5):
    """Return the stored chunks most similar to `query`, with their scores.

    Args:
        query: Text to search the vector store with.
        k: Maximum number of results to request. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        Whatever `db.similarity_search_with_score` returns; later cells
        unpack each entry as a `(document, score)` pair.
    """
    return db.similarity_search_with_score(query=query, k=k)


In [10]:
# Example question used both for retrieval and for the augmented prompt.
user_prompt = "Tell me about space exploration on the Moon and Mars."

In [None]:
# Run retrieval for the example prompt; a later cell unpacks each entry as a
# (document, score) pair.
search_results = search(user_prompt)
# Dump the raw results for inspection.
print(search_results)

In [12]:
def wrap_text(text, width=80):
    """Wrap `text` into lines of at most `width` characters.

    Lines are broken at the last space that still lets the line fit. A run
    of characters longer than `width` with no space in it is cut hard at
    `width`. Only the space character is treated as a break point; newlines
    already present in `text` are kept as ordinary characters.

    Args:
        text: The string to wrap.
        width: Maximum line length; must be at least 1.

    Returns:
        The wrapped text, with lines joined by newline characters.

    Raises:
        ValueError: If `width` is smaller than 1 (the loop could never make
            progress and would otherwise spin forever).
    """
    if width < 1:
        raise ValueError(f"width must be at least 1, got {width!r}")

    lines = []
    while len(text) > width:
        # Search one character past the limit: a space sitting exactly at
        # index `width` means the first `width` characters already form a
        # complete line and must not be broken a word early.
        split_index = text.rfind(' ', 0, width + 1)
        if split_index == -1:
            # No space available: hard-split the over-long word.
            split_index = width
        line = text[:split_index].rstrip()
        # Skip empty pieces (e.g. produced by leading whitespace) instead of
        # emitting blank lines.
        if line:
            lines.append(line)
        # Either a non-empty prefix or at least one leading space is removed
        # here, so the remainder always shrinks.
        text = text[split_index:].strip()
    lines.append(text)
    return '\n'.join(lines)

In [None]:
import textwrap

# Fallback values so later cells still run when the search returns nothing.
top_score = 0
top_text = ""
top_metadata = ""

# List every retrieved chunk with its score for inspection.
for document, score in search_results:
    print(f"Document: {document.page_content}, Score: {score}")

# Assuming the search results are ordered with the top result first, take
# entry 0 explicitly. Assigning inside the loop above would leave the LAST
# hit in top_* and print the "Top Search Result" banner once per hit.
if search_results:
    top_document, top_score = search_results[0]
    top_text = top_document.page_content.strip()
    # Keep the whole metadata object rather than only its 'source' entry.
    top_metadata = top_document.metadata

    # Print the top search result
    print("Top Search Result:")
    print(f"Score: {top_score}")
    print(f"Source: {top_metadata}")
    print("Text:")
    print(wrap_text(top_text))
else:
    print("No search results returned.")



In [None]:
# Build the augmented input: the user's question followed by the retrieved
# text, separated by a single space.
augmented_input = "{} {}".format(user_prompt, top_text)
print(augmented_input)

In [28]:
def call_gpt_with_augmented_text(context, model="meta-llama/Llama-3-70b-chat-hf", temperature=0):
    """Ask the chat model (via ChatTogether) to summarize or explain `context`.

    Args:
        context: The augmented input. A single string is used as-is; any
            other iterable of strings is joined with newlines.
        model: Model identifier passed to ChatTogether. Defaults to the
            previously hard-coded Llama 3 70B chat model.
        temperature: Sampling temperature passed to ChatTogether.

    Returns:
        The text content of the model's reply. If the request fails, the
        exception message is returned instead (best effort), so the caller
        always receives a string.
    """
    # A str is itself an iterable of characters: joining it would insert a
    # newline between every single character of the prompt.
    if isinstance(context, str):
        text_input = context
    else:
        text_input = '\n'.join(context)

    prompt = f"Please summarize or explain the following context: \n {text_input}"
    messages = [
        ("system", "You are a space exploration expert"),
        ("assistant", "You can read the input and answer in detail."),
        ("human", prompt),
    ]

    try:
        llm = ChatTogether(model=model, temperature=temperature)
        result = llm.invoke(messages)
        # Return only the reply text; str(result) would also include the
        # message object's other fields.
        content = result.content
        return content if isinstance(content, str) else str(content)
    except Exception as e:
        # Deliberate best effort: surface the failure as text so the
        # notebook keeps running.
        return str(e)


In [None]:
# Time the full round trip to the model. perf_counter is monotonic, so the
# measured interval is not affected by system clock adjustments.
start_time = time.perf_counter()
gpt4_response = call_gpt_with_augmented_text(augmented_input)

response_time = time.perf_counter() - start_time
print(f"Response Time: {response_time:.2f} seconds")

# Label the output with the model that call_gpt_with_augmented_text queries
# by default (Llama 3 70B via ChatTogether), not gpt-4o-mini.
print('meta-llama/Llama-3-70b-chat-hf', "Response:", gpt4_response)

In [30]:

import re
from IPython.display import display, Markdown, HTML
import markdown

In [None]:


def print_formatted_response(response):
    """Display `response` as rendered markdown or as wrapped plain text.

    The text is scanned for common markdown constructs. If any is found it
    is converted to HTML and shown with IPython's rich display; otherwise it
    is wrapped at 80 columns and printed between separator lines.

    Args:
        response: The text to display. `None` is treated as an empty string
            and other non-string values are converted with `str()`.
    """
    if not isinstance(response, str):
        response = "" if response is None else str(response)

    # Patterns are anchored to real markdown syntax so that ordinary prose
    # (snake_case identifiers, a hyphen followed by a space) does not get
    # routed through the markdown renderer.
    markdown_patterns = [
        r"^#+\s",                      # Headers
        r"^[ \t]*[*+-][ \t]+\S",       # Bullet lists ("* item", "- item", "+ item")
        r"\*\*[^*\n]+\*\*",            # Bold
        r"(?<!\w)_[^_\n]+_(?!\w)",     # Italics (_text_, not snake_case)
        r"\[[^\]\n]+\]\([^)\n]+\)",    # Links
        r"```",                        # Code blocks
    ]

    # If any pattern matches, assume the response is in markdown
    if any(re.search(pattern, response, re.MULTILINE) for pattern in markdown_patterns):
        # Markdown detected, convert to HTML for nicer display
        html_output = markdown.markdown(response)
        display(HTML(html_output))
    else:
        # No markdown detected, wrap and print as plain text
        wrapper = textwrap.TextWrapper(width=80)
        wrapped_text = wrapper.fill(text=response)

        print("Text Response:")
        print("--------------------")
        print(wrapped_text)
        print("--------------------\n")

# Display the model response produced by the timed call.
print_formatted_response(gpt4_response)