In [1]:
import json
import os
# Pin the process to GPU 0 and declare a single-process world, in one call.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "WORLD_SIZE": "1",
})

In [2]:
from IPython.display import display
import ipywidgets as widgets
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.groq import Groq
from llama_index.core import Settings
import warnings
import os

In [3]:
# Snapshot the currently installed package versions into requirements.txt.
# NOTE(review): `!pip` runs whichever pip the shell resolves, which may not be
# the kernel's environment — confirm the snapshot reflects the kernel.
!pip freeze > requirements.txt

In [3]:
# Silence every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

# The Groq credential is taken from the environment; it is None when unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Prompt skeleton with two str.format placeholders: {context} and {question}.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Answer the question and provide additional helpful information,
based on the pieces of information, if applicable. Be succinct.

Responses should be properly formatted to be easily read.
"""

In [None]:
# Data ingestion: read every file found in the input directory.
directory_path = "/home/olawale/Desktop/PROJECTS/llms/data/input/"  # Update this with your directory path
reader = SimpleDirectoryReader(input_dir=directory_path)
documents = reader.load_data()

# One-sentence description of the corpus; it fills the {context} slot of
# prompt_template when a question is asked.
context = "This directory contains multiple academic documents on large language models (llms) and NLP research"

# Chunk the documents into nodes of up to 1024 units with a 200-unit overlap.
text_splitter = SentenceSplitter(
    chunk_overlap=200,
    chunk_size=1024,
)
nodes = text_splitter.get_nodes_from_documents(documents, show_progress=True)

In [None]:
# Embedding model used to vectorise the chunks, and the Groq-hosted LLM used to
# generate answers; both are registered globally through Settings.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
Settings.llm = llm
Settings.embed_model = embed_model

# Build the index straight from the nodes produced by the SentenceSplitter in
# the previous cell. The earlier call passed that node list as `node_parser=`,
# which is not a parser object, so the explicit 1024/200 chunking was not what
# the index was built from.
vector_index = VectorStoreIndex(nodes=nodes, show_progress=True)

# Persist to disk and reload, so `index` is the copy read back from ./storage_mini.
vector_index.storage_context.persist(persist_dir="./storage_mini")
storage_context = StorageContext.from_defaults(persist_dir="./storage_mini")
index = load_index_from_storage(storage_context)

In [None]:
# Free-text question field, pre-filled with an example question.
input_box = widgets.Text(
    description='Question:',
    placeholder='Type your question here',
    value='Summarise the Self-Rag paper: Learning to Retrieve, Generate, and Critique through Self-Reflection',
    disabled=False,
)

# Output widget that the click handler prints the answer into.
output_area = widgets.Output()

def on_button_click(b):
    """Answer the question in ``input_box`` and print it inside ``output_area``.

    The question is inserted into ``prompt_template`` together with the
    module-level ``context`` string, sent to ``query_engine``, and the reply is
    re-flowed to 15 words per printed line.

    Args:
        b: The button instance passed by ``Button.on_click``; not used.
    """
    with output_area:
        output_area.clear_output()
        question = input_box.value.strip()
        if not question:
            # Nothing to ask: avoid spending an LLM call on an empty prompt.
            print("Please type a question first.")
            return

        query_prompt = prompt_template.format(context=context, question=question)
        # Guard against a missing response so `.split()` below cannot fail on None.
        response_text = query_engine.query(query_prompt).response or ""

        words = response_text.split()  # Whitespace-separated words of the reply
        words_per_line = 15  # Number of words per printed line
        for start in range(0, len(words), words_per_line):
            print(" ".join(words[start:start + words_per_line]))


# Set up query engine (looked up by the click handler each time it runs)
query_engine = index.as_query_engine()

# "Ask" button that triggers the click handler.
button = widgets.Button(
    icon='check',
    tooltip='Ask the question',
    description='Ask',
    button_style='',
    disabled=False,
)
button.on_click(on_button_click)

# Render the question field, the button and the answer area together.
display(input_box, button, output_area)

In [7]:
# Query whose wording contains scrambled words (presumably a deliberate
# robustness probe — confirm); the text is sent to the engine exactly as written.
query_prompt = (
    "How does the SEyjLF-RAG model generazfgsdfhjkdgfte tegyuoxt with rjkleflection tokens "
    "to evalgyuouate the utigyuolity of retrieval"
)
response_text = query_engine.query(query_prompt).response

In [None]:
# Earlier variant, kept for reference: split the response into lines.
# answer = response_text.split("\n")
# Current variant: keep the response text as a single value and show it as the
# cell output.
answer = response_text
answer

In [None]:
# Ask the same query engine to list only the keywords of the previous answer.
keyword_instruction = "Respond without any suffix answer, only listing the keywords in this: "
keyword_prompt = keyword_instruction + answer
keyword_text = query_engine.query(keyword_prompt).response
keyword_text

In [None]:
# Length of `answer` (the response text assigned above).
len(answer)

In [None]:
# `answer` is a single string, so it has to be split into lines before slicing;
# iterating the string directly walks it character by character and yields only
# empty strings. Skips the first line and drops the first three characters of
# each remaining line (presumably a "1. "-style list prefix — confirm).
[sentence[3:] for sentence in answer.split("\n")[1:]]

Retrieval-Augmented Generation for Large
Language Models: A Survey


Provide 20 different prompts that explain the Self-RAG paper.

In [16]:
# Sample passage about geophysics, stored under the name `prompt`; it is
# interpolated into the key-point request sent to `llm.complete` in the next cell.
prompt = '''Geophysics is the study of the Earth's internal and external physical processes using quantitative methods. It is an interdisciplinary field that combines principles from geology, physics, mathematics, and engineering to understand the Earth's structure, composition, and dynamics.

Geophysicists use a wide range of techniques, including seismic, gravitational, magnetic, and electrical methods, to investigate the Earth's interior, crust, and atmosphere. They analyze data from various sources, such as:

1. Seismic waves generated by earthquakes or artificial sources, which provide information about the Earth's internal structure and composition.
2. Gravity measurements, which help determine the distribution of mass within the Earth.
3. Magnetic field measurements, which reveal the Earth's internal magnetic field and its variations.
4. Electrical and electromagnetic measurements, which provide information about the Earth's electrical conductivity and subsurface structures.

Geophysics has many applications in various fields, including:

1. **Oil and gas exploration**: Geophysicists use seismic and other methods to locate subsurface hydrocarbon reservoirs and identify potential drilling sites.
2. **Earthquake seismology**: Geophysicists study seismic waves to understand earthquake mechanisms, locate epicenters, and assess seismic hazards.
3. **Environmental monitoring**: Geophysicists use electrical and electromagnetic methods to monitor groundwater contamination, track contaminant plumes, and detect subsurface pollutants.
4. **Climate change research**: Geophysicists study the Earth's magnetic field, paleomagnetism, and geothermal heat flow to understand the Earth's climate history and predict future changes.
5. **Natural resource management**: Geophysicists help locate and manage groundwater resources, mineral deposits, and geothermal energy sources.
6. **Geotechnical engineering**: Geophysicists use geophysical methods to investigate soil and rock properties, assess landslide risks, and design foundations for buildings and infrastructure.
7. **Planetary science**: Geophysicists study the internal structure and composition of other planets and moons in our solar system, providing insights into their formation and evolution.

Some of the key areas of research in geophysics include:

1. **Seismology**: The study of seismic waves and their applications in understanding the Earth's internal structure and earthquake processes.
2. **Gravity and magnetic fields**: The study of the Earth's gravitational and magnetic fields, which provide insights into the Earth's internal structure and composition.
3. **Electromagnetic methods**: The study of electrical and electromagnetic properties of the Earth's subsurface, which help identify subsurface structures and monitor environmental changes.
4. **Geodynamics**: The study of the Earth's internal dynamics, including plate tectonics, mantle convection, and the Earth's magnetic field.
5. **Planetary geophysics**: The study of the internal structure and composition of other planets and moons in our solar system.

Overall, geophysics is a vital field that helps us understand the Earth's internal and external processes, which is essential for addressing various environmental, economic, and societal challenges.'''

In [None]:
# Send the sample passage straight to the LLM (not through the query engine)
# and print the completion.
# NOTE(review): "singlular" in the instruction looks like a typo; it is kept
# verbatim here so the request sent to the model is unchanged.
key_points_request = f"List key points in this response using singlular words: {prompt}"
response_groq = llm.complete(key_points_request)
print(response_groq)

In [None]:
!pip install matplotlib

In [None]:
import fitz  # PyMuPDF
import matplotlib.pyplot as plt
from PIL import Image
import io

def extract_images_from_pdf(pdf_path):
    """Collect every embedded image of a PDF.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        A list of ``(page_num, img_index, image)`` tuples. ``page_num`` is the
        zero-based page index, ``img_index`` is the zero-based position of the
        image within that page's ``get_images(full=True)`` listing, and
        ``image`` is the object returned by ``Image.open`` for the image bytes.

    Raises:
        Whatever ``fitz`` or ``Image.open`` raise while opening the document or
        decoding an image; the document is closed before the error propagates.
    """
    images = []
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]
            for img_index, img in enumerate(page.get_images(full=True)):
                xref = img[0]  # first field of the listing entry identifies the image
                base_image = pdf_document.extract_image(xref)
                image = Image.open(io.BytesIO(base_image["image"]))
                images.append((page_num, img_index, image))
    finally:
        # Release the document handle even when a page or image fails to load.
        pdf_document.close()
    return images

def plot_images(images):
    """Show each extracted image in its own 8x6 figure.

    Args:
        images: Iterable of ``(page_num, img_index, img)`` tuples; the two
            indices are zero-based and are shown one-based in the title.
    """
    for page_num, img_index, img in images:
        _, ax = plt.subplots(figsize=(8, 6))
        ax.set_title(f"Page {page_num + 1}, Image {img_index + 1}")
        ax.imshow(img)
        ax.axis('off')
        plt.show()

# Specify the PDF file path
# NOTE(review): absolute, machine-specific path — update it before running elsewhere.
pdf_path = "/home/olawale/Desktop/PROJECTS/llms/beyond-abstracts/data/upload/2312.10997v5.pdf"

# Extract every embedded image from the PDF, then show them one figure at a time
extracted_images = extract_images_from_pdf(pdf_path)
plot_images(extracted_images)

In [None]:
import dash
import dash_cytoscape as cyto
import dash_html_components as html

# Dash application object; its layout is assigned further down and it is
# served at the end of this cell.
app = dash.Dash(__name__)

# Stylesheet with a single rule for nodes: width and height share one mapping
# driven by each node's `size` data field, and the label is centred on the node.
default_stylesheet = [
    {
        "selector": "node",
        "style": {
            **dict.fromkeys(("width", "height"), "mapData(size, 0, 100, 20, 60)"),
            "content": "data(label)",
            "font-size": "12px",
            **dict.fromkeys(("text-valign", "text-halign"), "center"),
        },
    }
]


# default_stylesheet = [{
#     'selector': 'node',
#     'style': {
#         'label': 'data(label)',
#         "background-fill": "radial-gradient",
#         "background-gradient-stop-colors": 'data(background_color)',
#         "background-gradient-stop-positions": '0, 80, 90, 100',
#         'color': 'data(color)',
#         'text-valign': 'center',
#         'text-halign': 'center',
#         'font-size': 'data(font_size)',
#         'border-color': 'data(border_color)',
#         'border-width': 1.5,
#         "border-opacity": 1,
#         'width': 'data(size)',
#         'height': 'data(size)',
#         'opacity': 0.98
#     }
# }, {
#     'selector': 'edge',
#     'style': {
#         "line-fill": "linear-gradient",
#         "line-gradient-stop-colors": 'data(colors)',
#         "line-gradient-stop-positions": "10, 20, 30, 40, 50, 60, 70, 80, 90",
#         'width': 2.5,
#         'curve-style': 'bezier',
#         'source-endpoint': 'outside-to-node',
#         'target-endpoint': 'outside-to-node'
#     }
# }]

# Page layout: one Cytoscape graph with five nodes at fixed ("preset")
# positions and two edges leaving node "one".
# NOTE(review): nodes four and five use sizes 300 and 200, above the 0-100
# input range named in the stylesheet's mapData(...) mapping — confirm intent.
app.layout = html.Div([
    cyto.Cytoscape(
        id="cytospace",
        elements=[
            {'data': {'id': 'one', 'label': 'Node 1', 'size': 40}, 'position': {'x': 50, 'y': 50}},
            {'data': {'id': 'two', 'label': 'Node 2', 'size': 100}, 'position': {'x': 200, 'y': 200}},
            {'data': {'id': 'three', 'label': 'Node 3', 'size': 10}, 'position': {'x': 100, 'y': 100}},
            {'data': {'id': 'four', 'label': 'Node 4', 'size': 300}, 'position': {'x': 160, 'y': 50}},
            {'data': {'id': 'five', 'label': 'Node 5', 'size': 200}, 'position': {'x': 200, 'y': 100}},
            {'data': {'source': 'one', 'target': 'two', 'label': 'Node 1 to 2'}},
            # This edge targets node four; its label previously repeated 'Node 1 to 2'.
            {'data': {'source': 'one', 'target': 'four', 'label': 'Node 1 to 4'}}
        ],
        layout={'name': 'preset'},
        stylesheet=default_stylesheet
    )
])

# Start the Dash development server with debug mode enabled when this code runs
# as the main module.
# NOTE(review): newer Dash releases replace `run_server` with `app.run` —
# confirm against the installed Dash version.
if __name__ == "__main__":
    app.run_server(debug=True)

In [None]:
from qdrant_client import QdrantClient

# Connection settings are read from the environment so that no secret is stored
# in the notebook. QDRANT_URL falls back to the cluster endpoint used so far;
# QDRANT_API_KEY has no fallback.
# NOTE(review): the API key that was previously committed in this cell should
# be treated as exposed and rotated.
qdrant_url = os.getenv(
    "QDRANT_URL",
    "https://77aa90d2-b4ac-4ef7-b373-47587d0de30d.eu-central-1-0.aws.cloud.qdrant.io:6333",
)
qdrant_api_key = os.getenv("QDRANT_API_KEY")
if not qdrant_api_key:
    raise RuntimeError("QDRANT_API_KEY is not set; export it before running this cell.")

qdrant_client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
)

# Smoke test: list the collections visible to this client.
print(qdrant_client.get_collections())