In [12]:
import os
import re
import ollama

In [13]:
def readtextfiles(path):
  """Read every ``.txt`` file located directly inside ``path``.

  The scan is not recursive. Only regular files whose name ends with
  ``.txt`` are read; anything else (including a sub-directory that happens
  to be named ``something.txt``) is skipped.

  Args:
    path: Directory to scan (string or path-like object).

  Returns:
    dict mapping each file name (not the full path) to the file's content
    decoded as UTF-8. Entries are inserted in sorted file-name order.

  Raises:
    OSError: if the directory cannot be listed or a file cannot be opened.
    UnicodeDecodeError: if a file is not valid UTF-8.
  """
  text_contents = {}
  directory = os.fspath(path)

  # os.listdir returns names in arbitrary order; sorting keeps the result
  # (and anything built from it) the same from one run to the next.
  for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
      continue

    file_path = os.path.join(directory, filename)
    # A directory named "*.txt" would make open() fail, so skip non-files.
    if not os.path.isfile(file_path):
      continue

    with open(file_path, "r", encoding="utf-8") as file:
      text_contents[filename] = file.read()

  return text_contents


In [14]:
def chunksplitter(text, chunk_size=100):
  """Split ``text`` into chunks of whitespace-separated words.

  Words are the maximal runs of non-whitespace characters in ``text``. Each
  chunk is its words joined by single spaces, so the original whitespace
  (tabs, newlines, repeated spaces) is not preserved.

  Args:
    text: The string to split.
    chunk_size: A chunk is closed as soon as it holds at least this many
      words. Defaults to 100.

  Returns:
    list of chunk strings in document order. The last chunk may be shorter
    than ``chunk_size``; an input without any words yields an empty list.
  """
  pieces = []
  pending = []

  for token in re.findall(r'\S+', text):
    pending.append(token)

    # The buffer length doubles as the word counter for the open chunk.
    if len(pending) >= chunk_size:
      pieces.append(' '.join(pending))
      pending = []

  # Flush whatever is left over after the last full chunk.
  if pending:
    pieces.append(' '.join(pending))

  return pieces

In [15]:
def getembedding(chunks):
  """Embed text with the ``nomic-embed-text`` model via ``ollama.embed``.

  Args:
    chunks: The text(s) to embed; passed unchanged as the ``input`` argument
      of ``ollama.embed``.

  Returns:
    The ``'embeddings'`` entry of the response, or ``[]`` when the response
    has no such entry. An empty list/tuple of chunks returns ``[]`` without
    making a request.
  """
  # Nothing to embed: skip the round trip to the embedding service.
  if isinstance(chunks, (list, tuple)) and not chunks:
    return []

  response = ollama.embed(model="nomic-embed-text", input=chunks)
  return response.get('embeddings', [])

In [16]:
import chromadb

# Where the Chroma server is reached over HTTP.
CHROMA_HOST = "localhost"
CHROMA_PORT = 8000
chromaclient = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)

# Load the source documents: a dict of file name -> file content.
textdocspath = "./text"
text_data = readtextfiles(textdocspath)

In [20]:
# Start from an empty collection: drop any existing one with this name, then
# re-create it configured for cosine distance.
# Depending on the chromadb version, list_collections() returns Collection
# objects or bare name strings; getattr(..., "name", entry) handles both.
existing_names = {getattr(entry, "name", entry) for entry in chromaclient.list_collections()}
if "buildragwithpython" in existing_names:
    chromaclient.delete_collection("buildragwithpython")
collection = chromaclient.create_collection(name="buildragwithpython", metadata={"hnsw:space": "cosine"})


# Index every document: split into chunks, embed them, and store each chunk
# with an id of "<filename><chunk index>" and its source file as metadata.
for filename, text in text_data.items():
  chunks = chunksplitter(text)
  if not chunks:
    # Empty or whitespace-only file: there is nothing to index, and calling
    # add() with empty id/document lists is rejected by Chroma.
    continue
  embeds = getembedding(chunks)
  chunknumber = list(range(len(chunks)))
  ids = [filename + str(index) for index in chunknumber]
  metadatas = [{"source": filename} for _ in chunknumber]
  collection.add(ids=ids, documents=chunks, embeddings=embeds, metadatas=metadatas)

In [27]:
import sys, chromadb, ollama

# Connect to the Chroma server and open the collection holding the indexed chunks.
chromaclient = chromadb.HttpClient(host="localhost", port=8000)
collection = chromaclient.get_or_create_collection(name="buildragwithpython")

# The question is set directly. Building it with
# "<question>".join(sys.argv[1:]) would use the question as the *separator*
# between the process arguments: in a notebook those are the kernel's launch
# arguments, and with no arguments at all the query would be empty.
query = "What were popular contributions of abbasid dynasty in science?"
queryembed = ollama.embed(model="nomic-embed-text", input=query)['embeddings']

# Retrieve the 10 nearest chunks and paste them into the prompt as context.
# ['documents'][0] selects the results for the first (only) query embedding.
relateddocs = '\n\n'.join(collection.query(query_embeddings=queryembed, n_results=10)['documents'][0])
prompt = f"{query} - Answer that question using the following text as a resource: {relateddocs}"

# NOTE(review): the baseline answer comes from "mistral" while the RAG answer
# comes from "phi3", so differences between the two outputs mix the effect of
# retrieval with the effect of the model. Confirm this is intended.
noragoutput = ollama.generate(model="mistral", prompt=query, stream=False)
print(f"Answered without RAG: {noragoutput['response']}")
print("---")
ragoutput = ollama.generate(model="phi3", prompt=prompt, stream=False)

print(f"Answered with RAG: {ragoutput['response']}")

Answered without RAG: 1. House of Wisdom (Bayt al-Hikmah): The Abbasid Caliphate established the House of Wisdom, a center for translation and research, where ancient Greek, Persian, Indian, and Syriac works were translated into Arabic. This facilitated the dissemination of knowledge across cultures and contributed to the Golden Age of Islam.

2. Translation Movement: The Abbasids sponsored translations from multiple languages into Arabic, including works by Aristotle, Plato, Galen, Euclid, Ptolemy, and others in various fields such as mathematics, astronomy, medicine, philosophy, and literature. This had a significant impact on the scientific development not only within the Islamic world but also in Europe during the Middle Ages.

3. Mathematics: Al-Khwarizmi, an Abbasid scholar, developed the decimal number system and algebraic techniques that were essential for the development of modern mathematics. His work laid the foundations for the fields of algebra, trigonometry, and calculus.