In [1]:
import numpy as np
import pandas as pd

# Loading the Dataset


In [28]:
import markdown_to_json
import pprint

# Read the OmniFlow documentation. The encoding is pinned so the text does not
# depend on the platform's default locale encoding.
with open('./data/OmniFlow.md', 'r', encoding='utf-8') as file:
    data = file.read()

# print(data)

# Parse the markdown text into a (possibly nested) dictionary.
md_dict = markdown_to_json.dictify(data)

# pprint.pprint(md_dict)

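# Depth-first traversal of the parsed markdown dictionary: nested dictionaries
# are descended into, and every non-dictionary value is recorded as a key/value pair.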
def dfs_dict(d, path=None):
    """Flatten a nested dictionary into a list of leaf key/value records.

    The dictionary is walked depth-first in iteration order. Nested
    dictionaries are descended into and are not recorded themselves; every
    other value is recorded as ``{"key": key, "value": str(value)}``.

    Args:
        d: Dictionary to traverse (values may themselves be dictionaries).
        path: Optional list that collects the records. It is extended in
            place; when omitted or ``None`` a fresh list is created.

    Returns:
        The list of records (the same object as ``path`` when one was given).
    """
    if path is None:
        path = []

    for key, value in d.items():
        if isinstance(value, dict):
            # Intermediate level: recurse and keep collecting into the same list.
            dfs_dict(value, path)
        else:
            # Leaf: store the value as text so every record has the same shape.
            path.append({"key": key, "value": str(value)})

    # Returning the list makes the ``path=None`` form usable; previously the
    # freshly created list was discarded.
    return path

# Collect every leaf entry of the parsed markdown into one flat list of records.
all_md_keys_dict = list()
dfs_dict(md_dict, path=all_md_keys_dict)

pprint.pprint(all_md_keys_dict)

[{'key': 'Introduction',
  'value': 'Welcome to the OmniFlow Documentation. OmniFlow is an all-in-one '
           'workflow automation platform designed to help businesses '
           'streamline their operations by automating repetitive tasks, '
           'integrating with existing tools, and providing real-time '
           'analytics. This guide will walk you through the features, '
           'installation, and usage of OmniFlow.'},
 {'key': 'Table of Contents',
  'value': "['[OmniFlow Documentation](#omniflow-documentation)', "
           "['[Introduction](#introduction)', '[Table of "
           "Contents](#table-of-contents)', '[1. Overview](#1-overview)', '[2. "
           "Installation](#2-installation)', ['[System "
           "Requirements](#system-requirements)', '[Installation "
           "Steps](#installation-steps)'], '[3. Getting "
           "Started](#3-getting-started)', ['[Creating Your First "
           "Workflow](#creating-your-first-workflow)', '[Adding Team

In [29]:
# Build the frame from the flattened records and expose the row index as an
# explicit "id" column, in a single expression so the cell can be re-run safely.
md_df = pd.DataFrame(all_md_keys_dict).assign(id=lambda frame: frame.index)
md_df.head(5)

Unnamed: 0,key,value,id
0,Introduction,Welcome to the OmniFlow Documentation. OmniFlo...,0
1,Table of Contents,['[OmniFlow Documentation](#omniflow-documenta...,1
2,1. Overview,OmniFlow is built to adapt to the unique needs...,2
3,System Requirements,['**Operating System:** Windows Server 2016 or...,3
4,Installation Steps,['**Download the Installer:** Access the lates...,4


# ChromaDB

In [30]:
import chromadb
from chromadb.config import Settings

In [31]:
# Create the ChromaDB client, pointing its storage at the local "./db" path.
chroma_client = chromadb.PersistentClient(path="./db")

In [32]:
collection_name = "md_collection"

# Start from a clean slate: drop the collection if it already exists, wherever
# it appears in the listing, then recreate it empty.
# list_collections() entries expose `.name` in the chromadb version used here;
# plain-string entries are accepted too in case a release returns names only.
existing_names = {
    entry if isinstance(entry, str) else entry.name
    for entry in chroma_client.list_collections()
}
if collection_name in existing_names:
    chroma_client.delete_collection(name=collection_name)
collection = chroma_client.create_collection(name=collection_name)

In [33]:
# Names of the md_df columns used when filling the collection.
DOCUMENT = "value"  # column whose text is stored as the document
TOPIC = "key"  # column whose text is stored as metadata

In [34]:
# Populate the collection: one document per dataframe row, each tagged with its
# topic as metadata and given a positional id ("id0", "id1", ...).
collection.add(
    ids=[f"id{position}" for position in range(len(md_df))],
    documents=md_df[DOCUMENT].tolist(),
    metadatas=[{TOPIC: title} for title in md_df[TOPIC].tolist()],
)

# Querying the Database

In [37]:
import pprint
# Sanity-check retrieval: request the three best matches for a sample query.
results = collection.query(
    n_results=3,
    query_texts=["how to install"],
)

pprint.pprint(results)

{'data': None,
 'distances': [[1.4127005832468031, 1.4198544050607402, 1.421332467839799]],
 'documents': [['[\'Go to the "Team" section from the settings menu.\', '
                '\'Click "Invite Team Member."\', \'Enter the email address of '
                'the team member and assign their role (e.g., Admin, Manager, '
                "User).', 'Send the invitation to grant access.']",
                "['**Download the Installer:** Access the latest version of "
                'OmniFlow from our official website '
                "[www.omniflow.com](https://www.omniflow.com).', '**Run the "
                'Installer:** Open the downloaded file and follow the '
                "on-screen instructions to install OmniFlow.', '**Initial "
                'Setup:** After installation, launch OmniFlow and follow the '
                "setup wizard to configure your workspace.', '**Create an "
                'Admin Account:** Set up your admin account to begin using '
               

# Loading the Model and Creating the Prompt

In [39]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#model_id = "databricks/dolly-v2-3b"

# trust_remote_code is deliberately left at its default (False): enabling it
# lets Python code from the model repository run locally, and this checkpoint
# loads with the Llama classes built into transformers.
tokenizer = AutoTokenizer.from_pretrained(model_id)
lm_model = AutoModelForCausalLM.from_pretrained(model_id)

In [40]:
# Text-generation pipeline around the already-loaded model and tokenizer,
# configured with max_new_tokens=256.
# NOTE(review): lm_model is already instantiated — confirm device_map="auto"
# has any effect when a model object (not a model id) is passed.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=lm_model,
    device_map="auto",
    max_new_tokens=256,
)

# Creating the Extended Prompt

In [98]:
def generate_extended_prompt(question, context_to_lookup=None, results_to_use=1):
    """Build the prompt for the language model, optionally with retrieved context.

    Args:
        question: Question placed on the ``Question:`` line of the prompt.
        context_to_lookup: Text used to query the module-level ``collection``.
            When falsy, no lookup happens and the context is left empty.
        results_to_use: Number of results requested from the collection.

    Returns:
        The prompt string: a context line, an instruction line and the
        question, each indented by two spaces.
    """
    if not context_to_lookup:
        context = ""
    else:
        # Look up the vector database for documents matching the lookup text.
        hits = collection.query(query_texts=[context_to_lookup], n_results=results_to_use)
        # Prefix every returned document with "#" and join them with single spaces.
        context = " ".join("#" + str(doc) for doc in hits["documents"][0])

    # Assemble the template line by line; the leading newline, the two-space
    # indents and the trailing two spaces are part of the prompt text.
    return (
        "\n"
        f"  Relevant context: {context}\n"
        "  Considering the above context, answer the following question.\n"
        f"  Question: {question}\n"
        "  "
    )

In [109]:
question = "How much RAM is needed for Omniflow?"
ctx_lookup = "RAM"
ext_prompt = generate_extended_prompt(question, ctx_lookup, 1)
print(ext_prompt)


  Relevant context: #['**Operating System:** Windows Server 2016 or later, macOS 11.0 or later, Linux (CentOS 8 or later)', '**RAM:** 8 GB minimum', '**Disk Space:** 1 GB minimum', '**Internet Connection:** Required for cloud-based features']
  Considering the above context, answer the following question.
  Question: How much RAM is needed for Omniflow?
  


In [110]:
lm_response = pipe(ext_prompt)
print(lm_response[0]["generated_text"])


  Relevant context: #['**Operating System:** Windows Server 2016 or later, macOS 11.0 or later, Linux (CentOS 8 or later)', '**RAM:** 8 GB minimum', '**Disk Space:** 1 GB minimum', '**Internet Connection:** Required for cloud-based features']
  Considering the above context, answer the following question.
  Question: How much RAM is needed for Omniflow?
  
  Answer: The minimum RAM requirement for Omniflow is 8 GB.
