In [104]:

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from langchain_text_splitters import MarkdownHeaderTextSplitter
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import GPT4AllEmbeddings

from tqdm.autonotebook import tqdm
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
gpt4all_embd = GPT4AllEmbeddings()



In [105]:
# Specify the path to the plain text file
feature_file_path = 'sample.feature'

# Open and read the file
with open(feature_file_path, 'r') as file:
    content = file.read()

# Print the content of the file
print(content)

Feature: Login
  As a new user
  I want to log in to the website
  So that the system can remember my data

  Scenario #1: Successful Log in to the website
    Given A user brings up the login pop-up
    When A user clicks Sign-in
    And A user enters a valid email <email> and password <password>
    And A user clicks Sign-in
    Then A user should be successfully logged into the site

  Scenario #2: Unsuccessful Log in to the website
    Given A user brings up the login pop-up
    When A user enters an invalid email <email> and password <password>
    And A user clicks Sign-in
    Then A user should not be successfully logged into the site


In [106]:
document = Document(page_content=content)
print(document)

page_content='Feature: Login
  As a new user
  I want to log in to the website
  So that the system can remember my data

  Scenario #1: Successful Log in to the website
    Given A user brings up the login pop-up
    When A user clicks Sign-in
    And A user enters a valid email <email> and password <password>
    And A user clicks Sign-in
    Then A user should be successfully logged into the site

  Scenario #2: Unsuccessful Log in to the website
    Given A user brings up the login pop-up
    When A user enters an invalid email <email> and password <password>
    And A user clicks Sign-in
    Then A user should not be successfully logged into the site'


In [107]:
headers_to_split_on = [
    ("Scenario", "Scenario Outline")
]

markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on, strip_headers=False
)

md_header_splits = markdown_splitter.split_text(content)
md_header_splits

[Document(metadata={}, page_content='Feature: Login\nAs a new user\nI want to log in to the website\nSo that the system can remember my data'),
 Document(metadata={'Scenario Outline': '#1: Successful Log in to the website'}, page_content='Scenario #1: Successful Log in to the website\nGiven A user brings up the login pop-up\nWhen A user clicks Sign-in\nAnd A user enters a valid email <email> and password <password>\nAnd A user clicks Sign-in\nThen A user should be successfully logged into the site'),
 Document(metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}, page_content='Scenario #2: Unsuccessful Log in to the website\nGiven A user brings up the login pop-up\nWhen A user enters an invalid email <email> and password <password>\nAnd A user clicks Sign-in\nThen A user should not be successfully logged into the site')]

In [110]:
# Create a vector store from the documents
vectorstore = Chroma.from_documents(documents=md_header_splits, embedding=gpt4all_embd)


In [109]:
# Reinitialize the vector store to clear it
vectorstore = Chroma.from_documents(documents=[], embedding=gpt4all_embd)

IndexError: list index out of range in upsert.

In [111]:
retriever = vectorstore.as_retriever()

In [112]:
query = "Scenario where my email is not registered and I try to login"

In [114]:
# Retrieve relevant content using the similarity_search method
results = retriever.invoke(query, top_k=1)
results

[Document(metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}, page_content='Scenario #2: Unsuccessful Log in to the website\nGiven A user brings up the login pop-up\nWhen A user enters an invalid email <email> and password <password>\nAnd A user clicks Sign-in\nThen A user should not be successfully logged into the site'),
 Document(metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}, page_content='Scenario #2: Unsuccessful Log in to the website\nGiven A user brings up the login pop-up\nWhen A user enters an invalid email <email> and password <password>\nAnd A user clicks Sign-in\nThen A user should not be successfully logged into the site'),
 Document(metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}, page_content='Scenario #2: Unsuccessful Log in to the website\nGiven A user brings up the login pop-up\nWhen A user enters an invalid email <email> and password <password>\nAnd A user clicks Sign-in\nThen A user should not be

In [115]:
# Calculate similarity scores manually
query_embedding = gpt4all_embd.embed_query("Scenario where my email is not registered and I try to login")
document_embeddings = [gpt4all_embd.embed_query(result.page_content) for result in results]

similarity_scores = cosine_similarity([query_embedding], document_embeddings).flatten()

# Print the results with similarity scores
for result, score in zip(results, similarity_scores):
    print(f"Document: {result}\nSimilarity Score: {score}\n")

Document: page_content='Scenario #2: Unsuccessful Log in to the website
Given A user brings up the login pop-up
When A user enters an invalid email <email> and password <password>
And A user clicks Sign-in
Then A user should not be successfully logged into the site' metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}
Similarity Score: 0.5796955321258643

Document: page_content='Scenario #2: Unsuccessful Log in to the website
Given A user brings up the login pop-up
When A user enters an invalid email <email> and password <password>
And A user clicks Sign-in
Then A user should not be successfully logged into the site' metadata={'Scenario Outline': '#2: Unsuccessful Log in to the website'}
Similarity Score: 0.5796955321258643

Document: page_content='Scenario #2: Unsuccessful Log in to the website
Given A user brings up the login pop-up
When A user enters an invalid email <email> and password <password>
And A user clicks Sign-in
Then A user should not be successfully l