# Gemma Model

In [1]:
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from dotenv import load_dotenv
load_dotenv()

ACCESS_TOKEN = os.getenv("ACCESS_TOKEN") # reads .env file with ACCESS_TOKEN=<your hugging face access token>

model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=ACCESS_TOKEN)
# quantization_config = BitsAndBytesConfig(load_in_4bit=True, 
#                                          bnb_4bit_use_double_quant=True,
#                                          bnb_4bit_compute_dtype=torch.bfloat16)

model = AutoModelForCausalLM.from_pretrained(model_id, 
                                             device_map={"": torch.device("cpu")}, 
                                            #  quantization_config=quantization_config,
                                             token=ACCESS_TOKEN)
model.eval()
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
print(f"using {device}")




  from .autonotebook import tqdm as notebook_tqdm
Downloading shards: 100%|██████████| 2/2 [00:00<00:00,  5.37it/s]
Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:13<00:00,  6.72s/it]


using cpu


In [2]:
def inference(question: str, context: str):

    if context == None or context == "":
        prompt = f"""Give a brief answer to the following question. Question: {question}"""
    else:
        prompt = f"""Using the information contained in the context, give a detailed answer to the question.
            Context: {context}.
            Question: {question}"""
    chat = [
        {"role": "user", "content": prompt},
        # { "role": "model", "content": "Recurrent Attention (RAG)** is a novel neural network architecture specifically designed" }
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer.encode(
        formatted_prompt, add_special_tokens=False, return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=250,
            do_sample=False,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    response = response[len(formatted_prompt) :]  # remove input prompt from reponse
    response = response.replace("<eos>", "")  # remove eos token
    return response


question = "What is a transformer?"
print(inference(question=question, context=""))

Sure, here's a brief answer to the question:

A transformer is an electrical device that transfers energy from one circuit to another through inductively coupled conductors. It is used to increase or decrease the voltage of an alternating current (AC) signal.


# Document Loading and Splitting

In [8]:
# from langchain.document_loaders import PyPDFLoader

# loaders = [
#     PyPDFLoader("/home/eversberg/Downloads/1706.03762.pdf"),
#     PyPDFLoader("/home/eversberg/Downloads/2005.11401.pdf"),
# ]
# pages = []
# for loader in loaders:
#     pages.extend(loader.load())

In [4]:
# Load text from files
txt_files = ["../files/test-suite.txt"]
txt_documents = []
for file in txt_files:
    with open(file, 'r') as f:
        content = f.read()
        txt_documents.append(content)

In [6]:

# Convert text into document format
from langchain.schema import Document

docs = [Document(page_content=txt, metadata={"source": file}) for txt, file in zip(txt_documents, txt_files)]


In [7]:
from langchain.text_splitter import TokenTextSplitter
# Split text into chunks
text_splitter = TokenTextSplitter(chunk_size=128, chunk_overlap=12)
split_docs = text_splitter.split_documents(docs)

In [9]:
# from langchain.text_splitter import TokenTextSplitter

# text_splitter = TokenTextSplitter(chunk_size=128, chunk_overlap=12)
# docs = text_splitter.split_documents(pages)

In [None]:
# print(docs[0].page_content)

# Embeddings and Vector Store

In [8]:

import numpy as np
from langchain_community.embeddings import (
    HuggingFaceEmbeddings
)
encoder = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2', model_kwargs = {'device': "cpu"})




In [None]:
# embeddings1 = encoder.embed_query("RAG")
# embeddings2 = encoder.embed_query(docs[0].page_content)
# print(np.dot(embeddings1, embeddings2))

In [10]:
from langchain.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

# create FAISS index with only text documents
faiss_db = FAISS.from_documents(split_docs, encoder, distance_strategy=DistanceStrategy.DOT_PRODUCT)

In [13]:
# from langchain.vectorstores import FAISS
# from langchain_community.vectorstores.utils import DistanceStrategy
# faiss_db = FAISS.from_documents(docs, encoder, distance_strategy=DistanceStrategy.DOT_PRODUCT)

In [13]:
question = f"""Check if there is a relevant test scenario that covers the issue raised by the following jira bug ticket. 
{test_scenario}
Provide the answer in the below snytax.
If there is a relevant scenario:
'''
Scenario Id = @MSDK-xxx (Each scenario has its own id tag at the beginning)
Coverage Percantage: xx% (provide a coverage percemtage on a scale from 1 to 100, where 1 is there is very small coverage and 100 is there is exact coverage. 
Description: xxx (optional, if there is any additional comments)
'''
If there isn't any relevant scenario: 
'''There isn't any relevant test scenarios that cover the issue raised by the Jira bug ticket.
'''
"""

retrieved_docs = faiss_db.similarity_search(question, k=3)
context = "".join(doc.page_content + "\n" for doc in retrieved_docs)
print(context)

          | qa-xlarge   |

Feature: Theme Support

  Background: Install and run demo app
    Given I have environment data in "./qa-scenarios/environment-data.yaml"
    Given I have Demo App installed

  @MSDK-1400
  Scenario: As a Demo App / Pointr Maps user, I should see all components with default colors, When there is no theme related configuration in the test environment
    Given the test environment doesn't have any theme related configurations
 
 Given I have environment data in "./qa-scenarios/environment-data.yaml"
        Given I change the language of the test device to a random supported language
        Given I have Demo Apps installed

    @MSDK-1050 @sanity
    Scenario Outline: As a demo app user, I should see a flyover animation, When I launch demo app via deeplink
        When I "<action>"
        Then "Flyover
    @MSDK-220
    Scenario: As a demo app user, I should see fallback icon, When I highlight a rich POI with a broken/unsupported logo
        Given test env

In [12]:
torch.cuda.empty_cache()
print(inference(question=question, context=context))

The relevant test scenario that covers the issue raised by the Jira bug ticket is:

**Scenario ID:** @MSDK-1050
**Correlation Percentage:** 100%
**Description:** As a demo app user, I should see a flyover animation, When I launch demo app via deeplink

This scenario perfectly matches the issue described in the Jira bug ticket, where the flyover animation is not displayed when launching the demo app via deeplink.


In [None]:
print("For this answer I used the following documents:")
for doc in retrieved_docs:
    print(doc.metadata)

In [14]:
with open("../files/queries.txt", 'r') as f:
    queries = f.read()

In [23]:
query_list = queries.split("Jira id: ") [1:]

In [24]:
query_list

['SDK-7960\nTicket Title:  Mapwidget should refresh when I scan different QR codes in a row\nTicket Description: \n\n',
 'SDK-7879\nTicket Title:  Enter Building label should disappear when I cancel wayfinding session\nTicket Description: While I was  testing navigation scenarios, I ran into an issue where the Enter Building label remained on the map after I cancelled the navigation. I tried to reproduce it again but couldn’t. Please check the first 20 seconds of the recording to see the issue.\n\n',
 'SDK-7796\nTicket Title:  Zoom level is set incorrect, when I start a navigation while my heading is in the opposite direction of the path\nTicket Description: Get indoor position, Turn you heading in the opposite direction of the route,\nSelect a category and click on Take me There button of the destination POI, observe the zoom level is lower than expected.\n\n',
 'SDK-7969\nTicket Title: Level Selector should update, when I set up outdoor-to-indoor route and change the level\nTicket De

In [26]:
for test_scenario in query_list:

    question = f"""Check if there is a relevant test scenario that covers the issue raised by the following jira bug ticket. 
                    {test_scenario}
                    Provide the answer in the below snytax.
                    If there is a relevant scenario:
                    '''
                    Scenario Id = @MSDK-xxx (Each scenario has its own id tag at the beginning)
                    Coverage Percantage: xx% (provide a coverage percemtage on a scale from 1 to 100, where 1 is there is very small coverage and 100 is there is exact coverage.                     '''
                    If there isn't any relevant scenario: 
                    '''There isn't any relevant test scenarios that cover the issue raised by the Jira bug ticket.
                    '''
                    """

    retrieved_docs = faiss_db.similarity_search(question, k=3)
    context = "".join(doc.page_content + "\n" for doc in retrieved_docs)

    print(question)

    torch.cuda.empty_cache()
    print(inference(question=question, context=context))

    print("*" * 100)    

Check if there is a relevant test scenario that covers the issue raised by the following jira bug ticket. 
                    SDK-7960
Ticket Title:  Mapwidget should refresh when I scan different QR codes in a row
Ticket Description: 


                    Provide the answer in the below snytax.
                    If there is a relevant scenario:
                    '''
                    Scenario Id = @MSDK-xxx (Each scenario has its own id tag at the beginning)
                    Coverage Percantage: xx% (provide a coverage percemtage on a scale from 1 to 100, where 1 is there is very small coverage and 100 is there is exact coverage.                     '''
                    If there isn't any relevant scenario: 
                    '''There isn't any relevant test scenarios that cover the issue raised by the Jira bug ticket.
                    '''
                    
There is no relevant test scenario that covers the issue raised by the Jira bug ticket. Therefore, I cannot