In [None]:
import phoenix as px

In [None]:
import llama_index
from llama_index.core import Document
from llama_index.indices.managed.vectara import VectaraIndex,VectaraAutoRetriever
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import MetadataInfo, VectorStoreInfo
from llama_index.core.indices.service_context import ServiceContext
from llama_index.llms.together import TogetherLLM
from semantic_router import Route,RouteLayer
from semantic_router.encoders import OpenAIEncoder

from common_imports import *
import json , os , sys , time , re
import dotenv

In [None]:
px.launch_app()
llama_index.core.set_global_handler("arize_phoenix")

In [None]:
dotenv.load_dotenv()

## Review Emb

In [None]:
def convert_reviews_into_textnodes(reviews):
    # reviews : list of reviews 
    # review_documents = [Document(text=review['text'],metadata=review , doc_id=review['metadata']) for review in reviews]
    review_nodes = [TextNode(text=review['text'],metadata={
        'source':str(review['source']),
        'date_time':str(review['metadata']['at']),
        'reviewId':str(review['metadata']['reviewId']),
        'userName':str(review['metadata']['userName']),
        'rating':str(review['metadata']['score']),
        'thumbsUpCount':str(review['metadata']['thumbsUpCount']),
        'appVersion':str(review['metadata']['appVersion']),
        'replyContent':str(review['metadata']['replyContent']),
        'repliedAt':str(review['metadata']['repliedAt']),
        'text':str(review['text']),
        'app_name':str(review['app_name']),
    },id_ = review['metadata']['reviewId']
    ) for review in reviews]

    return review_nodes

In [None]:
def get_reviews_from_file(file_path):
    # file_path : path to the file containing reviews
    # Returns list of reviews
    with open(file_path,'r') as f:
        reviews = json.load(f)
    return reviews

In [211]:
class ReviewEngine:
    def __init__(
            self,
            verbose=True, 
            similarity_top_k=2,
            summary_enabled=False,
            summary_response_lang="eng",
            summary_num_results=7,
            llm_model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
            ):
        self.index = VectaraIndex(show_progress=True)
        self.vector_store_info = VectorStoreInfo(
            content_info = "App reviews from different sources",
            metadata_info = [
                MetadataInfo(
                    name = "source",
                    type = "string",
                    description="Source of the review like playstore,appstore etc"
                ),
                MetadataInfo(
                    name = "date_time",
                    type = "string",
                    description="Date and time of the review"
                ),
                MetadataInfo(
                    name = "reviewId",
                    type = "string",
                    description="Review Id"
                ),
                MetadataInfo(
                    name = "userName",
                    type = "string",
                    description="User name of the reviewer"
                ),
                MetadataInfo(
                    name = "rating",
                    type = "float",
                    description="Rating given by the reviewer"
                ),
                MetadataInfo(
                    name = "thumbsUpCount",
                    type = "int",
                    description="Number of thumbs up i.e. the relevance of the review"
                ),
                MetadataInfo(
                    name = "appVersion",
                    type = "string",
                    description="App version of the app for which review is given"
                ),
                MetadataInfo(
                    name = "replyContent",
                    type = "string",
                    description="Reply content to the review by any other user"
                ),
                MetadataInfo(
                    name = "repliedAt",
                    type = "string",
                    description="Date and time of the reply"
                ),
                MetadataInfo(
                    name = "app_name",
                    type = "string",
                    description="App name for which review is given"
                )
            ]
        )
        self.llm = TogetherLLM(
            model=llm_model_name, api_key=os.environ['TOGETHER_API_KEY']
        )

        self.verbose = verbose
        self.similarity_top_k = similarity_top_k
        self.summary_enabled = summary_enabled
        self.summary_response_lang = summary_response_lang
        self.summary_num_results = summary_num_results

        self.build()
    
    def build(self):
        self.auto_retriever = VectaraAutoRetriever(
            vector_store_info=self.vector_store_info,
            llm=self.llm,
            index=self.index,
            show_progress=True,
            summary_enabled = self.summary_enabled,
            summary_response_lang = self.summary_response_lang,
            summary_num_results = self.summary_num_results,
            verbose=self.verbose,
        )
        self.retriever = self.index.as_retriever(
            similarity_top_k=self.similarity_top_k,
            summary_enabled = self.summary_enabled,
            summary_response_lang = self.summary_response_lang,
            summary_num_results = self.summary_num_results,
            llm = self.llm,
            )
        self.query_engine = self.index.as_query_engine(similarity_top_k=5)

    def ingest_reviews(self,review_file_path,start=None,end=None):
        self.reviews = get_reviews_from_file(review_file_path)[start:end]
        self.review_nodes = convert_reviews_into_textnodes(self.reviews)
        self.index = VectaraIndex(nodes = self.review_nodes, show_progress=True)
        self.build()
    
    def ingest_nodes(self,nodes):
        self.index = VectaraIndex(nodes = nodes, show_progress=True)
        self.build()

    def run(self,query , mode:str):
        # query : query string
        # mode : 'autoretriever' or 'retriever' or 'query_engine'
        if mode == 'autoretriever':
            return self.auto_retriever.retrieve(query)
        elif mode == 'retriever':
            return self.retriever.retrieve(query)
        elif mode == 'query_engine':
            return self.query_engine.query(query)
        else:
            return "Invalid mode"

In [None]:
_review_file_path = './datas/api_result_reviews_relv_Google_Pay_Secure_UPI_payment_v0.json'
_reviews = get_reviews_from_file(_review_file_path)
_review_nodes = convert_reviews_into_textnodes(_reviews)

In [None]:
_review_nodes[0]

In [304]:
review_engine = ReviewEngine(
    verbose=True, 
    similarity_top_k=2,
    summary_enabled=False,
    summary_response_lang="eng",
    summary_num_results=7,
    llm_model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
)

LLM is explicitly disabled. Using MockLLM.
Embeddings have been explicitly disabled. Using MockEmbedding.


In [None]:
# review_file_path = './datas/api_result_reviews_relv_Grand_Theft_Auto_San_Andreas_v0.json'
# review_engine.ingest_reviews(review_file_path,end=20)

In [None]:
# ans = review_engine.run('What are the reviews from playstore',mode='autoretriever')

In [None]:
ans

## Generic JSON Engine

In [274]:
"""
JSON_Engine is a wrapper over LlamaIndex JSON Query Engine
It takes a JSON prompt and a pydanctic class name as input in the constructor
It uses OpenAI API to generate the output

run method processes the prompt and returns the output in the form of pydantic class object
"""
class JSON_Engine(BaseTool):
    def __init__(self, prompt, class_name, llm_model_name: str = "mistralai/Mixtral-8x7B-Instruct-v0.1",temperature=0.1, api_key_name = "TOGETHER_API_KEY",parse=True):
        self.output_parser = PydanticOutputParser(class_name)
        self.llm = TogetherLLM(model=llm_model_name, api_key=os.environ[api_key_name], temperature=temperature)
        # self.llm = OpenAI(model="gpt-3.5-turbo", temperature=temperature)
        self.json_prompt_str = prompt
        self.class_name = class_name
        self.json_prompt_str = self.output_parser.format(self.json_prompt_str)
        self.json_prompt_tmpl = PromptTemplate(self.json_prompt_str)
        if parse:
            self.p = QueryPipeline(chain=[self.json_prompt_tmpl, self.llm, self.output_parser], verbose=False)
        else:
            self.p = QueryPipeline(chain=[self.json_prompt_tmpl, self.llm], verbose=False)
    
    def run(self, **kwargs):
        response = self.p.run(**kwargs)
        return response

## Data Ingestion Pipeline

In [275]:
class SubContext(BaseModel):
    subContext:str = Field(...,description=
                        """
                        Subset of a large review context which is relevant to the query. 
                        It should be a full sentence containing a single aspect of the review.
                        """)
    sentiment:str = Field(...,description=
                           """
                             Sentiment of the review i.e. positive or negative
                            'positive' means the review has positive sentiment
                            'negative' means the review has negative sentiment
                            """)
    containsTechnicalTerms:bool = Field(...,description=
                                        """
                                        Whether the sub context contains technical terms or not such as any glitch, bug, feature etc.
                                        True means the sub context contains technical terms
                                        False means the sub context does not contain technical terms
                                        """)
    

class ListOfSubContext(BaseModel):
    subContexts:List[SubContext] = Field(...,description=
            """
            List of sub contexts extracted from the review context
            """)

class SubContextEngine:
    def __init__(self):
        self.engine = JSON_Engine("""
        Given a review context, extract the list of relevant sub contexts (between <<< and >>>):
        <<<
        {text}
        >>>

        Note: 
                These sub contexts should be all different aspects of the review context and should be full sentences. 
                Also provide the sentiment of each sub context and whether it contains any technical terms or not.
        """,class_name = ListOfSubContext,temperature=0.1)
    
    def __call__(self,**kwargs):
        return self.engine.run(**kwargs)    

In [276]:
class Isssue(BaseModel):
    issue:str = Field(...,description="""
    A negative aspect extracted from the review context. It should be a full sentence and complete.
    """)

    containsAnyReport:bool = Field(...,description="""
    Whether the issue contains any particular report or not. It may user is reporting some issue or bug like any glitch, crash, lag, something not working etc. 
    True means the issue contains any particular report
    False means the issue does not contain any particular report
    """)

    featureRequest:bool = Field(...,description=
    """
    Whether the issue is a feature request or not. It may user is unsatisfied with the current features and wants some new features / upgrade.
    True means the issue is a feature request
    False means the issue is not a feature request
    """)

class IssueList(BaseModel):
    issues:List[Isssue] = Field(...,description=
    """
    List of different Issues from review context
    """
    )

class IssueEngine:
    def __init__(self):
        self.engine = JSON_Engine("""
        Given a review context, extract the list of different negative aspects mentioned in the review context:
        <<<
        {text}
        >>>

        Note: 
        These aspects should be all different negative aspects of the review context and should be full sentences.
        Also they must be complete i.e. they should not be a part of bigger aspects.
        If there are no negative aspects, return an empty list. 

        """,class_name = IssueList,temperature=0.2)
    
    def __call__(self,**kwargs):
        return self.engine.run(**kwargs)

In [277]:
class ReviewDegenAndSentimentPipeline:
    def __init__(self):
        self.issue_engine = IssueEngine()
    
    def __call__(self,review):
        issues = self.issue_engine(text=review)
        issues = issues.dict()['issues']
        return issues

In [278]:
_sub_contexts = ReviewDegenAndSentimentPipeline()(review="""
Hi, After the last update, I'm not able to open the app. When I click on the login id, the UI disappear's and close the application. Kindly fix the issue ASAP. Secondly, the experience was excellent and swift before, but the cash back is very low. Everyone needs cash backs instead of irrelevant vouchers and coupons. Kindly bring these fixes ASAP. Keep growing guys. Thank you google pay.!!
""")

In [279]:
_sub_contexts

[{'issue': "I'm not able to open the app after the last update.",
  'containsAnyReport': True,
  'featureRequest': False},
 {'issue': 'The cash back is very low.',
  'containsAnyReport': False,
  'featureRequest': True},
 {'issue': 'Users prefer cash backs over irrelevant vouchers and coupons.',
  'containsAnyReport': False,
  'featureRequest': True}]

### Creating Routes for different team

In [280]:
class UtterenceList(BaseModel):
    utterenceList : List[str] = Field(...,description="""List of different utterances / use case related to the given team's work""")

class UtterenceEngine:
    def __init__(self):
        self.engine = JSON_Engine("""
        Given an app description (between ### and ###):
        ###
        {app_description}
        ###


        Given a team's scope of work (between <<< and >>>):
        <<<
        {scopes}
        >>>

        List down the more than 15 different utterances / use cases related to the given team's work and the app_description where they work.
        These utterances should be full sentences and refers to the different bugs, features, improvements, etc. related to the team's work.
        Strictly stick to the scope of work of the team.
        """,class_name = UtterenceList,temperature=0.1)
    
    def __call__(self,**kwargs):
        return self.engine.run(**kwargs)
    

In [281]:
class TechnicalSummary(BaseModel):
    summary:str = Field(...,description=
            """
            Summary of all technical details associated with the app
            """)

class TechnicalSummaryEngine:
    def __init__(self):
        self.engine = JSON_Engine("""
        Given a raw app description (between <<< and >>>):
        <<<
        {app_description}
        >>>
        
        Summarize all the technical details associated with the app into a brief summary. Only include techincal features. 
        """,class_name = TechnicalSummary,temperature=0.1)
    
    def __call__(self,**kwargs):
        return self.engine.run(**kwargs)

In [None]:
class Sem

In [282]:
class TeamRoutePipeline:
    def __init__(self, team_details_file_path:str, app_details_file_path:str,app_description=None,build_mode=False,score_threshold_config=None,score_threshold=0.82):
        self.team_details_file_path = team_details_file_path
        # load app details from the file
        with open(app_details_file_path,'r') as f:
            app_details = json.load(f)
        with open(self.team_details_file_path,'r') as f:
            self.team_details = json.load(f)

        self.app_name = app_details["title"]
        self.app_description = app_description
        self.utterance_engine = UtterenceEngine()
        self.technical_summary_engine = TechnicalSummaryEngine()

        self.routes = None
        self.route_layer = None
        self.routing_encoder = None
        self.score_threshold_config = score_threshold_config
        if score_threshold_config is None:
            self.score_threshold_config = {team:score_threshold for team in self.team_details["teams"]}

        if app_description is None:
            self.app_description = app_details["description"]
            self.summarized_app_description = self.technical_summary_engine(app_description=self.app_description).summary
        if build_mode:
            self.build()

    def build(self):
        # load team details from the file
        with open(self.team_details_file_path,'r') as f:
            self.team_details = json.load(f)
        print("Building utterances ...")
        for i,team in tqdm(enumerate(self.team_details["teams"])):
            utterance_ans = self.utterance_engine(app_description=self.summarized_app_description,scopes=str(team["scopes"])).utterenceList
            self.team_details["teams"][i]["utterances"] = utterance_ans
        # save the updated team details to the file
        with open(self.team_details_file_path,'w') as f:
            json.dump(self.team_details,f,indent=4)  

    def build_routes(self):
        with open(self.team_details_file_path,'r') as f:
            self.team_details = json.load(f)
        self.routes = []
        print("Building route layers ...")
        for i,team in tqdm(enumerate(self.team_details["teams"])):
            team_name = team["teamName"]
            
            try:
                team_utterances = team["utterances"]
            except:
                print(f"team_name : {team_name} has not utterances ... build_routes paused")
                return

            route = Route(name=team_name,utterances=team_utterances,score_threshold=self.score_threshold_config[team_name])
            self.routes.append(route)
        self.routing_encoder = OpenAIEncoder()
        self.route_layer = RouteLayer(encoder=self.routing_encoder, routes=self.routes)

    def route_text(self, text):
        if self.route_layer is None:
            print("Routes are not built yet")
            return None
        return self.route_layer(text)

In [296]:
class ReviewRoutePipeline:
    def __init__(self, team_route_obj):
        self.review_degen_pipeline = ReviewDegenAndSentimentPipeline()
        self.team_route_obj = team_route_obj

    def __call__(self,review_nodes):
        review_nodes_assigned = []
        for review_node in review_nodes:
            issues = self.review_degen_pipeline(review_node.text) 
            # assign each subset to a team
            # Loop over subsets_with_sentiment 
            teams = []
            review_node.metadata['assigned_teams'] = []
            review_node.metadata['issues'] = []
            review_node.metadata['detailed_issues'] = []
            # review_node.metadata['positive_keywords'] = []
            for issue in issues:
                print(">>> ",issue['issue'])
                if issue["containsAnyReport"] or issue['featureRequest']:
                    team_name = self.team_route_obj.route_text(issue['issue'])
                    print(team_name)
                    team_name = team_name.name
                    team_name = team_name if team_name else "General"
                    print(f"issue : {issue} assigned to Team: {team_name}")
                    teams.append(team_name)
                    review_node.metadata['issues'].append(issue['issue'])
                    review_node.metadata['detailed_issues'].append(issue)
                    review_node.metadata['assigned_teams'].append(issue)
            review_nodes_assigned.append(review_node)
        return review_nodes_assigned

In [297]:
team_route = TeamRoutePipeline(
    team_details_file_path = './datas/team_details_Google_Pay_Secure_UPI_payment.json',
    app_details_file_path = './datas/api_result_appdescr_Google_Pay_Secure_UPI_payment.json',
    score_threshold_config={
        'Payments Team': 0.1,
        'Recharges and Bill Payments Team': 0.1,
        'Rewards and Loyalty Team': 0.1,
        'Loans and Banking Team': 0.1,
        'Security and Fraud Prevention Team': 0.1,
        'UPI Scan and PIN': 0.80,
        'User Interface and User Experience': 0.0
    }
)

# team_route.build()
team_route.build_routes()

Building route layers ...


7it [00:00, 2253.10it/s]
[32m2024-04-19 01:20:39 INFO semantic_router.utils.logger local[0m


In [298]:
team_route.route_layer.get_thresholds()

{'Payments Team': 0.1,
 'Recharges and Bill Payments Team': 0.1,
 'Rewards and Loyalty Team': 0.1,
 'Loans and Banking Team': 0.1,
 'Security and Fraud Prevention Team': 0.1,
 'UPI Scan and PIN': 0.8,
 'User Interface and User Experience': 0.82}

In [299]:
_review_route_pipeline = ReviewRoutePipeline(team_route)

In [303]:
_review_nodes_routed = _review_route_pipeline(_review_nodes[:2])

>>>  not working properly from the last month
name=None function_call=None similarity_score=None
issue : {'issue': 'not working properly from the last month', 'containsAnyReport': True, 'featureRequest': False} assigned to Team: General
>>>  my keyboard does not pop up. Not at all
name=None function_call=None similarity_score=None
issue : {'issue': 'my keyboard does not pop up. Not at all', 'containsAnyReport': True, 'featureRequest': False} assigned to Team: General
>>>  I have not been able to understand the problem even after looking for it on google & Youtube
name=None function_call=None similarity_score=None
issue : {'issue': 'I have not been able to understand the problem even after looking for it on google & Youtube', 'containsAnyReport': True, 'featureRequest': False} assigned to Team: General
>>>  I'm not able to open the app. When I click on the login id, the UI disappears and close the application.


name=None function_call=None similarity_score=None
issue : {'issue': "I'm not able to open the app. When I click on the login id, the UI disappears and close the application.", 'containsAnyReport': True, 'featureRequest': False} assigned to Team: General
>>>  The cash back is very low.
>>>  Everyone needs cash backs instead of irrelevant vouchers and coupons.
name='Rewards and Loyalty Team' function_call=None similarity_score=None
issue : {'issue': 'Everyone needs cash backs instead of irrelevant vouchers and coupons.', 'containsAnyReport': False, 'featureRequest': True} assigned to Team: Rewards and Loyalty Team


In [305]:
review_engine.ingest_nodes(_review_nodes_routed)

LLM is explicitly disabled. Using MockLLM.
Embeddings have been explicitly disabled. Using MockEmbedding.


In [None]:
_review_nodes_routed

In [306]:
_review_nodes_routed

[TextNode(id_='cffa1994-02d8-4c58-b387-b82df504bf58', embedding=None, metadata={'source': 'playstore', 'date_time': '2024-03-22 20:15:35', 'reviewId': 'cffa1994-02d8-4c58-b387-b82df504bf58', 'userName': 'Athene Refer Code:- aa40c739a6', 'rating': '4', 'thumbsUpCount': '5266', 'appVersion': '219.1.3 (arm64-v8a_release_flutter)', 'replyContent': 'None', 'repliedAt': 'NaN', 'text': 'Best And Superfast App...But, not working properly from the last month. I can scan any barcode and make the payment through it. But, when opt for mobile transfer to any person, my keyboard does not pop up. Not at all. Please help, as I tried everything. I cleared its data, cache, and even unistalled & reinstalled the app. But, all in vain. I have not been able to understand the problem even after looking for it on google & Youtube.😃', 'app_name': 'Google_Pay_Secure_UPI_payment', 'assigned_teams': [{'issue': 'not working properly from the last month', 'containsAnyReport': True, 'featureRequest': False}, {'issue