In [1]:
import streamlit as st
import tempfile
import os
from langchain_community.vectorstores import FAISS
# from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings, AzureChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from typing import List, Optional

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field



In [2]:
from typing import List, Optional

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field






In [3]:
def load_document(tmp_file_path):
  # with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
  #     tmp_file.write(file.getvalue())
  #     tmp_file_path = tmp_file.name

  loader = PyPDFLoader(tmp_file_path)
  documents = loader.load()
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
  docs = text_splitter.split_documents(documents)
  
  # os.unlink(tmp_file_path)
  return docs

def create_vector_db(docs, path):
  embedding_function = AzureOpenAIEmbeddings(
            openai_api_type = "azure",
            openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
            deployment = "text-embedding-ada-002",
            model = "text-embedding-ada-002"
        )
  db = FAISS.from_documents(docs, embedding_function)
  db.save_local(path)
  return db

def load_vector_db(path):
  embedding_function = AzureOpenAIEmbeddings(
            openai_api_type = "azure",
            openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
            deployment = "text-embedding-ada-002",
            model = "text-embedding-ada-002"
        )
  return FAISS.load_local(path, embedding_function, allow_dangerous_deserialization=True)

def analyze_eligibility(rfp_content, proposal_content):
  llm= AzureChatOpenAI(
                openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
                openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION"),
                azure_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
                azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
                temperature = 0,
                
            )

  template = """
  You are an AI contract analyzer. Your task is to compare the eligibility criteria listed in a Request for Proposal (RFP) with the details provided in a proposal.

  RFP Content:
  {rfp_content}

  Proposal Content:
  {proposal_content}

  For each eligibility criterion in the RFP, provide the following:

  Eligibility Criterion: [Insert eligibility criterion from RFP]
  Eligibility Met (Yes/No): [Yes/No]
  Reason: [Provide a detailed explanation of how the eligibility criterion is met or not met based on the proposal]

  Please provide your analysis in a clear, structured format.
  """

  prompt = PromptTemplate(input_variables=["rfp_content", "proposal_content"], template=template)
  # chain = LLMChain(llm=llm, prompt=prompt)

  extarctor_chain=prompt|llm.with_structured_output(
    schema=EligibilityData,
    include_raw=False
  )
  extarctor_chain.invoke({'rfp_content':rfp_content, 'proposal_content':proposal_content})
  
  return extarctor_chain.invoke({'rfp_content':rfp_content, 'proposal_content':proposal_content})


In [4]:
rfp_file ="../data/tendernotice-1-11713759672.pdf"
# proposal_file = st.file_uploader("Upload Proposal (PDF)", type="pdf")
# Load and process RFP
rfp_docs = load_document(rfp_file)
rfp_db = create_vector_db(rfp_docs, "../vectorstore/RFP")


# Load and process Proposal
proposal_file= "../data/Proposal for Conducting Market Study on Artificial Intelligence and Competition.pdf"
proposal_docs = load_document(proposal_file)
proposal_db = create_vector_db(proposal_docs, "../vectorstore/Proposals/")

In [5]:
# Retrieve relevant content
rfp_content = rfp_db.similarity_search("eligibility criteria", k=20)
proposal_content = proposal_db.similarity_search("company background and qualifications", k=20)

In [6]:
rfp_content

[Document(metadata={'source': '../data/tendernotice-1-11713759672.pdf', 'page': 11}, page_content='ii. CCI’s nominated Officers will be part of “Study Team” to ensure timely completion \nof the study and quality assessment of the submissions made by the Agency.  \n \n \n \nVIII.   PRE -QUALIFICATION/ ELIGIBILITY CRITERIA  \n \nThe pre -qualification/ eligibility criteria for the agencies are provided in the table  below:  \n \nS. No.  Pre-Qualification Criteria  Proof Required  \n1. A minimum  of 5 years of existence  \n Copy of Certificate of \nIncorporation/ Registration/ MoA, \nas applicable.  \n2. The Agency should have a minimum Annual \nTurnover of Rs. 50 Cr. during each of the  3 \nfinancial years  i.e., FY 2020 -21, FY 2021 -22 \nand FY 2022 -23 Copies of  financial statements \nduly audited by statutory auditors . \nIn the absence of the same,  \ncertificates issued by chartered \naccountants  for the relevant years , \nas per Annexure -1 \n3. The Agency should have co mpleted

In [7]:
# Combine retrieved content
rfp_text = " ".join([doc.page_content for doc in rfp_content])
proposal_text = " ".join([doc.page_content for doc in proposal_content])

In [8]:
rfp_text

'ii. CCI’s nominated Officers will be part of “Study Team” to ensure timely completion \nof the study and quality assessment of the submissions made by the Agency.  \n \n \n \nVIII.   PRE -QUALIFICATION/ ELIGIBILITY CRITERIA  \n \nThe pre -qualification/ eligibility criteria for the agencies are provided in the table  below:  \n \nS. No.  Pre-Qualification Criteria  Proof Required  \n1. A minimum  of 5 years of existence  \n Copy of Certificate of \nIncorporation/ Registration/ MoA, \nas applicable.  \n2. The Agency should have a minimum Annual \nTurnover of Rs. 50 Cr. during each of the  3 \nfinancial years  i.e., FY 2020 -21, FY 2021 -22 \nand FY 2022 -23 Copies of  financial statements \nduly audited by statutory auditors . \nIn the absence of the same,  \ncertificates issued by chartered \naccountants  for the relevant years , \nas per Annexure -1 \n3. The Agency should have co mpleted  a \nminimum of 5 studies/ projects (AI \nrelated/tech -policy related/competition \nrelated/sect

In [9]:
llm= AzureChatOpenAI(
            openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
            openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION"),
            azure_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
            temperature = 0,
            
        )

template = """
You are an AI contract analyzer. Your task is to Find all the eligibility criteria listed in a Request for Proposal (RFP).

RFP Content:
{rfp_text}

"""

prompt = PromptTemplate(input_variables=["rfp_text"], template=template)
chain = LLMChain(llm=llm, prompt=prompt)

rfp_result=chain.run(rfp_text=rfp_text)

from IPython.display import Markdown, display
display(Markdown(rfp_result))

  warn_deprecated(
  warn_deprecated(


The eligibility criteria listed in the Request for Proposal (RFP) are as follows:

1. **A minimum of 5 years of existence**
   - **Proof Required:** Copy of Certificate of Incorporation/Registration/MoA, as applicable.

2. **The Agency should have a minimum Annual Turnover of Rs. 50 Cr. during each of the 3 financial years i.e., FY 2020-21, FY 2021-22, and FY 2022-23**
   - **Proof Required:** Copies of financial statements duly audited by statutory auditors. In the absence of the same, certificates issued by chartered accountants for the relevant years, as per Annexure-1.

3. **The Agency should have completed a minimum of 5 studies/projects (AI related/tech-policy related/competition related/sectoral studies for policy purposes)**
   - **Proof Required:** Not explicitly mentioned in the provided text, but typically would include project completion reports or similar documentation.

4. **The Agency should not have been blacklisted by Central/State Government Departments/Undertakings/Court of Law**
   - **Proof Required:** No Conviction/Debarment Certificate duly signed by the Authorised Signatory of the Agency, as per Annexure-3.

5. **The agency should have a valid PAN and Service Tax/GST Registration in India (if applicable)**
   - **Proof Required:** Self-attested copy of PAN card and Service Tax/GST Registration certificate (if applicable).

In [10]:
llm= AzureChatOpenAI(
            openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
            openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION"),
            azure_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
            temperature = 0,
            
        )

template = """
You are an AI contract analyzer. Your task is to compare the eligibility criteria listed in a Request for Proposal (RFP) with the details provided in a proposal.

RFP Content:
{rfp_result}

Proposal Content:
{proposal_content}

For each eligibility criterion in the RFP, provide the following:

Eligibility Criterion: [Insert eligibility criterion from RFP]
Eligibility Met (Yes/No): [Yes/No]
Reason: [Provide a detailed explanation of how the eligibility criterion is met or not met based on the proposal]

Please provide your analysis in a clear, structured format.

"""

prompt = PromptTemplate(input_variables=["rfp_result", "proposal_content"], template=template)
chain = LLMChain(llm=llm, prompt=prompt)
prompt = PromptTemplate(input_variables=["rfp_content", "proposal_content"], template=template)
# chain = LLMChain(llm=llm, prompt=prompt)

# extarctor_chain=prompt|llm.with_structured_output(
# schema=EligibilityData,
# include_raw=False
# )
extarctor_chain=prompt|llm
result=extarctor_chain.invoke({'rfp_result':rfp_result, 'proposal_content':proposal_content})


In [11]:
from IPython.display import display, Markdown
display(Markdown(result.content))


Certainly! Below is the structured analysis comparing the eligibility criteria listed in the RFP with the details provided in the proposal.

### Eligibility Criterion 1: A minimum of 5 years of existence
**Eligibility Met (Yes/No):** Yes  
**Reason:** The proposal states that Quantum Analytics Consulting has over 15 years of global experience. Additionally, Annexure 1 includes the Certificate of Incorporation, which serves as proof of the company's existence for more than 5 years.

### Eligibility Criterion 2: The Agency should have a minimum Annual Turnover of Rs. 50 Cr. during each of the 3 financial years i.e., FY 2020-21, FY 2021-22, and FY 2022-23
**Eligibility Met (Yes/No):** No  
**Reason:** The proposal only includes financial statements for FY 2022-23 (Annexure 2). There is no mention or inclusion of financial statements or certificates for FY 2020-21 and FY 2021-22, which are required to meet this criterion.

### Eligibility Criterion 3: The Agency should have completed a minimum of 5 studies/projects (AI related/tech-policy related/competition related/sectoral studies for policy purposes)
**Eligibility Met (Yes/No):** Yes  
**Reason:** The proposal mentions that Quantum Analytics Consulting has conducted over 50 studies globally on technology's impact on market structures, including ten in AI and competition. Annexure 3 includes copies of completion certificates for relevant past projects, which supports this claim.

### Eligibility Criterion 4: The Agency should not have been blacklisted by Central/State Government Departments/Undertakings/Court of Law
**Eligibility Met (Yes/No):** No  
**Reason:** The proposal does not include a No Conviction/Debarment Certificate duly signed by the Authorised Signatory of the Agency, as per Annexure-3. This document is required to confirm that the agency has not been blacklisted.

### Eligibility Criterion 5: The agency should have a valid PAN and Service Tax/GST Registration in India (if applicable)
**Eligibility Met (Yes/No):** No  
**Reason:** The proposal does not include a self-attested copy of the PAN card and Service Tax/GST Registration certificate. These documents are necessary to meet this eligibility criterion.

### Summary
Based on the analysis, Quantum Analytics Consulting meets the eligibility criteria for the minimum years of existence and the completion of relevant projects. However, the proposal lacks the necessary financial statements for all required years, the No Conviction/Debarment Certificate, and the PAN and Service Tax/GST Registration documents. Therefore, the proposal does not fully meet all the eligibility criteria outlined in the RFP.

#### Structured Output-1 

In [12]:
class EligibilityCriterion(BaseModel):
  criterion: str = Field(..., description="Description of the eligibility criterion.")
  eligibility_met: str = Field(..., description="Whether the eligibility criterion is met (Yes/No).")
  reason: str = Field(..., description="Reason for the eligibility status.")

class EligibilityData(BaseModel):
  eligibility_criteria: List[EligibilityCriterion]

In [13]:
llm= AzureChatOpenAI(
            openai_api_key = os.getenv("AZURE_OPENAI_API_KEY"),
            openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION"),
            azure_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
            temperature = 0,
            
        )

template = """
You are an AI contract analyzer. Your task is to compare the eligibility criteria listed in a Request for Proposal (RFP) with the details provided in a proposal.

RFP Content:
{rfp_result}

Proposal Content:
{proposal_content}

For each eligibility criterion in the RFP, provide the following:

Eligibility Criterion: [Insert eligibility criterion from RFP]
Eligibility Met (Yes/No): [Yes/No]
Reason: [Provide a detailed explanation of how the eligibility criterion is met or not met based on the proposal]

Please provide your analysis in a clear, structured format.

"""

prompt = PromptTemplate(input_variables=["rfp_result", "proposal_content"], template=template)


extarctor_chain=prompt|llm.with_structured_output(
schema=EligibilityData,
include_raw=False
)

result=extarctor_chain.invoke({'rfp_result':rfp_result, 'proposal_content':proposal_content})


In [14]:
result

EligibilityData(eligibility_criteria=[EligibilityCriterion(criterion='A minimum of 5 years of existence', eligibility_met='Yes', reason='The proposal states that Quantum Analytics Consulting has over 15 years of global experience. This is supported by the Certificate of Incorporation provided in Annexure 1.'), EligibilityCriterion(criterion='The Agency should have a minimum Annual Turnover of Rs. 50 Cr. during each of the 3 financial years i.e., FY 2020-21, FY 2021-22, and FY 2022-23', eligibility_met='No', reason='The proposal only includes financial statements for FY 2022-23 in Annexure 2. There is no information provided for FY 2020-21 and FY 2021-22.'), EligibilityCriterion(criterion='The Agency should have completed a minimum of 5 studies/projects (AI related/tech-policy related/competition related/sectoral studies for policy purposes)', eligibility_met='Yes', reason="The proposal mentions that Quantum Analytics Consulting has conducted over 50 studies globally on technology's imp

#### Structured Output- JSON-1

In [15]:

from langchain_core.output_parsers import JsonOutputParser
parser = JsonOutputParser(pydantic_object=EligibilityData)

template = """
You are an AI contract analyzer. Your task is to compare the eligibility criteria listed in a Request for Proposal (RFP) with the details provided in a proposal.

RFP Content:
{rfp_result}

Proposal Content:
{proposal_content}

For each eligibility criterion in the RFP, provide the following:

Eligibility Criterion: [Insert eligibility criterion from RFP]
Eligibility Met (Yes/No): [Yes/No]
Reason: [Provide a detailed explanation of how the eligibility criterion is met or not met based on the proposal]

Please provide your analysis in a clear, structured format.
\n{format_instructions}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["rfp_content", "proposal_content"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

json_chain=prompt|llm|parser

result=json_chain.invoke({'rfp_result':rfp_result, 'proposal_content':proposal_content})
result

{'eligibility_criteria': [{'criterion': 'A minimum of 5 years of existence',
   'eligibility_met': 'Yes',
   'reason': 'The proposal states that Quantum Analytics Consulting has over 15 years of global experience. Additionally, Annexure 1 includes the Certificate of Incorporation, which serves as proof of existence.'},
  {'criterion': 'The Agency should have a minimum Annual Turnover of Rs. 50 Cr. during each of the 3 financial years i.e., FY 2020-21, FY 2021-22, and FY 2022-23',
   'eligibility_met': 'No',
   'reason': 'The proposal only includes the financial statement for FY 2022-23 (Annexure 2). There is no mention of financial statements or certificates for FY 2020-21 and FY 2021-22.'},
  {'criterion': 'The Agency should have completed a minimum of 5 studies/projects (AI related/tech-policy related/competition related/sectoral studies for policy purposes)',
   'eligibility_met': 'Yes',
   'reason': "The proposal mentions that Quantum Analytics Consulting has conducted over 50 stud

# APPENDIC_ RANDOM TEST

In [21]:
text="Proposal for Conducting Market Study on Artificial Intelligence and \nCompetition  \nSubmitted to:  Competition Commission of India  \nPrepared by:  Quantum Analytics Consulting  \nDate:  July 25, 2024  \nExecutive Summary  \nQuantum Analytics Consulting proposes to undertake a comprehensive market study on the \nimpact of Artificial Intelligence (AI) on competition in key sectors. With over 15 years of global \nexperience and a dedicated AI research division, our firm is uniquely positioned to deliver \ninsights that will inform policy and regulatory approaches to foster fair competition and \ninnovation in AI -driven markets. The firm has successfully delivered more than 20 projects  in \nAI/Analytics sector in last 3 years.  \nFirm Profile  and Qualifications  \n• Global Presence:  Offices in 20 countries, 300+ consultants worldwide.  \n• Expertise:  Specialized in AI, data science, competition law, and technology policy.  \n• Past Projects:  Over 50 studies conducted globally on technology's impact on mar ket \nstructures, including ten in AI and competition.  \n• Revenue:  \nFY 2020 -21 50 Cr  \nFY 2021 -22 52 Cr  \nFY 2022 -23 58 Cr  \n \nProject Understanding and Approach  \n• Objectives:  To assess how AI shapes competition in various sectors and identify \npotential regulatory responses.  \n• Methodology:   \n1. Literature Review:  Access to major databases like JSTOR, IEEE Xplore, and \nproprietary tech industry reports.  \n2. Stakeholder Consultation:  Interviews with 50+ firms including AI startups, tech \ngiants, and policy bodies.  3. Data Collection and Analysis:  Use of statistical software for quantitative \nanalysis and thematic analysis for qualitative insights.  \n4. Reporting:  Drafting an interim report for fee dback, followed by a detailed final \nreport with policy recommendations.  \nProject Team and Management Structure  \n• Project Leader:  Dr. Lisa Ray, Ph.D. in AI from MIT, 12 years of experience in AI \nresearch.  \n• Team Members:  Includes data scientists, AI experts, competition economists, and legal \nadvisors.  \n• Organizational Chart:  [Insert a simple diagram showing the team structure]  \nProjec t Timeline  \n• Weeks 1 -6: Secondary research and development of questionnaires.  \n• Weeks 7 -18: Stakeholder interviews and data collection.  \n• Weeks 19 -24: Data analysis and interim report preparation.  \n• Weeks 25 -30: Final report drafting and review.  \n• Week 31:  Presentation to the Commission.  \nRisk Management and Quality Assurance  \n• Risk Management:  Regular audits and stakeholder feedback loops to mitigate risks.  \n• Quality Assurance:  Peer reviews and compliance checks with ISO 9001 standards.  \nLegal and Regulatory Compliance  \n• Data Protection:  Adherence to GDPR and local data protection laws.  \n• Confidentiality:  Strict protocols for handling sensitive information.  \nBudget and Cost Justif ication  \n• Total Project Cost:  $500,000 o  Labor:  $300,000 o  Travel and Meetings:  \n$50,000 o  Data Access and Tools:  $100,000 o  Miscellaneous:  $50,000  \n• Value Proposition:  Cost-effective due to in -house AI tools and existing data agreements.  \nAttachments  \n• Certificate of Incorporation.  \n• Financial Statements for FY 2020 -21 , FY 2021 -22, FY 2022 -23. \n• Copies of completion certificates for relevant past projects  \n• No Conviction/ Debarment Certificate  \n• Self-attested copy of PAN card and Service Tax   \nConclusion  \nQuantum Analytics is excited about the opportunity to collaborate with the Competition \nCommission of India to explore the transformative impact of AI on market competition. We are \ncommitted to pr oviding comprehensive, insightful, and actionable findings.  \nSubmission Details  \n• Address for Submission:  Competition Commission of India, 9th Floor, Office Block – \n1, Kidwai Nagar (East), New Delhi - 110023, India.  \n• Electronic Submission:  Proposals should be submitted via email to ai-study@cci.gov.in  \nby 5:00 PM on August 30, 2024.  \n   "
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
# docs = text_splitter.split_documents(documents)
docs= text_splitter.create_documents([text])

In [23]:
docs

[Document(page_content='Proposal for Conducting Market Study on Artificial Intelligence and \nCompetition'),
 Document(page_content='Submitted to:  Competition Commission of India  \nPrepared by:  Quantum Analytics Consulting'),
 Document(page_content='Date:  July 25, 2024  \nExecutive Summary'),
 Document(page_content='Quantum Analytics Consulting proposes to undertake a comprehensive market study on the'),
 Document(page_content='impact of Artificial Intelligence (AI) on competition in key sectors. With over 15 years of global'),
 Document(page_content='experience and a dedicated AI research division, our firm is uniquely positioned to deliver'),
 Document(page_content='insights that will inform policy and regulatory approaches to foster fair competition and'),
 Document(page_content='innovation in AI -driven markets. The firm has successfully delivered more than 20 projects  in'),
 Document(page_content='AI/Analytics sector in last 3 years.  \nFirm Profile  and Qualifications'),
 Do