In [1]:
import pandas as pd
import numpy as np
# from openai import OpenAI

# client = OpenAI()

In [2]:
import os

from groq import Groq

# Groq API client; the key is looked up in the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [3]:
# import pandas as pd
# import numpy as np
# import os
# import requests
# import json

# # Get the Hugging Face API key from the environment
# api_key = os.getenv('HUGGINGFACE_API_KEY')

# # Set up the API URL and headers for Hugging Face
# API_URL = "https://api-inference.huggingface.co/models/gpt2"
# headers = {"Authorization": f"Bearer {api_key}"}

In [4]:
# Load the supplier-contract table and swap NaN cells for None.
df = (
    pd.read_csv(r"/workspaces/Supply-Chain-Management/Data/supplier_contracts_dataset.csv")
    .replace({np.nan: None})
)

# One dict per row, keyed by column name.
documents = df.to_dict(orient='records')

In [6]:
import json
from tqdm.auto import tqdm

# Prompt sent to the LLM for each supplier-contract record.
# The single-brace placeholders are filled in by `str.format` inside
# `generate_questions`; the doubled braces around the JSON example are
# `str.format` escapes that come out as literal `{` and `}`.
prompt_template = """
You emulate a procurement analyst using our supplier contracts assistant application.
Formulate 5 specific questions this analyst might ask based on a provided contract record.
The questions should be specific to the contract and should avoid repeating too many words from the record.

The record:

ContractType: {contracttype}
SupplierName: {suppliername}
RiskLevel: {risklevel}
ComplianceIssues: {complianceissues}
KeyTerms: {keyterms}
NegotiationRecommendation: {negotiationrecommendation}
QualityIssues: {qualityissues}
SupplyChainDisruptions: {supplychaindisruptions}
IncreasedCosts: {increasedcosts}
ComplianceAndLegalRisks: {complianceandlegalrisks}
MissedOpportunities: {missedopportunities}
DamagedRelationships: {damagedrelationships}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

# Function to generate questions based on a contract document
def generate_questions(doc):
    """Fill the prompt template from one contract record and query the LLM.

    Args:
        doc: Mapping for a single contract record. It must contain every key
            listed in ``record_fields`` below; a missing key raises
            ``KeyError``.

    Returns:
        Whatever ``llm`` returns for the filled-in prompt.
    """
    # The template placeholders use the same names as the record keys.
    record_fields = (
        'contracttype',
        'suppliername',
        'risklevel',
        'complianceissues',
        'keyterms',
        'negotiationrecommendation',
        'qualityissues',
        'supplychaindisruptions',
        'increasedcosts',
        'complianceandlegalrisks',
        'missedopportunities',
        'damagedrelationships',
    )
    placeholder_values = {field: doc[field] for field in record_fields}
    filled_prompt = prompt_template.format(**placeholder_values)
    return llm(filled_prompt)

# Function to call the LLM and generate responses
def llm(prompt, model='Llama3-groq-70b-8192-tool-use-preview'):
    """Send a single-turn user prompt to the chat-completions API and return the reply text.

    Args:
        prompt: Text sent as the content of the only ``user`` message.
        model: Identifier of the model to query. Defaults to the model id this
            notebook previously hard-coded, so existing ``llm(prompt)`` calls
            behave exactly as before.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # `client` is the module-level Groq client instance.
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )

    return response.choices[0].message.content

# Generate questions for every contract record in `documents`.
# contractid -> list of generated questions
results = {}
# contractid -> raw LLM reply that could not be turned into a question list
failed_responses = {}

for doc in tqdm(documents):
    doc_id = doc['contractid']
    if doc_id in results:
        # Questions for this contract id were already generated in this run.
        continue

    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)['questions']
    except (json.JSONDecodeError, KeyError, TypeError):
        # The reply was not valid JSON, lacked the "questions" key, or was not
        # a JSON object (or was None). Keep the raw reply for inspection and
        # carry on, so one bad reply does not abort the whole run.
        failed_responses[doc_id] = questions_raw
        continue

    results[doc_id] = questions
    # A later duplicate record may succeed where an earlier attempt failed.
    failed_responses.pop(doc_id, None)

if failed_responses:
    print(
        f"Skipped {len(failed_responses)} contract(s) with unparsable LLM output: "
        f"{list(failed_responses)}"
    )

# Flatten to one (contractid, question) row per generated question.
final_results = [
    (doc_id, question)
    for doc_id, questions in results.items()
    for question in questions
]

# Convert the results to a DataFrame
df_results = pd.DataFrame(final_results, columns=['contractid', 'question'])



  0%|          | 0/2500 [00:00<?, ?it/s]

In [7]:
# Display the generated (contractid, question) rows.
df_results

Unnamed: 0,contractid,question
0,1,What are the specific standards that Supplier_...
1,1,How often should compliance monitoring be cond...
2,1,What are the key terms that need to be renegot...
3,1,What measures can be taken to address the qual...
4,1,How can we mitigate the medium compliance and ...
...,...,...
12495,2500,What are the specific quality standards mentio...
12496,2500,How often should compliance checks be conducted?
12497,2500,What are the consequences for non-compliance?
12498,2500,How can we mitigate the high compliance and le...


In [9]:
# Write the generated questions to CSV without the DataFrame index column.
df_results.to_csv(
    r"/workspaces/Supply-Chain-Management/Data/ground-truth-retrieval.csv",
    index=False,
)

In [None]:
# df_results = pd.DataFrame(final_results, columns=['contractid', 'question'])