In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the supplier-contract table and swap NaN cells for Python None.
df = pd.read_csv(r"/workspaces/Supply-Chain-Management/Data/supplier_contracts_dataset.csv")
df = df.replace({np.nan: None})

# Columns that are later handed to the text index; each one is cast to str
# (which also turns the None placeholders into the literal string 'None').
text_fields = ['contracttype', 'suppliername', 'risklevel', 'complianceissues', 'keyterms', 'negotiationrecommendation']
df = df.astype(dict.fromkeys(text_fields, str))

**MINSEARCH**

In [3]:
# df.columns = df.columns.str.lower()
# df.to_csv(r"/workspaces/Supply-Chain-Management/Data/supplier_contracts_dataset.csv",index=False)

In [4]:
# One dict per contract row, keyed by column name — the shape the index is fitted on.
documents = df.to_dict("records")

Get the top 10 high-risk contracts

In [5]:
import minsearch

# Free-text index over the textual contract columns; no keyword (exact-match) fields yet.
index = minsearch.Index(text_fields=text_fields, keyword_fields=[])
index.fit(documents)

# Ask for the ten best-scoring records for the phrase below.
query = "high risk level"
results = index.search(query, num_results=10)

# Show each returned record on its own line.
for hit in results:
    print(hit)

{'contractid': 9969, 'datesigned': '2024-05-19', 'contracttype': 'Manufacturing Agreement', 'suppliername': 'Supplier_9969', 'risklevel': 'High', 'complianceissues': 'Late Delivery', 'keyterms': 'Payment within 30 days', 'pastperformancescore': 7, 'negotiationrecommendation': 'Seek alternative suppliers'}
{'contractid': 10000, 'datesigned': '2023-04-18', 'contracttype': 'Service Agreement', 'suppliername': 'Supplier_10000', 'risklevel': 'High', 'complianceissues': 'Non-Compliance with Standards', 'keyterms': 'Regular performance reviews', 'pastperformancescore': 3, 'negotiationrecommendation': 'Renegotiate payment terms'}
{'contractid': 9999, 'datesigned': '2023-09-08', 'contracttype': 'Procurement Contract', 'suppliername': 'Supplier_9999', 'risklevel': 'High', 'complianceissues': 'Payment Issues', 'keyterms': 'Payment within 30 days', 'pastperformancescore': 9, 'negotiationrecommendation': 'Include compliance monitoring'}
{'contractid': 9998, 'datesigned': '2024-01-26', 'contracttype

Get the contract types that have a high risk level, and their counts

In [6]:
import minsearch
from collections import Counter

# Rebuild the index, this time registering `risklevel` as a keyword field so it
# can be used in `filter_dict`.
index = minsearch.Index(text_fields=text_fields, keyword_fields=['risklevel'])
index.fit(documents)

# Keep only High-risk records; the result cap equals the corpus size, so the
# cap itself never cuts the hit list short.
filter_dict = {'risklevel': 'High'}
results = index.search(
    query='high risk level',
    filter_dict=filter_dict,
    num_results=len(documents),
)

# Tally how often each contract type appears among the hits.
high_risk_contract_types = [hit['contracttype'] for hit in results]
contract_type_counts = Counter(high_risk_contract_types)

print("Count of each contract type with high risk level:")
for contract_type, count in contract_type_counts.items():
    print(f"{contract_type}: {count}")

Count of each contract type with high risk level:
Service Agreement: 694
Distribution Agreement: 676
Supply Agreement: 701
Manufacturing Agreement: 688
Procurement Contract: 675


**LLM API (OpenAI client)**

In [7]:
from openai import OpenAI

# Build the SDK client with default constructor arguments.
# NOTE(review): presumably credentials come from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [8]:
# Display the column labels of the loaded contracts frame.
df.columns

Index(['contractid', 'datesigned', 'contracttype', 'suppliername', 'risklevel',
       'complianceissues', 'keyterms', 'pastperformancescore',
       'negotiationrecommendation'],
      dtype='object')

In [11]:
def search(query, filter_dict=None, max_results=10):
    """Return contract records from the module-level ``df`` that match ``filter_dict``.

    Args:
        query: Free-text question. It is accepted so callers can pass it through,
            but it does not influence which rows are selected or their order.
        filter_dict: Optional mapping of column name -> required value. Every
            entry is applied as an exact-equality filter and the filters are
            ANDed together. ``None`` or an empty mapping means "no filtering".
        max_results: Upper slice bound on the number of records returned.

    Returns:
        A list of dicts (one per row, keyed by column name) in ``df`` row order.

    Raises:
        KeyError: If ``filter_dict`` names a column that ``df`` does not have.
    """
    filtered_df = df
    for column, wanted in (filter_dict or {}).items():
        if column not in filtered_df.columns:
            raise KeyError(f"Unknown filter column: {column!r}")
        # Narrow step by step so each filter only scans the rows still in play.
        filtered_df = filtered_df[filtered_df[column] == wanted]

    # Slice first so only the rows actually returned are converted to dicts.
    return filtered_df.iloc[:max_results].to_dict(orient='records')

def build_clear_prompt(query, search_results):
    """Assemble the text prompt sent to the LLM.

    Each record in ``search_results`` is rendered as a markdown-style bullet
    listing its contract type, supplier, risk level, compliance issues, key
    terms and negotiation recommendation. The bullets are concatenated into a
    CONTEXT section that follows the QUESTION line.

    Args:
        query: The user's question, placed verbatim after ``QUESTION:``.
        search_results: Iterable of dict-like records containing the six keys
            read below.

    Returns:
        The complete prompt string.
    """
    rendered_docs = [
        f"- **ContractType**: {doc['contracttype']}\n"
        f"  **SupplierName**: {doc['suppliername']}\n"
        f"  **RiskLevel**: {doc['risklevel']}\n"
        f"  **ComplianceIssues**: {doc['complianceissues']}\n"
        f"  **KeyTerms**: {doc['keyterms']}\n"
        f"  **NegotiationRecommendation**: {doc['negotiationrecommendation']}\n\n"
        for doc in search_results
    ]
    context = "".join(rendered_docs)

    return (
        f"QUESTION: {query}\n\n"
        f"CONTEXT:\n{context}"
    )

def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    The request goes through the module-level ``client``'s
    ``chat.completions.create`` call.

    Args:
        prompt: Text placed in the one user-role message.
        model: Model identifier forwarded to the completion call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

def rag(query, model='gpt-4o-mini'):
    """Answer ``query`` with a retrieve-then-generate pipeline.

    Steps: fetch records via ``search`` (always restricted to
    ``risklevel == 'High'``), render them into a prompt with
    ``build_clear_prompt``, and pass that prompt to ``llm``.

    Args:
        query: The user's question.
        model: Model identifier forwarded to ``llm``.

    Returns:
        Whatever ``llm`` returns for the assembled prompt.
    """
    high_risk_docs = search(query, filter_dict={'risklevel': 'High'})
    return llm(build_clear_prompt(query, high_risk_docs), model=model)

# Example usage: run the full pipeline for one question and print the model's reply.
# Note that rag() always retrieves with the risklevel == 'High' filter.
question = "Give contract types and their negotiation recommendations for high-risk contracts"
answer = rag(question)
print(answer)


Here are various high-risk contract types along with their respective negotiation recommendations:

### 1. Manufacturing Agreement
- **SupplierName**: Supplier_1
- **RiskLevel**: High
- **ComplianceIssues**: Substandard Quality
- **KeyTerms**: Penalty for late delivery
- **NegotiationRecommendation**: Adjust delivery schedules

### 2. Distribution Agreement
- **SupplierName**: Supplier_4
- **RiskLevel**: High
- **ComplianceIssues**: None
- **KeyTerms**: Quality standards as per ISO 9001
- **NegotiationRecommendation**: Specify quality standards

### 3. Procurement Contract
- **SupplierName**: Supplier_6
- **RiskLevel**: High
- **ComplianceIssues**: Substandard Quality
- **KeyTerms**: Delivery within 60 days
- **NegotiationRecommendation**: Include penalty clauses for late delivery

### 4. Service Agreement
- **SupplierName**: Supplier_7
- **RiskLevel**: High
- **ComplianceIssues**: Substandard Quality
- **KeyTerms**: Payment within 30 days
- **NegotiationRecommendation**: Seek alternat

**Retrieval Evaluation**

{'id': 0, 'question': 'What is the starting position for doing push-ups?'}