In [1]:
import pandas as pd
import numpy as np

In [22]:
df = pd.read_csv(r"/workspaces/Supply-Chain-Management/Data/supplier_contracts_dataset.csv")
df = df.replace({np.nan: None})
df = df.rename(columns={'contractid':'id'})
# # Convert only the relevant text-based fields to string
text_fields = ['contracttype', 'suppliername', 'risklevel', 'complianceissues', 'keyterms', 'negotiationrecommendation','qualityissues', 'supplychaindisruptions',
       'increasedcosts', 'complianceandlegalrisks', 'missedopportunities',
       'damagedrelationships']

# Ensure the specified text fields are strings
for field in text_fields:
    df[field] = df[field].astype(str)

**MINSEARCH**

In [3]:
# df.columns = df.columns.str.lower()
# df.to_csv(r"/workspaces/Supply-Chain-Management/Data/supplier_contracts_dataset.csv",index=False)

In [4]:
documents = df.to_dict(orient='records')

Get top 10 list of high risk level contracts

In [5]:
import minsearch

# Create an index
index = minsearch.Index(
    text_fields=text_fields,
    keyword_fields=[]
)

# Fit the index with the documents
index.fit(documents)

# Example search query
query = "high risk level"
results = index.search(query, num_results=10)

# Print results
for result in results:
    print(result)

{'contractid': 309, 'datesigned': '5/19/2023', 'contracttype': 'Service Agreement', 'suppliername': 'Supplier_33', 'risklevel': 'High', 'complianceissues': 'Non-Compliance with Standards', 'keyterms': 'Delivery within 60 days', 'pastperformancescore': 9, 'negotiationrecommendation': 'Adjust delivery schedules', 'qualityissues': 'Yes', 'supplychaindisruptions': 'Yes', 'increasedcosts': 'Yes', 'complianceandlegalrisks': 'High', 'missedopportunities': 'Yes', 'damagedrelationships': 'No', 'qualitymetrics': 10, 'deliverymetrics': 8, 'costmetrics': 4, 'relationshipmetrics': 8}
{'contractid': 329, 'datesigned': '12/21/2023', 'contracttype': 'Distribution Agreement', 'suppliername': 'Supplier_85', 'risklevel': 'High', 'complianceissues': 'Non-Compliance with Standards', 'keyterms': 'Quality standards as per ISO 9001', 'pastperformancescore': 5, 'negotiationrecommendation': 'Seek alternative suppliers', 'qualityissues': 'Yes', 'supplychaindisruptions': 'No', 'increasedcosts': 'No', 'compliancea

Get the Contract types that has high risk and their count

In [6]:
import minsearch
from collections import Counter
# Create an index
index = minsearch.Index(
    text_fields=text_fields,
    keyword_fields=['risklevel']
)

# Fit the index with the documents
index.fit(documents)

# Perform the search for high-risk level contracts
filter_dict = {'risklevel': 'High'}
results = index.search(query='high risk level', filter_dict=filter_dict, num_results=len(documents))

# Extract and print contract types with high risk level
high_risk_contract_types = [result['contracttype'] for result in results]

# Count the occurrences of each contract type
contract_type_counts = Counter(high_risk_contract_types)

# Print the count of each contract type
print("Count of each contract type with high risk level:")
for contract_type, count in contract_type_counts.items():
    print(f"{contract_type}: {count}")

Count of each contract type with high risk level:
Supply Agreement: 156
Procurement Contract: 176
Distribution Agreement: 165
Manufacturing Agreement: 183
Service Agreement: 170


**GROQ API**

In [7]:
# from openai import OpenAI

# client = OpenAI()

In [8]:
import os

from groq import Groq

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

In [9]:
df.columns

Index(['contractid', 'datesigned', 'contracttype', 'suppliername', 'risklevel',
       'complianceissues', 'keyterms', 'pastperformancescore',
       'negotiationrecommendation', 'qualityissues', 'supplychaindisruptions',
       'increasedcosts', 'complianceandlegalrisks', 'missedopportunities',
       'damagedrelationships', 'qualitymetrics', 'deliverymetrics',
       'costmetrics', 'relationshipmetrics'],
      dtype='object')

In [10]:

# Function to call the LLM (Groq API)
# def llm(prompt, model='gpt-4o-mini'):
#     # Assuming `client` is the Groq API client instance
#     response = client.chat.completions.create(
#         model=model,
#         messages=[{"role": "user", "content": prompt}]
#     )
    
#     return response.choices[0].message.content



In [11]:
df.columns


Index(['contractid', 'datesigned', 'contracttype', 'suppliername', 'risklevel',
       'complianceissues', 'keyterms', 'pastperformancescore',
       'negotiationrecommendation', 'qualityissues', 'supplychaindisruptions',
       'increasedcosts', 'complianceandlegalrisks', 'missedopportunities',
       'damagedrelationships', 'qualitymetrics', 'deliverymetrics',
       'costmetrics', 'relationshipmetrics'],
      dtype='object')

In [12]:

# Function to perform the search on supplier contracts based on query

def search(query, filter_dict=None, max_results=10):
    # Filter the DataFrame based on risk level (if provided)
    if filter_dict:
        filtered_df = df[df['risklevel'] == filter_dict.get('risklevel', '')]
    else:
        filtered_df = df
    # Convert the filtered data to a list of dictionaries and limit the number of results
    results = filtered_df.to_dict(orient='records')[:max_results]
    return results

# Function to build a clearer prompt for Groq API
def build_clear_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context += (
            f"- **ContractType**: {doc['contracttype']}\n"
            f"  **SupplierName**: {doc['suppliername']}\n"
            f"  **RiskLevel**: {doc['risklevel']}\n"
            f"  **ComplianceIssues**: {doc['complianceissues']}\n"
            f"  **KeyTerms**: {doc['keyterms']}\n"
            f"  **NegotiationRecommendation**: {doc['negotiationrecommendation']}\n"
            f"  **QualityIssues**: {doc['qualityissues']}\n"
            f"  **SupplyChainDisruptions**: {doc['supplychaindisruptions']}\n"
            f"  **IncreasedCosts**: {doc['increasedcosts']}\n"
            f"  **ComplianceAndLegalRisks**: {doc['complianceandlegalrisks']}\n"
            f"  **MissedOpportunities**: {doc['missedopportunities']}\n"
            f"  **DamagedRelationships**: {doc['damagedrelationships']}\n"
            f"  **QualityMetrics**: {doc['qualitymetrics']}\n"
            f"  **DeliveryMetrics**: {doc['deliverymetrics']}\n"
            f"  **CostMetrics**: {doc['costmetrics']}\n"
            f"  **RelationshipMetrics**: {doc['relationshipmetrics']}\n\n"
        )
    
    prompt = (
        f"QUESTION: {query}\n\n"
        f"CONTEXT:\n{context}"
    )
    
    return prompt

# Function to call the LLM (Groq API)
# def llm(prompt, model='gpt-4o-mini'):
#     # Assuming client is the Groq API client instance
#     response = client.chat.completions.create(
#         model=model,
#         messages=[{"role": "user", "content": prompt}]
#     )
    
#     return response.choices[0].message.content

def llm(prompt, model='Llama3-groq-70b-8192-tool-use-preview'):
    # Assuming client is the Groq API client instance
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
    model=model
    )
    
    return response.choices[0].message.content


# Function to perform the full RAG (Retrieve and Generate) process
def rag(query, model='Llama3-groq-70b-8192-tool-use-preview'):
    # Search for high-risk contracts (you can modify filter_dict based on needs)
    search_results = search(query, filter_dict={'risklevel': 'High'})
    
    # Build the prompt using the search results
    prompt = build_clear_prompt(query, search_results)
    
    # Get the LLM response based on the prompt
    answer = llm(prompt, model=model)
    
    return answer

# Example usage
question = "Give contract types , qualitymetrics ,SupplyChainDisruptions and their negotiation recommendations for high-risk contracts"
answer = rag(question)
print(answer)

Based on the provided context, here are the contract types, quality metrics, supply chain disruptions, and negotiation recommendations for high-risk contracts:

1. **ContractType**: Distribution Agreement
   - **SupplierName**: Supplier_60
   - **RiskLevel**: High
   - **NegotiationRecommendation**: Include compliance monitoring
   - **QualityMetrics**: 3
   - **SupplyChainDisruptions**: No

2. **ContractType**: Service Agreement
   - **SupplierName**: Supplier_67
   - **RiskLevel**: High
   - **NegotiationRecommendation**: Include compliance monitoring
   - **QualityMetrics**: 5
   - **SupplyChainDisruptions**: Yes

3. **ContractType**: Supply Agreement
   - **SupplierName**: Supplier_33
   - **RiskLevel**: High
   - **NegotiationRecommendation**: Include penalty clauses for late delivery
   - **QualityMetrics**: 3
   - **SupplyChainDisruptions**: Yes

4. **ContractType**: Service Agreement
   - **SupplierName**: Supplier_24
   - **RiskLevel**: High
   - **NegotiationRecommendation**:

In [13]:
print("\n## Risk-Based Queries")

question = "Which suppliers have the most non-compliance issues, regardless of risk level?"
answer = rag(question)
print(answer)

print("\n##Compliance & Legal Queries:")


question = "Identify contracts where compliance monitoring is needed."
answer = rag(question)
print(answer)

print("\n##Cost and Financial Metrics:")
question = "List contracts that provide cost savings but involve high risk."
answer = rag(question)
print(answer)

print("\n##Contractual Terms and Recommendations:")
question = "Which contracts have penalty clauses for late delivery, and how effective have they been?"
answer = rag(question)
print(answer)

print("\n##Supplier Relationship Queries:")
question = "What are the relationship metrics for high-performing suppliers?"
answer = rag(question)
print(answer)


print("\n##Opportunity and Innovation Queries:")
question = "Identify suppliers that provide innovative opportunities despite low past performance scores."
answer = rag(question)
print(answer)


print("\n#Custom Queries")

question = "Show me all contracts with a combination of poor delivery metrics, compliance risks, and high past performance scores."
answer = rag(question)
print(answer)

print("\n")
question ="What are the patterns between non-compliance and increased costs in supplier contracts?"
answer = rag(question)
print(answer)






## Risk-Based Queries
Based on the provided data, the suppliers with the most non-compliance issues, regardless of risk level, are:

1. Supplier_60 (Distribution Agreement) - Non-Compliance with Standards
2. Supplier_67 (Service Agreement) - Late Delivery
3. Supplier_33 (Supply Agreement) - Late Delivery
4. Supplier_24 (Service Agreement) - Substandard Quality
5. Supplier_28 (Supply Agreement) - None
6. Supplier_79 (Distribution Agreement) - None

These suppliers have a mix of compliance issues such as non-compliance with standards, late delivery, and substandard quality.

##Compliance & Legal Queries:
Supplier_60, Supplier_67, Supplier_33, Supplier_24, Supplier_16, Supplier_29, Supplier_62, Supplier_20, Supplier_28, Supplier_79

##Cost and Financial Metrics:
Supplier_60, Supplier_67, Supplier_33, Supplier_24, Supplier_16, Supplier_29, Supplier_62, Supplier_20, Supplier_28, and Supplier_79 are contracts that provide cost savings but involve high risk.

##Contractual Terms and Recommen

**Retrieval Evaluation**

In [14]:
from tqdm.auto import tqdm

In [15]:
df_question = pd.read_csv(r'/workspaces/Supply-Chain-Management/Data/ground-truth-retrieval.csv')

In [16]:
ground_truth = df_question.to_dict(orient='records')

In [17]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

def minsearch_search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['contractid']
        results = search_function(q)
        relevance = [d['contractid'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/12500 [00:00<?, ?it/s]

[False, False, False, False, True, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, True, False, False, False, False, False]
[]
[False, False, False, False, True, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]
[]
[False, False, False, False, False, False, False, False, False, False]
[False, False, False, False, False, False, False, False, False, False]


KeyboardInterrupt: 

In [18]:
ground_truth

[{'contractid': 1,
  'question': 'What are the specific standards that Supplier_2 is not complying with?'},
 {'contractid': 1,
  'question': 'How often should compliance monitoring be conducted to mitigate risks?'},
 {'contractid': 1,
  'question': 'What are the key terms that need to be renegotiated to ensure timely payments?'},
 {'contractid': 1,
  'question': 'What measures can be taken to address the quality issues identified?'},
 {'contractid': 1,
  'question': 'How can we mitigate the medium compliance and legal risks associated with this contract?'},
 {'contractid': 2,
  'question': 'What are the specific compliance issues related to substandard quality that need to be addressed?'},
 {'contractid': 2,
  'question': 'How can we mitigate the risks associated with supply chain disruptions?'},
 {'contractid': 2,
  'question': 'What are the potential consequences of not resolving the compliance and legal risks?'},
 {'contractid': 2,
  'question': 'What alternative suppliers should we

Finding the best paramters

In [21]:
# Display column names and a few rows from each dataset
print("Supplier Contracts Dataset Columns:")
print(df.columns)
print("\nSample Rows from Supplier Contracts Dataset:")
print(df.head())

print("\nGround Truth Retrieval Dataset Columns:")
print(df_question.columns)
print("\nSample Rows from Ground Truth Retrieval Dataset:")
print(df_question.head())

Supplier Contracts Dataset Columns:
Index(['contractid', 'datesigned', 'contracttype', 'suppliername', 'risklevel',
       'complianceissues', 'keyterms', 'pastperformancescore',
       'negotiationrecommendation', 'qualityissues', 'supplychaindisruptions',
       'increasedcosts', 'complianceandlegalrisks', 'missedopportunities',
       'damagedrelationships', 'qualitymetrics', 'deliverymetrics',
       'costmetrics', 'relationshipmetrics'],
      dtype='object')

Sample Rows from Supplier Contracts Dataset:
   contractid datesigned             contracttype suppliername risklevel  \
0           1  5/31/2024   Distribution Agreement   Supplier_2    Medium   
1           2  6/18/2023  Manufacturing Agreement  Supplier_28    Medium   
2           3  4/26/2022        Service Agreement  Supplier_55       Low   
3           4   9/2/2023         Supply Agreement  Supplier_79    Medium   
4           5   1/5/2024  Manufacturing Agreement   Supplier_8       Low   

                complianceis