In [1]:
import itertools
import cohere
import psycopg2
import os
from dotenv import load_dotenv
from console_app import retrieve_similar, rerank_docs, chatbot_response
import time

# Load environment variables from a local .env file (if present) so that the
# API key and database settings never have to be hard-coded in the notebook.
load_dotenv()
co = cohere.Client(os.getenv("COHERE_API_KEY"))

# DB connection settings, all read from the environment.
db_config = {
    'dbname': os.getenv('DB_NAME'),
    'user': os.getenv('DB_USER'),
    'password': os.getenv('DB_PASSWORD'),
    'host': os.getenv('DB_HOST'),
    # Fall back to PostgreSQL's standard port: int(None) would raise a
    # TypeError when DB_PORT is not set.
    'port': int(os.getenv('DB_PORT', '5432')),
}
conn = psycopg2.connect(**db_config)
cursor = conn.cursor()


In [2]:
# Candidate values for every tunable setting of the retrieval/generation pipeline
param_grid = {
    "embedding_model": ["small"],
    "top_k": [3, 4, 5, 6],
    "rerank_n": [2, 3, 4, 5],
    "generation_model": ["command-r7b-12-2024", "command-r-08-2024"],
    "max_tokens": [50, 100, 150, 200],
}

# Cartesian product over all axes; the tuple below fixes the position of each
# setting inside every generated combination.
search_space = list(
    itertools.product(
        *(
            param_grid[axis]
            for axis in (
                "embedding_model",
                "top_k",
                "rerank_n",
                "generation_model",
                "max_tokens",
            )
        )
    )
)

In [3]:
# Evaluation set: each query is paired with a comma-separated list of keywords
# that a good answer is expected to mention.
eval_queries = [
    {"query": "Tell me about supply chain verification projects", "expected": "BlockVerify"},
    {"query": "Any cybersecurity platforms based on AI?", "expected": "CyberDefend, DataFort"},
    {"query": "Who is responsible for the education platform for teacher collaboration?", "expected": "Aiden Ward"},
    {"query": "Which projects are related to AI in Agriculture?", "expected": "GreenFarm Analytics, AgriTechOptimizer, CropGuardian, CropWatcher"},
    {"query": "List all responsible people called Martinez in Construction", "expected": "Daniel Martinez, Jacob Martinez"}
]
# Cohere rate limit: 10 requests per minute on the free tier
MAX_CALLS_PER_MIN = 10
# One entry per evaluated configuration: ((params...), keyword_hit_count)
results = []

for emb_model, top_k, rerank_n, gen_model, max_tokens in search_space:
    # Keeping more documents after reranking than were retrieved is not a
    # meaningful configuration. Skip it before spending any API calls, so the
    # generation step never runs on rerank output left over from another
    # query or configuration.
    if rerank_n > top_k:
        continue

    print(f"Testing: emb={emb_model}, top_k={top_k}, rerank_n={rerank_n}, gen={gen_model}, max_tokens={max_tokens}")

    # `correct` is a raw count of expected keywords found in the answers
    # (at most 10 over the five queries above), not a normalized ratio.
    correct = 0
    for case in eval_queries:
        docs = retrieve_similar(case["query"], top_k, emb_model)
        reranked = rerank_docs(case["query"], docs, rerank_n)
        answer = chatbot_response(case["query"], reranked, gen_model, max_tokens=max_tokens)

        # Case-insensitive substring match of every expected keyword
        answer_lower = answer.lower()
        expected_keywords = [word.strip().lower() for word in case["expected"].split(",")]
        correct += sum(1 for keyword in expected_keywords if keyword in answer_lower)

        # Pause once per query to stay under the rate limit.
        # NOTE(review): if each helper above issues its own Cohere request,
        # one pause per query may not be enough - confirm against console_app.
        time.sleep(60 / MAX_CALLS_PER_MIN + 0.1)

    results.append(((emb_model, top_k, rerank_n, gen_model, max_tokens), correct))


Testing: emb=small, top_k=3, rerank_n=2, gen=command-r7b-12-2024, max_tokens=50
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r7b-12-2024, max_tokens=100
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r7b-12-2024, max_tokens=150
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r7b-12-2024, max_tokens=200
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r-08-2024, max_tokens=50
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r-08-2024, max_tokens=100
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r-08-2024, max_tokens=150
Testing: emb=small, top_k=3, rerank_n=2, gen=command-r-08-2024, max_tokens=200
Testing: emb=small, top_k=3, rerank_n=3, gen=command-r7b-12-2024, max_tokens=50
Testing: emb=small, top_k=3, rerank_n=3, gen=command-r7b-12-2024, max_tokens=100
Testing: emb=small, top_k=3, rerank_n=3, gen=command-r7b-12-2024, max_tokens=150
Testing: emb=small, top_k=3, rerank_n=3, gen=command-r7b-12-2024, max_tokens=200
Testing: emb=small, top_k=3, rerank_n=3

In [6]:
# Pick the configuration with the highest keyword-hit count; on ties max()
# keeps the first one encountered, i.e. the earliest in grid order.
best = max(results, key=lambda entry: entry[1])
best_params, best_score = best
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_score)

Best Hyperparameters: ('small', 4, 4, 'command-r-08-2024', 100)
Best Accuracy: 10


In [7]:
# Rank all evaluated configurations from highest to lowest score.
# Negating the score in a stable ascending sort keeps tied entries in their
# original grid order, exactly like sorting with reverse=True.
sorted_results = sorted(results, key=lambda entry: -entry[1])

# Leaderboard: one line per configuration
print("\n=== Ranked Grid Search Results ===")
for rank, (params, acc) in enumerate(sorted_results, start=1):
    line = f"{rank}. Accuracy: {acc:.3f} | Params: {params}"
    print(line)

# The head of the ranking is the winning configuration
best = sorted_results[0]
print("\nBest Hyperparameters:", best[0])
print("Best Accuracy:", best[1])


=== Ranked Grid Search Results ===
1. Accuracy: 10.000 | Params: ('small', 4, 4, 'command-r-08-2024', 100)
2. Accuracy: 10.000 | Params: ('small', 4, 4, 'command-r-08-2024', 150)
3. Accuracy: 10.000 | Params: ('small', 4, 4, 'command-r-08-2024', 200)
4. Accuracy: 10.000 | Params: ('small', 5, 4, 'command-r-08-2024', 100)
5. Accuracy: 10.000 | Params: ('small', 5, 4, 'command-r-08-2024', 150)
6. Accuracy: 10.000 | Params: ('small', 5, 4, 'command-r-08-2024', 200)
7. Accuracy: 10.000 | Params: ('small', 5, 5, 'command-r-08-2024', 100)
8. Accuracy: 10.000 | Params: ('small', 5, 5, 'command-r-08-2024', 150)
9. Accuracy: 10.000 | Params: ('small', 5, 5, 'command-r-08-2024', 200)
10. Accuracy: 10.000 | Params: ('small', 6, 4, 'command-r-08-2024', 100)
11. Accuracy: 10.000 | Params: ('small', 6, 4, 'command-r-08-2024', 150)
12. Accuracy: 10.000 | Params: ('small', 6, 4, 'command-r-08-2024', 200)
13. Accuracy: 10.000 | Params: ('small', 6, 5, 'command-r-08-2024', 100)
14. Accuracy: 10.000 | P

Based on these results, we see that the top 11 combinations from the grid search yield the highest accuracy. Our choice of model parameters will be based on saving resources while keeping optimal performance.

Model command-r-08-2024 performs best with 100 tokens and the biggest top_k for the best possible retrieval.

The chosen model parameters are:

Accuracy: 10.000 | Params: ('small', 5, 4, 'command-r-08-2024', 100)