## Configure the environment

In [1]:
import llmselector, os
if not os.path.exists('../cache/db_livecodebench.sqlite'): 
    !wget -P ../cache https://github.com/LLMSELECTOR/LLMSELECTOR/releases/download/0.0.1/db_livecodebench.sqlite
llmselector.config.config(
    db_path=f"../cache/db_livecodebench.sqlite" )

## Load the LiveCodeBench dataset

In [2]:
from llmselector.data_utils.livecodebench import DataLoader_livecodebench 
from sklearn.model_selection import train_test_split
# Instantiate the LiveCodeBench loader and fetch its query DataFrame.
Mydataloader = DataLoader_livecodebench()
q_data = Mydataloader.get_query_df()

# Hold out half of the queries for evaluation; the fixed seed keeps the
# train/test partition identical across runs.
train_df, test_df = train_test_split(
    q_data,
    test_size=0.5,
    random_state=2025,
)

## Use a single LLM

In [3]:
from llmselector.compoundai.module.selfrefine import SelfRefine
from llmselector.compoundai.optimizer import OptimizerFullSearch
from llmselector.compoundai.metric import Metric, compute_score
# Candidate models; each one is evaluated on its own as a baseline.
model_list = [
    'gpt-4o-2024-05-13',
    'claude-3-5-sonnet-20240620',
    'gemini-1.5-pro',
]

# Maps model name -> SelfRefine pipeline optimized with only that model
# available to the search.
Agents_SameModel = {}
for model_name in model_list:
    baseline_agent = SelfRefine()
    Agents_SameModel[model_name] = baseline_agent
    # A one-element model list leaves the full search a single candidate model.
    single_model_optimizer = OptimizerFullSearch(model_list=[model_name])
    single_model_optimizer.optimize(train_df, Metric('em'), baseline_agent)

# Score every baseline on the held-out split with the 'em' metric.
results = compute_score(Agents_SameModel, test_df, Metric('em'))
print(results)

  0%|                                                     | 0/1 [00:00<?, ?it/s]
Processing:   0%|                                       | 0/239 [00:00<?, ?it/s][A
Processing: 100%|███████████████████████████| 239/239 [00:00<00:00, 2112.99it/s][A
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  6.55it/s]


('gpt-4o-2024-05-13', 'gpt-4o-2024-05-13', 'gpt-4o-2024-05-13')


  0%|                                                     | 0/1 [00:00<?, ?it/s]
Processing: 100%|███████████████████████████| 239/239 [00:00<00:00, 2475.55it/s][A
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  8.11it/s]


('claude-3-5-sonnet-20240620', 'claude-3-5-sonnet-20240620', 'claude-3-5-sonnet-20240620')


  0%|                                                     | 0/1 [00:00<?, ?it/s]
Processing: 100%|███████████████████████████| 239/239 [00:00<00:00, 2396.69it/s][A
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  7.68it/s]


('gemini-1.5-pro', 'gemini-1.5-pro', 'gemini-1.5-pro')


100%|███████████████████████████████████████| 240/240 [00:00<00:00, 2217.09it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 9242.46it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 2219.86it/s]
100%|██████████████████████████████████████| 240/240 [00:00<00:00, 16705.94it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 2100.21it/s]
100%|██████████████████████████████████████| 240/240 [00:00<00:00, 11689.13it/s]

                         Name  Mean_Score
0           gpt-4o-2024-05-13    0.862500
1  claude-3-5-sonnet-20240620    0.891667
2              gemini-1.5-pro    0.866667





## Optimize model selection

In [4]:
from llmselector.compoundai.optimizer import OptimizerLLMDiagnoser
# Fresh SelfRefine pipeline whose model selection is chosen by the
# LLM-diagnoser optimizer (default settings) on the training split.
LLMSELECTOR = SelfRefine()
Optimizer = OptimizerLLMDiagnoser()
Optimizer.optimize(
    train_df,
    Metric('em'),
    LLMSELECTOR,
)

100%|█████████████████████████████████████████| 239/239 [00:03<00:00, 70.26it/s]
100%|█████████████████████████████████████████| 239/239 [00:03<00:00, 72.41it/s]
100%|█████████████████████████████████████████| 239/239 [00:03<00:00, 68.17it/s]
100%|█████████████████████████████████████████| 239/239 [00:02<00:00, 84.95it/s]
100%|█████████████████████████████████████████| 239/239 [00:03<00:00, 73.19it/s]
100%|█████████████████████████████████████████| 239/239 [00:02<00:00, 87.44it/s]


In [5]:
# Evaluate the optimized pipeline on the held-out split with the same 'em'
# metric used for the single-model baselines.
results = compute_score(
    {"LLMSELECTOR": LLMSELECTOR},
    test_df,
    Metric('em'),
)
print(results)

100%|███████████████████████████████████████| 240/240 [00:00<00:00, 2412.71it/s]
100%|██████████████████████████████████████| 240/240 [00:00<00:00, 15858.23it/s]

          Name  Mean_Score
0  LLMSELECTOR    0.954167



