## Set up model comparison

**gpt model**

gpt-4-1106-preview

**local model on server**

available models:

| NAME           | ID           | SIZE   |
|----------------|--------------|--------|
| llama2:70b     | c3a7af098300 | 38 GB  |
| llama2:7b      | fe938a131f40 | 3.8 GB |
| llama2:latest  | fe938a131f40 | 3.8 GB |
| mistral:7b     | 4d9f4b269c33 | 4.1 GB |
| mixtral:latest | 99a9202f8a7a | 26 GB  |
| mixtral:instruct| 7708c059a8bb | 26 GB  |	



**API for calling Google Gemini pro**

GO TO: https://makersuite.google.com/app/apikey to get the apikey for gemini pro

export GOOGLEAI_KEY = xxxx

model = 'gemini-pro'

In [1]:
import pandas as pd
import numpy as np
import json 
from utils.openai_query import openai_chat
from utils.prompt_factory import make_user_prompt_with_score
from utils.server_model_query import server_model_chat
from utils.llm_analysis_utils import process_analysis, save_progress
from utils.genai_query import query_genai_model
from tqdm import tqdm
import constant
import openai
import os
import logging
import re
%load_ext autoreload

%autoreload 2


openai QUERY being run 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


**Example for running in the jupyter notebook**

In [2]:
## load variables
initialize = True # if True, then initialize the input table with llm names, analysis and score to None 
# Replace with your actual values
config_file = './jsonFiles/toyexample_gpt35.json'  # replace with your actual config file 
input_file = './data/GO_term_analysis/100_selected_go_contaminated.csv' # replace with your actual input file
input_sep = ','  # replace with the separator
set_index = 'GO'  # replace with your column name that you want to set as index or None
gene_column = 'Genes'  # replace with your actual column name for the gene list
gene_sep = ' '  # replace with your actual separator
gene_features = None  # replace with your path to the gene features or None if you don't want to include in the prompt
direct = False # if True, then the prompt will be a direct sentence asking for a name and analysis from the gene set, otherwise default or customize prompt
out_file = 'data/GO_term_analysis/model_compare/PETER_COMPARE_TEST'  # replace with your actual output file name

customized_prompt = False # if True, then the prompt will be the custom prompt, if False, then the prompt will use default

# load the config file
with open(config_file) as json_file:
    config = json.load(json_file)

if customized_prompt:
    # make sure the file exist 
    if os.path.isfile(config['CUSTOM_PROMPT_FILE']):
        with open(config['CUSTOM_PROMPT_FILE'], 'r') as f: # replace with your actual customized prompt file
            customized_prompt = f.read()
            assert len(customized_prompt) > 1, "Customized prompt is empty"
    else:
        print("Customized prompt file does not exist")
        customized_prompt = None
else:
    customized_prompt = None

# Load OpenAI key, context, and model used
openai.api_key = os.environ["OPENAI_API_KEY"]

context = config['CONTEXT']
model = config['MODEL']
temperature = config['TEMP']
max_tokens = config['MAX_TOKENS']
if model.startswith('gpt'):
    rate_per_token = config['RATE_PER_TOKEN']
    DOLLAR_LIMIT = config['DOLLAR_LIMIT']
LOG_FILE = config['LOG_NAME']+'_.log'

SEED = constant.SEED
column_prefix = model.split('-')[0]

In [3]:
# handle the logger so it create a new one for each model run
def get_logger(filename):
    logger = logging.getLogger(filename)
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        file_handler = logging.FileHandler(filename)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    return logger


def main(df):
    analysis_dict  = {}

    logger = get_logger(f'{out_file}.log')

    i = 0 #used for track progress and saving the file
    for idx, row in tqdm(df.iterrows(), total=df.shape[0]):
        #only process None rows 
        if pd.notna(row[f'{column_prefix} Analysis']):
            continue
        
        gene_data = row[gene_column]
        # if gene_data is not a string, then skip
        if type(gene_data) != str:
            
            logger.warning(f'Gene set {idx} is not a string, skipping')
            continue
        genes = gene_data.split(gene_sep)
        
        if len(genes) >1000:
            logger.warning(f'Gene set {idx} is too big, skipping')
            continue

        try:
            prompt = make_user_prompt_with_score(genes)
            # print(prompt)
            finger_print = None
            if model.startswith('gpt'):
                print("Accessing OpenAI API")
                analysis, finger_print = openai_chat(context, prompt, model, temperature, max_tokens, rate_per_token, LOG_FILE, DOLLAR_LIMIT, SEED)
            elif model.startswith('gemini'):
                print("Using Google Gemini API")
                analysis, error_message = query_genai_model(f"{context}\n{prompt}", model, temperature, max_tokens, LOG_FILE) 
            else:
                print("Using server model")
                analysis, error_message= server_model_chat(context, prompt, model, temperature, max_tokens,LOG_FILE, SEED)

            
            if analysis:
                # print(analysis)
                llm_name, llm_score, llm_analysis = process_analysis(analysis)
                # clean up the score and return float
                try:
                    llm_score_value =  float(re.sub("[^0-9.-]", "", llm_score))
                except ValueError:
                    llm_score_value = llm_score
            
                
                df.loc[idx, f'{column_prefix} Name'] = llm_name
                df.loc[idx, f'{column_prefix} Analysis'] = llm_analysis
                df.loc[idx, f'{column_prefix} Score'] = llm_score_value
                
                analysis_dict[f'{idx}_{column_prefix}'] = analysis
                # Log success with fingerprint
                logger.info(f'Success for {idx} {column_prefix}.')
                if finger_print:
                    logger.info(f'GPT_Fingerprint for {idx}: {finger_print}')
                    
            else:
                logger.error(f'Error for query gene set {idx}: {error_message}')

        except Exception as e:
            logger.error(f'Error for {idx}: {e}')
            continue
        i += 1
        if i % 10 == 0:
            # bin scores into no score, low score, medium score, high score
            bins = [-np.inf, 0, 0.79, 0.86, np.inf] # 0 is no score (name not assigned), between 0 to 0.79 is low score, between 0.8 to 0.86 is medium score, above 0.86 is high score
            labels = ['Name not assigned', 'Low Score', 'Medium Score', 'High Score']  # Define the corresponding labels
            
            df[f'{column_prefix} Score bins'] = pd.cut(df[f'{column_prefix} Score'], bins=bins, labels=labels)
                
            save_progress(df, analysis_dict, out_file)
            # df.to_csv(f'{out_file}.tsv', sep='\t', index=True)
            print(f"Saved progress for {i} genesets")
    # save the final file
    save_progress(df, analysis_dict, out_file)
    

In [5]:
import os 
from glob import glob


initialize = True 
input_file = 'data/GO_term_analysis/toy_example_w_contaminated.csv'
input_sep = constant.GO_FILE_SEP
set_index = constant.GO_INDEX_COL  
gene_column = constant.GO_GENE_COL 
gene_sep = ' '

## create a param file 
configs = glob('./jsonFiles/toyexample_*.json')
params = []
for conf_file in configs:
    model_names = '_'.join(conf_file.split('/')[-1].split('.')[0].split('_')[1:])
    # print(model_names)
    out_file = f'data/GO_term_analysis/LLM_processed_toy_example_w_contamination_{model_names}'  
    param = f"--config {conf_file} \
        --initialize \
        --input {input_file} \
        --input_sep  '{input_sep}'\
        --set_index {set_index} \
        --gene_column {gene_column}\
        --gene_sep '{gene_sep}' \
        --start 0 \
        --end 10 \
        --output_file {out_file}"
    print(param)
    params.append(param)

with open('toy_example_params.txt', 'w') as f:
    for p in params:
        f.write(p+'\n')

--config ./jsonFiles/toyexample_gemini_pro.json         --initialize         --input data/GO_term_analysis/toy_example_w_contaminated.csv         --input_sep  ','        --set_index GO         --gene_column Genes        --gene_sep ' '         --start 0         --end 10         --output_file data/GO_term_analysis/LLM_processed_toy_example_w_contamination_gemini_pro
--config ./jsonFiles/toyexample_llama2_70b.json         --initialize         --input data/GO_term_analysis/toy_example_w_contaminated.csv         --input_sep  ','        --set_index GO         --gene_column Genes        --gene_sep ' '         --start 0         --end 10         --output_file data/GO_term_analysis/LLM_processed_toy_example_w_contamination_llama2_70b
--config ./jsonFiles/toyexample_gpt35.json         --initialize         --input data/GO_term_analysis/toy_example_w_contaminated.csv         --input_sep  ','        --set_index GO         --gene_column Genes        --gene_sep ' '         --start 0         --end 10  

In [6]:
#Define your own loop for running the pipeline
## 12-18-2023: this loop is for run the default gene set and the contaminated gene sets 
## can modify this loop for different models or only run on default gene set

##12-27-23: edited the prompt 

##01-26-2023: test with bin scores

if __name__ == "__main__":
    
    df = pd.read_csv(input_file, sep=input_sep, index_col=set_index)
    
    if 'gpt' in model:
        name_fix = '_'.join(model.split('-')[:2])
    else:
        name_fix = model.replace(':', '_')
    column_prefix = name_fix + '_default'
    print(column_prefix)
    if initialize:
        # initialize the input file with llm names, analysis and score to None
        df[f'{column_prefix} Name'] = None
        df[f'{column_prefix} Analysis'] = None
        df[f'{column_prefix} Score'] = -np.inf
    main(df)  ## run with the real set 
    
    ## run the pipeline for contaiminated gene sets 
    contaminated_columns = [col for col in df.columns if col.endswith('contaminated_Genes')]
    # print(contaminated_columns)
    for col in contaminated_columns:
        gene_column = col ## Note need to change the gene_column to the contaminated column
        contam_prefix = '_'.join(col.split('_')[0:2])
        
        column_prefix = name_fix + '_' +contam_prefix
        print(column_prefix)

        if initialize:
            # initialize the input file with llm names, analysis and score to None
            df[f'{column_prefix} Name'] = None
            df[f'{column_prefix} Analysis'] = None
            df[f'{column_prefix} Score'] = -np.inf
        main(df)
    df.head()


gpt_3.5_default


  0%|          | 0/11 [00:00<?, ?it/s]

Accessing OpenAI API


  9%|▉         | 1/11 [00:03<00:37,  3.73s/it]

1456
Accessing OpenAI API


 18%|█▊        | 2/11 [00:06<00:30,  3.42s/it]

1371
Accessing OpenAI API


 27%|██▋       | 3/11 [00:10<00:26,  3.35s/it]

1447
Accessing OpenAI API


 36%|███▋      | 4/11 [00:11<00:17,  2.56s/it]

1162
Accessing OpenAI API


 45%|████▌     | 5/11 [00:14<00:15,  2.62s/it]

1544
Accessing OpenAI API


 55%|█████▍    | 6/11 [00:20<00:19,  3.96s/it]

2000
Accessing OpenAI API


 64%|██████▎   | 7/11 [00:23<00:14,  3.52s/it]

1408
Accessing OpenAI API


 73%|███████▎  | 8/11 [00:34<00:17,  5.93s/it]

1230
Accessing OpenAI API


 82%|████████▏ | 9/11 [00:41<00:12,  6.32s/it]

1363
Accessing OpenAI API


 91%|█████████ | 10/11 [00:43<00:05,  5.04s/it]

1212
Saved progress for 10 genesets
Accessing OpenAI API


100%|██████████| 11/11 [00:46<00:00,  4.25s/it]


1659
gpt_3.5_50perc_contaminated


  0%|          | 0/11 [00:00<?, ?it/s]

Accessing OpenAI API


  9%|▉         | 1/11 [00:02<00:25,  2.51s/it]

1358
Accessing OpenAI API


 18%|█▊        | 2/11 [00:06<00:29,  3.26s/it]

1486
Accessing OpenAI API


 27%|██▋       | 3/11 [00:09<00:25,  3.13s/it]

1437
Accessing OpenAI API


 36%|███▋      | 4/11 [00:14<00:26,  3.78s/it]

1676
Accessing OpenAI API


 45%|████▌     | 5/11 [00:15<00:17,  3.00s/it]

1373
Accessing OpenAI API


 55%|█████▍    | 6/11 [00:18<00:14,  2.80s/it]

1460
Accessing OpenAI API


 64%|██████▎   | 7/11 [00:20<00:10,  2.72s/it]

1404
Accessing OpenAI API


 73%|███████▎  | 8/11 [00:23<00:07,  2.63s/it]

1274
Accessing OpenAI API


 82%|████████▏ | 9/11 [00:26<00:05,  2.85s/it]

1395
Accessing OpenAI API


 91%|█████████ | 10/11 [00:28<00:02,  2.61s/it]

1238
Saved progress for 10 genesets
Accessing OpenAI API


100%|██████████| 11/11 [00:31<00:00,  2.88s/it]


1699
gpt_3.5_100perc_contaminated


  0%|          | 0/11 [00:00<?, ?it/s]

Accessing OpenAI API


  9%|▉         | 1/11 [00:02<00:26,  2.65s/it]

1360
Accessing OpenAI API


 18%|█▊        | 2/11 [00:04<00:21,  2.41s/it]

1254
Accessing OpenAI API


 27%|██▋       | 3/11 [00:07<00:20,  2.58s/it]

1385
Accessing OpenAI API


 36%|███▋      | 4/11 [00:10<00:20,  2.87s/it]

1323
Accessing OpenAI API


 45%|████▌     | 5/11 [00:13<00:15,  2.57s/it]

1482
Accessing OpenAI API


 55%|█████▍    | 6/11 [00:14<00:11,  2.34s/it]

1409
Accessing OpenAI API


 64%|██████▎   | 7/11 [00:17<00:10,  2.53s/it]

1439
Accessing OpenAI API


 73%|███████▎  | 8/11 [00:20<00:07,  2.56s/it]

1196
Accessing OpenAI API


 82%|████████▏ | 9/11 [00:22<00:04,  2.50s/it]

1269
Accessing OpenAI API


 91%|█████████ | 10/11 [00:26<00:02,  2.83s/it]

1229
Saved progress for 10 genesets
Accessing OpenAI API


100%|██████████| 11/11 [00:29<00:00,  2.64s/it]

1668





In [4]:
# check if there is any None in the analysis column, then rerun the pipeline

initialize = False 

SEED = 42
# model_options = ['gemini-pro','mistral:7b', 'mixtral:latest', 'llama2:7b', 'llama2:70b']
model_options = ['gemini-pro']  # llama2 7b has formatting issue, ingore 
input_sep = '\t'

if __name__ == "__main__":
    for m in model_options:
        model = m
        
        if '-' in model:
            name_fix = '_'.join(model.split('-')[:2])
        else:
            name_fix = model.replace(':', '_')
        input_file = f'data/GO_term_analysis/LLM_processed_toy_example_w_contamination_{name_fix}.tsv' # replace with your actual input file
        out_file = f'data/GO_term_analysis/LLM_processed_toy_example_w_contamination_{name_fix}'  # save to the same file name as the input file
        LOG_FILE = config['LOG_NAME']+f'_{name_fix}'+'.log'

        df = pd.read_csv(input_file, sep=input_sep, index_col=set_index)
        # print(df.head())
        column_prefix = name_fix + '_default' #this is default
        print(column_prefix)
        
        gene_column = constant.GO_GENE_COL
        print(gene_column)
        if initialize:
            # initialize the input file with llm names, analysis and score to None
            df[f'{column_prefix} Name'] = None
            df[f'{column_prefix} Analysis'] = None
            df[f'{column_prefix} Score'] = None
        main(df)  ## run with the real set 
        
        ## run the pipeline for contaiminated gene sets 
        contaminated_columns = [col for col in df.columns if col.endswith('contaminated_Genes')]
        # print(contaminated_columns)
        for col in contaminated_columns:
            gene_column = col ## Note need to change the gene_column to the contaminated column
            print(gene_column)
            contam_prefix = '_'.join(col.split('_')[0:2])
            column_prefix = name_fix + '_' +contam_prefix
        
            print(column_prefix)

            if initialize:
                # initialize the input file with llm names, analysis and score to None
                df[f'{column_prefix} Name'] = None
                df[f'{column_prefix} Analysis'] = None
                df[f'{column_prefix} Score'] = None
            main(df)
            
print("Done")

gemini_pro_default
Genes


100%|██████████| 11/11 [00:00<00:00, 8918.88it/s]


50perc_contaminated_Genes
gemini_pro_50perc_contaminated


100%|██████████| 11/11 [00:00<00:00, 38738.32it/s]


100perc_contaminated_Genes
gemini_pro_100perc_contaminated


100%|██████████| 11/11 [00:00<00:00, 44192.86it/s]

Done





## Run for the 100 sets

In [5]:
## set up parameters for running the pipeline for every 50 rows
import os 
from glob import glob
# Define start, step, and end values
start = 0
step = 50
end = 100

# Create a range list
range_list = list(range(start, end + step, step))

# Create tuples for each consecutive pair in the list
tuple_list = [(range_list[i], range_list[i+1]) for i in range(len(range_list)-1)]


initialize = True 
input_file = 'data/GO_term_analysis/model_comparison_terms.csv'
input_sep = constant.GO_FILE_SEP
set_index = constant.GO_INDEX_COL  
gene_column = constant.GO_GENE_COL 
gene_sep = ' '

## create a param file 
configs = glob('./jsonFiles/model_comparison_*.json')
params = []
for start, end in tuple_list:
    for conf_file in configs:
        model_names = '_'.join(conf_file.split('/')[-1].split('.')[0].split('_')[1:])
        print(model_names)
        
        out_file = f'data/GO_term_analysis/model_compare/LLM_processed_model_compare_{model_names}_{start}_{end}'  
        param = f"--config {conf_file} \
            --initialize \
            --input {input_file} \
            --input_sep  '{input_sep}'\
            --set_index {set_index} \
            --gene_column {gene_column}\
            --gene_sep '{gene_sep}' \
            --run_contaminated \
            --start {start} \
            --end {end} \
            --output_file {out_file}"
        print(param)
        params.append(param)
print('number of params: ', len(params))
    

with open('model_compare_params.txt', 'w') as f:
    for p in params:
        f.write(p+'\n')

comparison_gemini_pro
--config ./jsonFiles/model_comparison_gemini_pro.json             --initialize             --input data/GO_term_analysis/model_comparison_terms.csv             --input_sep  ','            --set_index GO             --gene_column Genes            --gene_sep ' '             --run_contaminated             --start 0             --end 50             --output_file data/GO_term_analysis/model_compare/LLM_processed_model_compare_comparison_gemini_pro_0_50
comparison_mixtral_instruct
--config ./jsonFiles/model_comparison_mixtral_instruct.json             --initialize             --input data/GO_term_analysis/model_comparison_terms.csv             --input_sep  ','            --set_index GO             --gene_column Genes            --gene_sep ' '             --run_contaminated             --start 0             --end 50             --output_file data/GO_term_analysis/model_compare/LLM_processed_model_compare_comparison_mixtral_instruct_0_50
comparison_llama2_70b
--config ./j

## Checkout and combine the output from the batch run 

In [6]:
from glob import glob
import pandas as pd
import json

processed_files = glob('data/GO_term_analysis/model_compare/LLM_processed_model_compare*.tsv')
# processed_files
# check any with None in the analysis column
for file in processed_files:
    model_names = '_'.join(file.split('/')[-1].split('.')[0].split('_')[-4:])
    
    df = pd.read_csv(file, sep='\t')
    # column names end with Analysis
    analysis_cols = [col for col in df.columns if col.endswith('Analysis')]
    for col in analysis_cols:
        if df[col].isna().sum() > 0:
            n_none = df[col].isna().sum()
            print(f'{model_names} {col} has {n_none} None in the analysis column')
        else:
            print(f'{model_names} {col} pass')
        print('-----------------------')
    

    

compare_100set_gpt_4 gpt_4_default Analysis pass
-----------------------
compare_100set_gpt_4 gpt_4_50perc_contaminated Analysis pass
-----------------------
compare_100set_gpt_4 gpt_4_100perc_contaminated Analysis pass
-----------------------
compare_100set_gemini_pro gemini_pro_default Analysis pass
-----------------------
compare_100set_gemini_pro gemini_pro_50perc_contaminated Analysis pass
-----------------------
compare_100set_gemini_pro gemini_pro_100perc_contaminated Analysis pass
-----------------------
compare_100set_mixtral_instruct mixtral_instruct_default Analysis pass
-----------------------
compare_100set_mixtral_instruct mixtral_instruct_50perc_contaminated Analysis pass
-----------------------
compare_100set_mixtral_instruct mixtral_instruct_100perc_contaminated Analysis pass
-----------------------
compare_100set_llama2_70b llama2_70b_default Analysis pass
-----------------------
compare_100set_llama2_70b llama2_70b_50perc_contaminated Analysis pass
------------------

In [5]:
## combine the 0-50 and 50-100 files together
from glob import glob
import pandas as pd
import json

processed_files = glob('data/GO_term_analysis/model_compare/LLM_processed_model_compare*.tsv')

# model_names = ['mixtral_instruct']
for file in processed_files:
    model_name = '_'.join(file.split('/')[-1].split('.')[0].split('_')[-4:-2])
    model_names.append(model_name)
model_names = list(set(model_names))

for model in model_names:
    print(model)
    files = [file for file in processed_files if model in file]
    print(files)
    df = pd.concat([pd.read_csv(file, sep='\t', index_col='GO') for file in files])
    
    # add the toy example in as well 
    toy_file = f'data/GO_term_analysis/LLM_processed_toy_example_w_contamination_{model}.tsv'
    
    df = pd.concat([df, pd.read_csv(toy_file, sep='\t', index_col='GO')])
    # check any with None in the analysis column
    analysis_columns = [col for col in df.columns if col.endswith('Analysis')]
    for col in analysis_columns:
        if df[col].isna().sum() > 0:
            n_none = df[col].isna().sum()
            print(f'{model} {col} has {n_none} None in the analysis column')
    
    print(df.shape)
    df.to_csv(f'data/GO_term_analysis/model_compare/LLM_processed_model_compare_100set_{model}.tsv', sep='\t', index=True)
    print('------------saved--------------')

In [7]:
##check for each 100 set file, how many 'systems of unrelated proteins' are assigened to each gene set 
from glob import glob
import pandas as pd
import json

files = glob('data/GO_term_analysis/model_compare/LLM_processed_model_compare_100set*.tsv')
unnamed_dict = {}
model_names = []

for file in files:
    model_name = '_'.join(file.split('/')[-1].split('.')[0].split('_')[-2:])
    model_names.append(model_name)
    df = pd.read_csv(file, sep='\t', index_col='GO')
    name_columns = [col for col in df.columns if col.endswith('Name')]
    
    for col in name_columns:
        gene_set_type = col.split(' ')[0]
        # print(gene_set_type)
        #number of names contains 'unrelated proteins'
        n_unrelated = df[col].str.contains('unrelated proteins').sum()
        n_total = df.shape[0]
        print(f'{gene_set_type} has {n_unrelated} gene sets named with unrelated proteins, {n_unrelated/n_total*100:.2f}%')
        unnamed_dict[f'{gene_set_type}'] = {'n_unrelated': n_unrelated, 'n_named': n_total-n_unrelated}
    score_columns = [col for col in df.columns if col.endswith('Score')]
    for c in score_columns:
        gene_set_type = c.split(' ')[0]
        # print(gene_set_type)
        # number of scores are 0
        n_zero = df[c].eq(0).sum()
        n_total = df.shape[0]
        print(f'{gene_set_type} has {n_zero} gene sets with score 0, {n_zero/n_total*100:.2f}%')
        
    print('------------------')

gpt_4_default has 4 gene sets named with unrelated proteins, 4.00%
gpt_4_50perc_contaminated has 28 gene sets named with unrelated proteins, 28.00%
gpt_4_100perc_contaminated has 87 gene sets named with unrelated proteins, 87.00%
gpt_4_default has 4 gene sets with score 0, 4.00%
gpt_4_50perc_contaminated has 28 gene sets with score 0, 28.00%
gpt_4_100perc_contaminated has 87 gene sets with score 0, 87.00%
------------------
gemini_pro_default has 20 gene sets named with unrelated proteins, 20.00%
gemini_pro_50perc_contaminated has 41 gene sets named with unrelated proteins, 41.00%
gemini_pro_100perc_contaminated has 59 gene sets named with unrelated proteins, 59.00%
gemini_pro_default has 20 gene sets with score 0, 20.00%
gemini_pro_50perc_contaminated has 41 gene sets with score 0, 41.00%
gemini_pro_100perc_contaminated has 59 gene sets with score 0, 59.00%
------------------
mixtral_instruct_default has 0 gene sets named with unrelated proteins, 0.00%
mixtral_instruct_50perc_contamin

In [11]:
## check the time for each model 
from glob import glob
import json
log_files = glob('./logs/model_comparison_*.log')
# print(log_files)
models = ['gpt_4', 'gemini_pro']
# , 'mixtral_instruct','llama2_70b']
total_time_per_run = 0
for model in models:
    logs = [file for file in log_files if model in file]
    time = 0
    runs = 0
    for log in logs:
        with open(log, 'r') as f:
            data = json.load(f)
        time += data['time_taken_total']
        runs += data['runs']
        if  model == 'gpt_4':
            cost = data['dollars_spent']/data['runs']
    time_per_run = time/runs
    if model in ['mixtral_instruct','llama2_70b']:
        time_per_run = time_per_run/ 1e9 
        
    print(f'{model} takes {time_per_run :.2f} seconds per run')
    if  model == 'gpt_4':
        print(f'{model} takes {cost} dollars per run')
    total_time_per_run += time_per_run

with open ('/cellar/users/mhu/Projects/llm_evaluation_for_gene_set_interpretation/logs/toy_example_gpt35_.log', 'r') as f:
    data = json.load(f)
    time = data['time_taken_total']
    runs = data['runs']
    time_per_run = time/runs  
    total_time_per_run += time_per_run 
    cost = data['dollars_spent']/runs
print(f'gpt 3.5 takes {time_per_run:.2f} seconds per run')  
print(f'gpt 3.5 takes {cost} dollars per run')
print('average time usage: ', total_time_per_run/5)


ZeroDivisionError: division by zero