# Retrieve missing (davinci) model names and conditions

For some of the really expensive davinci runs, the evaluation was never run because the fine-tuned model name was not parsed correctly.

To check which models we could still (re-)evaluate, we use the `https://api.openai.com/v1/fine-tunes` endpoint to create `json` files listing all the fine-tuned models.

In [2]:
from glob import glob

from fastcore.utils import load_pickle
from fastcore.xtras import loads

In [3]:
# Paths of the polymer-classification result pickles that already exist on disk;
# used below as the set of runs that have been evaluated.
already_evaluated = glob("results/polymer_classification/*polymers*.pkl")

In [4]:
# Training-file name stored under 'train_filename' in each already-evaluated result pickle.
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
train_filenames = [
    load_pickle(result_path)['train_filename']
    for result_path in already_evaluated
]

In [11]:
# Load the saved fine-tune listings (one JSON dump per file).
# The encoding is given explicitly: JSON is UTF-8 by specification, and relying on the
# platform default can mis-decode the file on systems whose locale is not UTF-8.
with open('run_files/20220829_fine_tunes.json', 'r', encoding='utf-8') as f:
    models = loads(f.read())

# Second listing, saved separately (presumably from a different account — confirm).
with open('run_files/20220831_fine_tunes_gmail.json', 'r', encoding='utf-8') as f:
    models_gmail = loads(f.read())

In [6]:
# Peek at the first fine-tune record: the filename of its first training file.
models['data'][0]['training_files'][0]['filename']

'run_files/2022-08-17-20-10-17_train_prompts_polymers_200.jsonl'

In [8]:
# Peek at the first fine-tune record: the name of the resulting fine-tuned model.
models['data'][0]['fine_tuned_model']

'ada:ft-lsmoepfl-2022-08-18-00-13-00'

In [12]:
# Lookup tables: training-file name -> fine-tuned model name, one per loaded listing.
# If several fine-tunes share a training file, the last record in the listing wins.
# NOTE: this rebinds `train_filenames`, which an earlier cell set to the list of
# training files read from the already-evaluated result pickles.
train_filenames = {
    fine_tune['training_files'][0]['filename']: fine_tune['fine_tuned_model']
    for fine_tune in models['data']
}
train_filenames_gmail = {
    fine_tune['training_files'][0]['filename']: fine_tune['fine_tuned_model']
    for fine_tune in models_gmail['data']
}


In [13]:
# Display the training-file -> fine-tuned-model mapping built from the second listing.
train_filenames_gmail

{'run_files/2022-08-22-13-13-25_train_prompts_polymers_10.jsonl': 'ada:ft-personal-2022-08-22-18-14-27',
 'run_files/2022-08-22-13-15-07_train_prompts_polymers_10.jsonl': 'ada:ft-personal-2022-08-22-18-16-05',
 'run_files/2022-08-22-13-16-40_train_prompts_polymers_50.jsonl': 'ada:ft-personal-2022-08-22-18-18-57',
 'run_files/2022-08-22-13-19-34_train_prompts_polymers_50.jsonl': 'ada:ft-personal-2022-08-22-18-20-51',
 'run_files/2022-08-22-13-21-28_train_prompts_polymers_100.jsonl': 'ada:ft-personal-2022-08-22-18-23-13',
 'run_files/2022-08-22-13-24-19_train_prompts_polymers_100.jsonl': 'ada:ft-personal-2022-08-22-18-26-06',
 'run_files/2022-08-22-13-26-47_train_prompts_polymers_200.jsonl': 'ada:ft-personal-2022-08-22-18-29-29',
 'run_files/2022-08-22-13-30-05_train_prompts_polymers_200.jsonl': 'ada:ft-personal-2022-08-22-18-32-50',
 'run_files/2022-08-22-13-33-25_train_prompts_polymers_500.jsonl': 'ada:ft-personal-2022-08-22-18-39-00',
 'run_files/2022-08-22-13-39-38_train_prompts_poly

In [25]:
# Select the fine-tunes whose training file does not appear in any already-evaluated result.
#
# `train_filenames` cannot be used for this check: an earlier cell rebinds it to a dict
# keyed by the training filename of *every* record in `models['data']`, so
# `... not in train_filenames` is never true on a fresh Restart & Run All and the
# result would always be empty. The evaluated filenames are therefore re-read here
# from the result pickles, which keeps this cell independent of that rebinding.
# NOTE(review): assumes the pickles store 'train_filename' in the same form as the
# API's 'filename' field (e.g. 'run_files/..._train_prompts_polymers_200.jsonl') — confirm.
evaluated_train_filenames = {
    load_pickle(result_path)['train_filename'] for result_path in already_evaluated
}
models_to_evaluate = [
    m for m in models['data']
    if m['training_files'][0]['filename'] not in evaluated_train_filenames
]

In [26]:
# Display the full fine-tune records that still need to be evaluated.
models_to_evaluate

[{'object': 'fine-tune',
  'id': 'ft-jK0ziTZX6y5d2DXbB3kdct4w',
  'hyperparams': {'n_epochs': 4,
   'batch_size': 1,
   'prompt_loss_weight': 0.1,
   'learning_rate_multiplier': 0.1},
  'organization_id': 'org-TFRJXw3PPQocOWbu71eI2t9U',
  'model': 'ada',
  'training_files': [{'object': 'file',
    'id': 'file-5iALgqTwcEm1wMYcfIciXCPl',
    'purpose': 'fine-tune',
    'filename': 'run_files/2022-08-17-20-10-17_train_prompts_polymers_200.jsonl',
    'bytes': 26684,
    'created_at': 1660781422,
    'status': 'processed',
    'status_details': None}],
  'validation_files': [{'object': 'file',
    'id': 'file-Y5nT2jV8zINhQxMXObQ8Mmt3',
    'purpose': 'fine-tune',
    'filename': 'run_files/2022-08-17-20-10-17_valid_prompts_polymers_2925.jsonl',
    'bytes': 392066,
    'created_at': 1660781424,
    'status': 'processed',
    'status_details': None}],
  'result_files': [{'object': 'file',
    'id': 'file-vDqt84z3b4DSgcdm7lmQGSf0',
    'purpose': 'fine-tune-results',
    'filename': 'compile