In [14]:
# Notebook cell: send a single next-word-prediction prompt to the OpenAI chat
# completions endpoint, using the organization and API key returned by
# load_secrets("SECRETS").
import openai

from evals.utils import load_secrets

secrets = load_secrets("SECRETS")
try:
    org = secrets["DEFAULT_ORG"]
    openai_api_key = secrets["OPENAI_API_KEY"]
except KeyError:
    # Print a hint, then re-raise so the traceback still names the missing key.
    print("Organization or API key not found in secrets")
    raise

# An empty system message followed by the user request for the next word.
prompt = [{'role': 'system', 'content': ''}, {'role': 'user', 'content': 'What is the next word in the following text? Respond only with that single word and nothing else, including punctuation.\nHurfeish (Arabic: حرفيش; Hebrew: חֻרְפֵישׁ; lit. "milk thistle" or possibly from "snake" ) is a  Druze\n'}]
openai.api_key = openai_api_key
response = openai.ChatCompletion.create(
    model="gpt-4o-mini",
    # messages=[{"role": "user", "content": "Hello you old wrinkly herron"}],
    messages=prompt,
    organization=org
)

# Bare expression so the notebook displays the returned object.
response

<OpenAIObject chat.completion id=chatcmpl-AdDlhj4zX3VJ1YI3Zu7RHoYLojKDl at 0x167bef0b0> JSON: {
  "id": "chatcmpl-AdDlhj4zX3VJ1YI3Zu7RHoYLojKDl",
  "object": "chat.completion",
  "created": 1733911081,
  "model": "gpt-4o-mini-2024-07-18",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "village",
        "refusal": null
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 76,
    "completion_tokens": 2,
    "total_tokens": 78,
    "prompt_tokens_details": {
      "cached_tokens": 0,
      "audio_tokens": 0
    },
    "completion_tokens_details": {
      "reasoning_tokens": 0,
      "audio_tokens": 0,
      "accepted_prediction_tokens": 0,
      "rejected_prediction_tokens": 0
    }
  },
  "system_fingerprint": "fp_bba3c8e70b"
}

```bash
# Run scripts.sweep_full_study for the study "full_sweep_test".
# Continuation lines are indented like the other commands in this file; the
# arguments themselves are unchanged.
python -m scripts.sweep_full_study \
  --study_name="full_sweep_test" \
  --model_configs="gpt-3.5-turbo" \
  --val_only_model_configs="gpt-4o" \
  --tasks='{"wikipedia": ["identity", "sentiment"]}' \
  --prompt_configs="minimal" \
  --val_tasks='{"number_triplets": ["identity", "sentiment"]}' \
  --n_object_train=30_000 \
  --n_object_val=250 \
  --n_meta_val=50
```

## Exp1

### Curriculum

Stage 1 of curriculum
```bash
# Exp1, stage 1: run scripts.sweep_full_study for the study "curriculum_exp1"
# with gpt-4o-mini as --model_configs and gpt-4o as --val_only_model_configs.
# --task_order lists the six --tasks keys in the same order as --tasks itself;
# presumably it fixes the order tasks appear in the training data — confirm
# against scripts/sweep_full_study.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp1" \
  --model_configs="gpt-4o-mini" \
  --val_only_model_configs="gpt-4o" \
  --tasks='{"wikipedia": ["identity","is_even","number_of_tokens"], "countries_long": ["first_character"], "colors_long": ["third_character"], "wealth_seeking": ["matches_wealth_seeking"], "power_seeking": ["matches_power_seeking"], "arc_challenge_non_cot": ["identity", "is_either_a_or_c", "is_either_b_or_d"]}' \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_train=5000 \
  --n_object_val=250 \
  --n_meta_val=50 \
  --task_order='["wikipedia", "countries_long", "colors_long", "wealth_seeking", "power_seeking", "arc_challenge_non_cot"]' \
  --finetuning_overrides='{"gpt-4o-mini":{"epochs":1,"learning_rate":2,"batch_size":20},"gpt-4o":{"epochs":1,"learning_rate":2,"batch_size":20}}'
```



Stage 2 of curriculum
```bash
# Exp1, stage 2: launch evals.run_finetuning for gpt-4o-2024-08-06 under the
# study "curriculum_exp1/gpt-4o". The train/val JSONL files are read from the
# gpt-4o-mini directory of the same study.
python -m evals.run_finetuning \
  study_name=curriculum_exp1/gpt-4o \
  train_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp1/gpt-4o-mini/train_dataset.jsonl \
  val_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp1/gpt-4o-mini/val_dataset.jsonl \
  language_model=gpt-4o-2024-08-06 \
  notes= \
  epochs=1 \
  learning_rate=2 \
  batch_size=20
```

Stage 3 of curriculum
```bash
# Exp1, stage 3: re-run scripts.sweep_full_study for "curriculum_exp1" with
# only validation flags and --skip_finetuning, pointing
# --val_only_model_configs at the fine-tuned gpt-4o config (id suffix Af6zpWIK).
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp1" \
  --val_only_model_configs="finetuned/curriculum_exp1/gpt-4o/ft_gpt-4o-2024-08-06_personal__Af6zpWIK" \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_val=250 \
  --n_meta_val=50 \
  --skip_finetuning
```

### Without curriculum

Stage 1 (without curriculum)
```bash
# Exp1 without curriculum, stage 1: same sweep as the curriculum variant but
# with no --task_order and no --val_only_model_configs.
# NOTE(review): --study_name is still "curriculum_exp1", the same name the
# curriculum run uses — confirm the two runs do not overwrite each other.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp1" \
  --model_configs="gpt-4o-mini" \
  --tasks='{"wikipedia": ["identity","is_even","number_of_tokens"], "countries_long": ["first_character"], "colors_long": ["third_character"], "wealth_seeking": ["matches_wealth_seeking"], "power_seeking": ["matches_power_seeking"], "arc_challenge_non_cot": ["identity", "is_either_a_or_c", "is_either_b_or_d"]}' \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_train=5000 \
  --n_object_val=250 \
  --n_meta_val=50 \
  --finetuning_overrides='{"gpt-4o-mini":{"epochs":1,"learning_rate":2,"batch_size":20},"gpt-4o":{"epochs":1,"learning_rate":2,"batch_size":20}}'
```

Stage 2 (without curriculum)
```bash
# Exp1 without curriculum, stage 2: launch evals.run_finetuning for
# gpt-4o-2024-08-06 under the study "curriculum_exp1/gpt-4o".
# NOTE(review): every argument matches the curriculum stage 2 command,
# including train_path (train_dataset.jsonl). The Exp4 no-curriculum runs use
# train_dataset_shuffled.jsonl instead — confirm which file was intended here.
python -m evals.run_finetuning \
  study_name=curriculum_exp1/gpt-4o \
  train_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp1/gpt-4o-mini/train_dataset.jsonl \
  val_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp1/gpt-4o-mini/val_dataset.jsonl \
  language_model=gpt-4o-2024-08-06 \
  notes= \
  epochs=1 \
  learning_rate=2 \
  batch_size=20
```

Stage 3 (without curriculum)
```bash
# Exp1 without curriculum, stage 3: re-run scripts.sweep_full_study with only
# validation flags and --skip_finetuning.
# NOTE(review): this command is identical to the curriculum stage 3 command,
# including the fine-tuned model id (suffix Af6zpWIK) — confirm the id of the
# model trained without curriculum.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp1" \
  --val_only_model_configs="finetuned/curriculum_exp1/gpt-4o/ft_gpt-4o-2024-08-06_personal__Af6zpWIK" \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_val=250 \
  --n_meta_val=50 \
  --skip_finetuning
```

## Exp4

### Curriculum

Stage 1 of curriculum
```bash
# Exp4, stage 1: run scripts.sweep_full_study for the study "curriculum_exp4"
# with gpt-4o-mini as both --model_configs and --val_only_model_configs.
# Unlike Exp1 this passes --property_order rather than --task_order;
# presumably it orders training data by response property — confirm against
# scripts/sweep_full_study.
# NOTE(review): --property_order names "second_character", which no entry in
# --tasks requests — confirm this is harmless.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp4" \
  --model_configs="gpt-4o-mini" \
  --val_only_model_configs="gpt-4o-mini" \
  --tasks='{"wikipedia": ["identity","is_even","number_of_tokens"], "countries_long": ["first_character"], "colors_long": ["third_character"], "wealth_seeking": ["matches_wealth_seeking"], "power_seeking": ["matches_power_seeking"], "arc_challenge_non_cot": ["identity", "is_either_a_or_c", "is_either_b_or_d"]}' \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_train=2500 \
  --n_object_val=250 \
  --n_meta_val=100 \
  --property_order='["identity", "first_character", "second_character", "third_character", "is_even", "number_of_tokens", "is_either_a_or_c", "is_either_b_or_d", "matches_wealth_seeking", "matches_power_seeking"]' \
  --finetuning_overrides='{"gpt-4o-mini":{"epochs":1,"learning_rate":2,"batch_size":20},"gpt-4o":{"epochs":1,"learning_rate":2,"batch_size":20},"llama-8b-fireworks":{"epochs":1,"lora_rank":32,"batch_size":16}}'
```



Stage 2 of curriculum
```bash
# Exp4, stage 2: launch evals.run_finetuning for gpt-4o under the study
# "curriculum_exp4/gpt-4o". The train/val JSONL files are read from the
# gpt-4o-mini directory of the same study.
python -m evals.run_finetuning \
  study_name=curriculum_exp4/gpt-4o \
  train_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/train_dataset.jsonl \
  val_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/val_dataset.jsonl \
  language_model=gpt-4o \
  notes= \
  epochs=1 \
  learning_rate=2 \
  batch_size=20
```

Stage 3 of curriculum
```bash
# Exp4, stage 3: re-run scripts.sweep_full_study for "curriculum_exp4" with
# only validation flags and --skip_finetuning, pointing
# --val_only_model_configs at the fine-tuned gpt-4o config (id suffix AfVQ5JfD).
# NOTE(review): --n_meta_val is 50 here but 100 in stage 1, and the
# "animals_long" list omits "second_character", which stage 1 includes —
# confirm both differences are intended.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp4" \
  --val_only_model_configs="finetuned/curriculum_exp4/gpt-4o/ft_gpt-4o-2024-08-06_personal__AfVQ5JfD" \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_val=250 \
  --n_meta_val=50 \
  --skip_finetuning
```

### Without curriculum


Fine-tune gpt-4o-mini again without curriculum (shuffled training data)
```bash
# Exp4 without curriculum: launch evals.run_finetuning for gpt-4o-mini under
# the study "curriculum_exp4/gpt-4o-mini", training on
# train_dataset_shuffled.jsonl.
python -m evals.run_finetuning \
  study_name=curriculum_exp4/gpt-4o-mini \
  train_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/train_dataset_shuffled.jsonl \
  val_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/val_dataset.jsonl \
  language_model=gpt-4o-mini \
  notes= \
  epochs=1 \
  learning_rate=2 \
  batch_size=20
```

Run gpt-4o-mini finetuned on shuffled data over evals
```bash
# Exp4 without curriculum: re-run scripts.sweep_full_study for
# "curriculum_exp4" with only validation flags and --skip_finetuning, pointing
# --val_only_model_configs at the fine-tuned gpt-4o-mini config (id suffix
# AfVvZIoz).
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp4" \
  --val_only_model_configs="finetuned/curriculum_exp4/gpt-4o-mini/ft_gpt-4o-mini-2024-07-18_personal__AfVvZIoz" \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_val=250 \
  --n_meta_val=50 \
  --skip_finetuning
```


Fine-tune gpt-4o again without curriculum (shuffled training data)
```bash
# Exp4 without curriculum: launch evals.run_finetuning for gpt-4o under the
# study "curriculum_exp4/gpt-4o", training on train_dataset_shuffled.jsonl
# from the gpt-4o-mini directory.
python -m evals.run_finetuning \
  study_name=curriculum_exp4/gpt-4o \
  train_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/train_dataset_shuffled.jsonl \
  val_path=/Users/pbu5262/Documents/python_scripts/introspection_self_prediction/exp/finetuning/curriculum_exp4/gpt-4o-mini/val_dataset.jsonl \
  language_model=gpt-4o \
  notes= \
  epochs=1 \
  learning_rate=2 \
  batch_size=20
```

Run gpt-4o finetuned on shuffled data over evals
```bash
# Exp4 without curriculum: re-run scripts.sweep_full_study for
# "curriculum_exp4" with only validation flags and --skip_finetuning, pointing
# --val_only_model_configs at the fine-tuned gpt-4o config (id suffix AfWML6jg).
# NOTE(review): --n_meta_val is 100 here but 50 in the gpt-4o-mini evaluation
# and in the curriculum stage 3 command — confirm the results are comparable.
python -m scripts.sweep_full_study \
  --study_name="curriculum_exp4" \
  --val_only_model_configs="finetuned/curriculum_exp4/gpt-4o/ft_gpt-4o-2024-08-06_personal__AfWML6jg" \
  --val_tasks='{"survival_instinct": ["matches_survival_instinct"], "myopic_reward": ["matches_myopic_reward"], "animals_long": ["first_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "mmlu_non_cot": ["is_either_a_or_c", "is_either_b_or_d"], "english_words_long": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"], "stories_sentences": ["first_character", "second_character", "third_character", "first_and_second_character", "first_word", "second_word", "starts_with_vowel", "third_word"]}' \
  --prompt_configs="minimal" \
  --n_object_val=250 \
  --n_meta_val=100 \
  --skip_finetuning
```