<a href="https://colab.research.google.com/github/sparks-baird/self-driving-lab-demo/blob/main/notebooks/ac-2023/bayes-opt/1.0-sgb-clslab-light-simple.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quick Start

In [None]:
# Detect whether the notebook is running inside Google Colab by probing for
# the `google.colab` package; a successful import is used as the signal.
try:
  import google.colab  # noqa: F401 -- imported only to test availability
  IN_COLAB = True
except ImportError:
  # Only a failed import means "not in Colab". A bare `except:` would also
  # swallow KeyboardInterrupt/SystemExit and mask unrelated errors.
  IN_COLAB = False

In [10]:
import pandas as pd
# Load the per-ingredient cookie table from the CSV in the local dataset folder.
raw_df = pd.read_csv(
    filepath_or_buffer="cookie-dataset/choc_chip_cookie_ingredients.csv"
)
# Print the column names, non-null counts and dtypes of the loaded frame.
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1990 entries, 0 to 1989
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    1990 non-null   int64  
 1   Ingredient    1990 non-null   object 
 2   Text          1990 non-null   object 
 3   Recipe_Index  1990 non-null   object 
 4   Rating        980 non-null    float64
 5   Quantity      1990 non-null   float64
 6   Unit          1990 non-null   object 
dtypes: float64(2), int64(1), object(4)
memory usage: 109.0+ KB


In [36]:
# Collate the long-format table into one row per recipe ("Recipe_Index").
# The "data" column holds a list of dictionaries, one per ingredient row, with
# the keys "Ingredient", "Quantity", "Unit" and "Rating".
# The columns are selected *before* `.apply` so the grouping column is never
# passed to the lambda; this avoids the pandas >= 2.2 DeprecationWarning about
# `DataFrameGroupBy.apply` operating on the grouping columns.
df = (
    raw_df.groupby("Recipe_Index")[["Ingredient", "Quantity", "Unit", "Rating"]]
    .apply(lambda group: group.to_dict(orient="records"))
    .reset_index(name="data")
)

# Store each recipe's rating in its own column, taken from the recipe's first
# ingredient record.
# NOTE(review): assumes the first row of a recipe carries its rating -- confirm
# against the dataset.
df["rating"] = df["data"].apply(lambda records: records[0]["Rating"])

# Drop recipes whose rating is NaN. `.copy()` makes `df` an independent frame
# so that later cells can add columns to it without risking a
# SettingWithCopyWarning.
df = df.dropna(subset=["rating"]).copy()

# Collect every distinct ingredient name across the remaining recipes.
ingredients = {record["Ingredient"] for records in df["data"] for record in records}

ingredients

{'all purpose flour',
 'almond extract',
 'almonds',
 'applesauce',
 'baking powder',
 'baking soda',
 'bittersweet chocolate chip',
 'bourbon',
 'bread flour',
 'brown rice flour',
 'butter',
 'cake flour',
 'cake mix',
 'cinnamon',
 'coconut',
 'coconut extract',
 'cookie mix',
 'corn syrup',
 'cream',
 'crispy rice',
 'dark chocolate chip',
 'egg',
 'graham cracker',
 'honey',
 'instant coffee',
 'light brown sugar',
 'liquer',
 'macadmia',
 'maple',
 'margarine',
 'marshmallows',
 'milk',
 'milk chocolate chip',
 'nestle',
 'nuts',
 'oat',
 'peanut butter',
 'peanut butter chips',
 'pecan',
 'pudding mix',
 'raisins',
 'salt',
 'semisweet chocolate chip',
 'shortening',
 'sour cream',
 'sugar',
 'tartar',
 'toffee',
 'vanilla',
 'vegetable oil',
 'walnut',
 'water',
 'wheat',
 'white chocolate chip',
 'white pepper',
 'xanthan gum',
 'zucchini'}

In [42]:
# extract the sugar quantity from the sugar ingredient
def extract_sugar_quantity(row):
    """Return the "Quantity" of the first "sugar" record in ``row``.

    ``row`` is an iterable of ingredient dictionaries. Only an exact
    ``"sugar"`` ingredient name matches; ``None`` is returned when no record
    matches.
    """
    sugar_quantities = (
        record["Quantity"] for record in row if record["Ingredient"] == "sugar"
    )
    return next(sugar_quantities, None)

# extract the sugar unit from the sugar ingredient
def extract_sugar_unit(row):
    """Return the "Unit" of the first "sugar" record in ``row``.

    ``row`` is an iterable of ingredient dictionaries. Only an exact
    ``"sugar"`` ingredient name matches; ``None`` is returned when no record
    matches.
    """
    sugar_units = (
        record["Unit"] for record in row if record["Ingredient"] == "sugar"
    )
    return next(sugar_units, None)

# Add the sugar amount and its unit as per-recipe columns. Recipes without a
# "sugar" ingredient get the extractors' None result, i.e. a missing value.
df["sugar"] = df["data"].apply(extract_sugar_quantity)
df["sugar_unit"] = df["data"].apply(extract_sugar_unit)

# Tally the units as the cell's final expression so the displayed table is
# reproduced on a fresh run and the "all in cups" observation is checked by
# the code rather than asserted in a comment (the saved output lists only "cup").
df["sugar_unit"].value_counts()

sugar_unit
cup    87
Name: count, dtype: int64

In [43]:
# Display the per-recipe sugar quantities (pandas truncates the long output).
df.loc[:, "sugar"]

0     1.000000
1     0.800000
2     0.738462
3     0.333333
4     0.400000
        ...   
94    1.200000
95    0.960000
96    4.000000
97    1.000000
98    0.600000
Name: sugar, Length: 98, dtype: float64

## Setup

### Create the `parameters` list
For the sake of simplicity, we'll describe the added sugar as a weight fraction (`sugar_frac`). The lower bound for the sugar content is 0.0, and we'll arbitrarily set the upper bound to 0.25, i.e., 25% by weight (see `bounds`).

In [None]:
# Search-space limits keyed by parameter name, stored as (lower, upper) pairs.
bounds = dict(sugar_frac=(0.0, 0.25))

#### Examples

<details><summary>Branin equation</summary>

```python
parameters = [
        {
            "name": "x1",
            "type": "range",
            "bounds": [-5.0, 10.0],
        },
        {
            "name": "x2",
            "type": "range",
            "bounds": [0.0, 10.0],
        },
    ]
```

</details>

<details><summary>Hartmann 6 equation</summary>

```python
parameters = [
        {
            "name": "x1",
            "type": "range",
            "bounds": [0.0, 1.0],
            "value_type": "float",  # Optional, defaults to inference from type of "bounds".
            "log_scale": False,  # Optional, defaults to False.
        },
        {
            "name": "x2",
            "type": "range",
            "bounds": [0.0, 1.0],
        },
        {
            "name": "x3",
            "type": "range",
            "bounds": [0.0, 1.0],
        },
        {
            "name": "x4",
            "type": "range",
            "bounds": [0.0, 1.0],
        },
        {
            "name": "x5",
            "type": "range",
            "bounds": [0.0, 1.0],
        },
        {
            "name": "x6",
            "type": "range",
            "bounds": [0.0, 1.0],
        },
    ]
```

</details>

In [None]:
# Exercise placeholder: `parameters` stays bound to Ellipsis until you replace
# it with your own search-space definition (see the collapsed examples above
# for the expected list-of-dictionaries layout).
parameters = ... # insert your code here

### AxClient

In [None]:
from ax.service.ax_client import AxClient

# Create the Ax client and register an experiment named "ac-2023-tutorial".
# The two Ellipsis arguments below are exercise placeholders to fill in:
# the search-space definition and the optimization direction.
ax_client = AxClient()
ax_client.create_experiment(
    name = "ac-2023-tutorial",
    parameters = ..., # insert your code here
    minimize = ..., # insert your code here
)

### Optimization loop

In [None]:
# Run 15 iterations: request the next trial from the client, evaluate it, and
# report the result back under the same trial index.
for _ in range(15):
    # NOTE(review): this rebinds the name `parameters`, which an earlier cell
    # uses for the search-space definition -- consider a distinct name.
    parameters, trial_index = ax_client.get_next_trial()
    # Exercise placeholder: compute the observed result for this trial here.
    raw_data = ... # insert your code here
    ax_client.complete_trial(trial_index=trial_index, raw_data=raw_data)

### Best parameters

In [None]:
# Ask the client for the best parameters found; the second element of the
# returned pair is kept as `metrics`.
best_parameters, metrics = ax_client.get_best_parameters()