In [5]:
## Load required modules

import pandas as pd
import numpy as np
import itertools

from sklearn.cluster import KMeans

from sklearn.utils.extmath import cartesian

from sklearn.decomposition import PCA

from scipy.cluster.vq import vq

import matplotlib.pyplot as plt
import seaborn as sns

import GPyOpt

# Seed dataset selection

## Define possible dataset

In [7]:
# Build the full grid of candidate formulations

# Each of the five excipients takes an integer level from 0 to 5 inclusive
T20, T80, P188, DMSO, PG = (np.arange(0, 6, 1) for _ in range(5))

# One row per combination of excipient levels, with named columns
possiblequeries = pd.DataFrame(
    cartesian((T20, T80, P188, DMSO, PG)),
    columns=["T20", "T80", "P188", "DMSO", "PG"],
)

possiblequeries

Unnamed: 0,T20,T80,P188,DMSO,PG
0,0,0,0,0,0
1,0,0,0,0,1
2,0,0,0,0,2
3,0,0,0,0,3
4,0,0,0,0,4
...,...,...,...,...,...
7771,5,5,5,5,1
7772,5,5,5,5,2
7773,5,5,5,5,3
7774,5,5,5,5,4


## KMeans select seed dataset

In [8]:
# Use k-means to select a seed dataset spread across the candidate grid

# n_clusters = number of datapoints (seed formulations) to investigate
kmeans = KMeans(n_clusters=96, random_state=42)

# Fit and predict on the same plain ndarray. Fitting on `.values` and then
# predicting on the DataFrame makes scikit-learn warn that feature names are
# present at predict time but were absent at fit time.
candidate_matrix = possiblequeries.values
kmeans.fit(candidate_matrix)

labels = kmeans.predict(candidate_matrix)
centroids = kmeans.cluster_centers_

# For each centroid, vq returns the position of (and distance to) the nearest
# candidate row, so `closest` holds one candidate position per cluster.
# NOTE(review): nothing here checks that the positions are distinct - confirm
# that two centroids never map to the same candidate.
closest, distances = vq(centroids, candidate_matrix)

# Look up the chosen candidates; the original row labels are kept as the index
closest_points = possiblequeries.iloc[closest]

closest_points

Unnamed: 0,T20,T80,P188,DMSO,PG
5882,4,3,1,2,2
7046,5,2,3,4,2
2251,1,4,2,3,1
3400,2,3,4,2,4
1856,1,2,3,3,2
...,...,...,...,...,...
7514,5,4,4,4,2
2419,1,5,1,1,1
406,0,1,5,1,4
6334,4,5,1,5,4


In [9]:
# Exclude the formulations already picked as seeds (matched on row label)
already_selected = possiblequeries.index.isin(closest_points.index)
possiblequeries_update = possiblequeries.loc[~already_selected]

# Write the remaining untested formulations to disk (row labels included)
possiblequeries_update.to_csv("possiblequeries_seed_update.csv")

# Analyse seed data

In [269]:
# Load the three seed plates; every file is ';'-separated and its first
# five lines are skipped before the header row.
seed_plate_paths = (
    "seed_data-plate1.CSV",
    "seed_data-plate2.CSV",
    "seed_data-plate3.CSV",
)
plate1_file, plate2_file, plate3_file = [
    pd.read_csv(plate_path, sep=';', skiprows=5)
    for plate_path in seed_plate_paths
]

In [270]:
# Index each plate by its "Content" label and discard the "Well" column.
# Re-running this cell without re-reading the files fails, because
# "Content" is then no longer a column.
plate1_file, plate2_file, plate3_file = [
    raw_plate.set_index("Content").drop("Well", axis=1)
    for raw_plate in (plate1_file, plate2_file, plate3_file)
]

In [271]:
def process_plate(plate_file, wavelength, blank_correction=0.0383):
    """Extract the readings at one wavelength, ordered by formulation number.

    Parameters
    ----------
    plate_file : pandas.DataFrame
        Plate export whose index holds a 'Wavelength [nm]' row plus one row per
        well. Well labels are expected to contain 'X<number>' (for example
        'Sample X12'); the number after the 'X' is the formulation number.
    wavelength : int or float
        Wavelength (nm) whose readings are returned.
    blank_correction : float, optional
        Value subtracted from every reading. Defaults to 0.0383, the offset
        that was previously hard-coded. Pass 0 to return the readings
        untouched.

    Returns
    -------
    numpy.ndarray
        Readings at `wavelength`, sorted by formulation number. Rows whose
        label has no 'X<number>' part get a NaN formulation number and are
        sorted to the end.

    Raises
    ------
    KeyError
        If no column of the plate was measured at `wavelength`.
    """
    # Rotate so that every measured wavelength is a row and
    # 'Wavelength [nm]' becomes an ordinary column
    plate_file_T = plate_file.T
    # Convert all wavelength values to numbers so the comparison below works
    plate_file_T['Wavelength [nm]'] = pd.to_numeric(plate_file_T['Wavelength [nm]'])
    # Keep only the requested wavelength and rotate back; the explicit copy
    # makes the later column assignments target an independent frame
    wavelength_column = plate_file_T[plate_file_T["Wavelength [nm]"] == wavelength].T.copy()
    if wavelength_column.shape[1] == 0:
        raise KeyError(f"wavelength {wavelength!r} not found in plate file")
    # Use the wavelength value as the column name
    wavelength_column.columns = wavelength_column.loc["Wavelength [nm]"]
    # Remove the old row for wavelength (it is now the column name)
    wavelength_column = wavelength_column.drop("Wavelength [nm]")
    # Apply the blank offset; skipped entirely when the offset is zero
    if blank_correction:
        wavelength_column[wavelength] = wavelength_column[wavelength] - blank_correction
    # Copy the well labels into a column and parse the number after the 'X'
    wavelength_column['Content'] = wavelength_column.index
    wavelength_column['Formulation'] = pd.to_numeric(
        wavelength_column['Content'].str.split('X').str[1]
    )
    # Sort by formulation number so the output follows the order in which the
    # formulations were prepared
    wavelength_column = wavelength_column.sort_values(by='Formulation')
    wavelength_column = wavelength_column.reset_index(drop=True)
    return wavelength_column[wavelength].values

In [272]:
# Wavelength (nm) at which absorbance is read
wavelength = 435

# Extract the readings at that wavelength from each of the three seed plates
absorbance_p1, absorbance_p2, absorbance_p3 = [
    process_plate(seed_plate, wavelength)
    for seed_plate in (plate1_file, plate2_file, plate3_file)
]

In [273]:
# Gather the three seed replicates into one table, one row per sample
sample_numbers = range(1, len(absorbance_p1) + 1)
replicate_columns = ['Plate1_Absorbance', 'Plate2_Absorbance', 'Plate3_Absorbance']

combined_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    'Plate1_Absorbance': absorbance_p1,
    'Plate2_Absorbance': absorbance_p2,
    'Plate3_Absorbance': absorbance_p3
})

# Row-wise mean and standard deviation across the three plates
seed_replicates = combined_df[replicate_columns]
combined_df['Mean_Absorbance'] = seed_replicates.mean(axis=1)
combined_df['Std_Absorbance'] = seed_replicates.std(axis=1)


In [277]:
# Put the chosen formulations and their measured results side by side.
# Both frames are renumbered from 0 so that concat pairs them by position.
# Re-running this cell prepends the excipient columns a second time.

closest_points = closest_points.reset_index(drop=True)

combined_df = pd.concat(
    [closest_points, combined_df.reset_index(drop=True)],
    axis=1,
)

combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Plate1_Absorbance,Plate2_Absorbance,Plate3_Absorbance,Mean_Absorbance,Std_Absorbance
0,4,3,1,2,2,1,0.2357,0.1137,0.2577,0.202367,0.077571
1,5,2,3,4,2,2,0.1957,0.3127,0.2997,0.269367,0.064127
2,1,4,2,3,1,3,0.1407,0.2477,0.0847,0.1577,0.082819
3,2,3,4,2,4,4,0.1297,0.3887,0.1207,0.213033,0.152198
4,1,2,3,3,2,5,0.1327,0.1867,0.0767,0.132033,0.055003
...,...,...,...,...,...,...,...,...,...,...,...
91,5,4,4,4,2,92,0.1577,0.3997,0.4917,0.3497,0.172522
92,1,5,1,1,1,93,0.1477,0.1307,0.3837,0.2207,0.141418
93,0,1,5,1,4,94,0.0487,0.0447,0.1627,0.085367,0.067002
94,4,5,1,5,4,95,0.1677,0.1757,0.5017,0.2817,0.190568


In [331]:
## Start the master table that collects the results of every round

# Round 0 is the seed experiment
round_number = 0

# Excipient levels come straight from combined_df
excipient_columns = ['T20', 'T80', 'P188', 'DMSO', 'PG']
master_columns = {excipient: combined_df[excipient] for excipient in excipient_columns}

# Replicate readings, summary statistics and the round label
master_columns['Rep 1 Absorbance'] = absorbance_p1
master_columns['Rep 2 Absorbance'] = absorbance_p2
master_columns['Rep 3 Absorbance'] = absorbance_p3
master_columns['Mean Absorbance'] = combined_df['Mean_Absorbance']
master_columns['Std Absorbance'] = combined_df['Std_Absorbance']
master_columns['Round Number'] = round_number

master_results = pd.DataFrame(master_columns)

# master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0
...,...,...,...,...,...,...,...,...,...,...,...
91,5,4,4,4,2,0.1577,0.3997,0.4917,0.3497,0.172522,0
92,1,5,1,1,1,0.1477,0.1307,0.3837,0.2207,0.141418,0
93,0,1,5,1,4,0.0487,0.0447,0.1627,0.085367,0.067002,0
94,4,5,1,5,4,0.1677,0.1757,0.5017,0.2817,0.190568,0


# Seed Data Bayesian Optimisation 

In [334]:
# Define the search domain: five discrete variables, each with levels 0-5.
# The entries are listed in the same order as the columns extracted for X
# (T20, T80, P188, DMSO, PG) so that each label names the column it describes.
# The previous listing had the T80 and T20 labels swapped relative to X.
domain = [
    {'name': excipient, 'type': 'discrete', 'domain': (0, 1, 2, 3, 4, 5)}
    for excipient in ('T20', 'T80', 'P188', 'DMSO', 'PG')
]

In [335]:
# Objective values: the mean absorbance of each seed formulation, negated.
# NOTE(review): `filtered_df` is not defined in any visible cell of this
# notebook - confirm where it comes from (it is presumably a filtered copy of
# combined_df) so the notebook survives Restart & Run All.
# The result is shaped as a single column, i.e. (n_samples, 1).
y_init = np.array(-filtered_df['Mean_Absorbance']).reshape(-1, 1)

# Inputs: the excipient levels of the same rows
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
x_init = np.array(filtered_df[columns_to_extract])

# Working copies that are extended as rounds are added
x_step, y_step = x_init.copy(), y_init.copy()

# Running list of the smallest (negated) objective value seen per round
y_min = [y_init.min()]
y_min

[-0.3743666666666667]

In [336]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the optimiser from the existing observations only: no objective
# function is passed (f=None), just the X / Y data collected so far.
bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=x_step,
    Y=y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [337]:
# Ask the optimiser for the next batch of formulations, cast to integers
x_next = bo_step.suggest_next_locations().astype(int)

# Append the new batch below all previously collected x values.
# Only x is extended here; re-running the cell appends the batch again.
x_step = np.concatenate([x_step, x_next], axis=0)

x_next

array([[5, 5, 5, 0, 5],
       [5, 4, 3, 4, 1],
       [5, 4, 5, 5, 2],
       [5, 4, 5, 0, 5],
       [5, 4, 0, 5, 1],
       [4, 4, 4, 4, 1],
       [4, 5, 5, 3, 3],
       [3, 4, 4, 5, 2],
       [4, 4, 2, 3, 4],
       [4, 4, 0, 5, 2],
       [5, 5, 4, 2, 5],
       [5, 5, 5, 5, 0],
       [4, 5, 5, 3, 5],
       [4, 3, 0, 2, 1],
       [5, 4, 1, 0, 3],
       [5, 5, 5, 3, 0],
       [5, 2, 3, 5, 2],
       [5, 5, 1, 1, 1],
       [5, 0, 0, 4, 2],
       [5, 5, 0, 5, 5],
       [5, 3, 2, 2, 5],
       [5, 5, 3, 3, 1],
       [4, 3, 0, 3, 4],
       [5, 5, 5, 2, 5],
       [5, 4, 5, 4, 1],
       [5, 3, 4, 3, 0],
       [5, 5, 5, 2, 4],
       [5, 5, 0, 2, 3],
       [5, 4, 5, 4, 3],
       [4, 4, 0, 3, 4],
       [5, 4, 1, 4, 3]])

In [338]:
## Lay out the suggested formulations in the format the robot reads
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round1_totest = pd.DataFrame(x_next, columns=column_names)

# Append one all-zero (blank) row after the suggested formulations
round1_totest.loc[len(round1_totest)] = [0 for _ in column_names]

# Stack three copies of the list and export without the index
round1_totest_robot = pd.concat(
    [round1_totest, round1_totest, round1_totest],
    ignore_index=True,
)
round1_totest_robot.to_csv('round1_totest_robot.csv', index=False)

# Round 1

## Plate Processing

In [340]:
# New plate processing function for single plate rounds
def new_process_plate(plate_file, wavelength):
    """Return the readings at one wavelength, ordered by formulation number.

    `plate_file` is a DataFrame whose index holds a 'Wavelength [nm]' row and
    one row per well; well labels are expected to contain 'X<number>'. No
    offset is subtracted from the readings. Rows whose label has no
    'X<number>' part get a NaN formulation number and sort to the end.

    Returns a NumPy array of values, not a DataFrame.
    """
    # Rotate so each measured wavelength is a row, and make the wavelength numeric
    spectra = plate_file.T
    spectra['Wavelength [nm]'] = pd.to_numeric(spectra['Wavelength [nm]'])

    # Keep only the requested wavelength, then rotate back to one row per well
    at_target = spectra["Wavelength [nm]"] == wavelength
    readings = spectra[at_target].T

    # Promote the wavelength value to the column name and drop its row
    readings.columns = readings.loc["Wavelength [nm]"]
    readings = readings.drop("Wavelength [nm]")

    # Parse the formulation number that follows the 'X' in each well label
    readings['Content'] = readings.index
    readings['Formulation'] = pd.to_numeric(
        readings['Content'].str.split('X').str[1]
    )

    # Order by formulation number (the order the formulations were made in)
    ordered = readings.sort_values(by='Formulation').reset_index(drop=True)

    return ordered[wavelength].values

In [341]:
# Load the round 1 plate (';'-separated, first five lines skipped), index it
# by the "Content" label and discard the "Well" column
round1_plate = (
    pd.read_csv("round1.csv", sep=';', skiprows=5)
    .set_index("Content")
    .drop("Well", axis=1)
)

# Wavelength (nm) at which absorbance is read
wavelength = 435

round1_plate

Unnamed: 0_level_0,Blank corrected based on Raw Data (Abs Spectrum),Blank corrected based on Raw Data (Abs Spectrum).1,Blank corrected based on Raw Data (Abs Spectrum).2,Blank corrected based on Raw Data (Abs Spectrum).3,Blank corrected based on Raw Data (Abs Spectrum).4,Blank corrected based on Raw Data (Abs Spectrum).5,Blank corrected based on Raw Data (Abs Spectrum).6,Blank corrected based on Raw Data (Abs Spectrum).7,Blank corrected based on Raw Data (Abs Spectrum).8,Blank corrected based on Raw Data (Abs Spectrum).9,...,Blank corrected based on Raw Data (Abs Spectrum).148,Blank corrected based on Raw Data (Abs Spectrum).149,Blank corrected based on Raw Data (Abs Spectrum).150,Blank corrected based on Raw Data (Abs Spectrum).151,Blank corrected based on Raw Data (Abs Spectrum).152,Blank corrected based on Raw Data (Abs Spectrum).153,Blank corrected based on Raw Data (Abs Spectrum).154,Blank corrected based on Raw Data (Abs Spectrum).155,Blank corrected based on Raw Data (Abs Spectrum).156,Unnamed: 159
Content,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wavelength [nm],220,225,230,235,240,245,250,255,260,265,...,960.000000,965.000000,970.000000,975.000000,980.000000,985.000000,990.0000,995.000000,1000.000000,
Sample X1,-0.244,-0.242,-0.374,-0.344,-0.258,3.324,n.a.,-0.171,-0.053,-0.037,...,0.006000,0.005000,0.004000,0.004000,0.003000,0.005000,0.0040,0.004000,0.004000,
Sample X9,-0.335,-0.442,-0.008,-0.258,0.070,n.a.,n.a.,0.047,0.078,-0.055,...,0.005000,0.005000,0.004000,0.005000,0.004000,0.006000,0.0040,0.005000,0.005000,
Sample X17,-0.107,-0.145,-0.053,-0.183,-0.050,n.a.,n.a.,-0.035,n.a.,-0.022,...,0.028000,0.028000,0.028000,0.030000,0.030000,0.034000,0.0240,0.034000,0.032000,
Sample X25,0.048,0.094,-0.267,-0.017,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.161000,0.152000,0.157000,0.156000,0.162000,0.198000,0.1050,0.209000,0.191000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Blank B,,,,,,,,,,,...,,,,,,,,,,
Sample X70,0.405,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,0.034,...,0.001000,0.000667,0.000967,-0.000067,0.000533,-0.000067,0.0020,0.000167,0.000367,
Sample X78,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,0.097,...,0.002000,0.002000,0.002000,0.002000,0.001000,0.002000,0.0020,0.002000,0.002000,
Sample X86,0.584,0.425,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.000933,0.000567,0.000267,0.000533,0.000033,-0.000167,0.0002,-0.000133,0.000167,


In [342]:
# Extract the readings at the chosen wavelength, sorted by formulation number
round1_readings = new_process_plate(round1_plate, wavelength)

# Discard positions 93-95, which hold the blank rows after sorting
round1_absorbance = np.delete(round1_readings, [93, 94, 95])
round1_absorbance

array([0.271, 0.159, 0.171, 0.319, 0.547, 0.17, 0.231, 0.119, 0.104,
       0.174, 0.179, 0.143, 0.158, 0.27, 0.171, 0.192, 0.332, 0.492,
       0.394, 0.247, 0.376, 0.145, 0.452, 0.142, 0.881, 0.736, 0.454,
       0.36, 0.459, 0.45, 0.34, 0.315, 0.271, 0.159, 0.293, 0.278, 0.318,
       0.593, 0.317, 0.333, 0.275, 0.432, 0.461, 0.25, 0.164, 0.109,
       0.299, 0.211, 0.204, 0.368, 0.359, 0.118, 0.215, 0.261, 0.339,
       0.374, 0.363, 0.145, 0.19, 0.156, 0.175, 0.148, 0.314, 0.452,
       0.426, 0.325, 0.31, 0.219, 0.139, 0.321, 0.303, 0.432, 0.339,
       0.381, 0.238, 0.308, 0.151, 0.245, 0.205, 0.205, 0.375, 0.222,
       0.263, 0.172, 0.226, 0.194, 0.579, 0.239, 0.574, 0.39, 0.283,
       0.485, 0.449], dtype=object)

In [343]:
# Arrange the readings into one row per sample with three replicate columns
sample_numbers = range(1, 32)

# Positions 0-30, 31-61 and 62-92 of the readings are replicates 1, 2 and 3
replicate_windows = {
    'Rep 1 Absorbance': slice(0, 31),
    'Rep 2 Absorbance': slice(31, 62),
    'Rep 3 Absorbance': slice(62, 93),
}

round1_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    **{label: round1_absorbance[window] for label, window in replicate_windows.items()},
})

# Row-wise summary statistics across the replicates, plus the round label
round1_replicates = round1_df[list(replicate_windows)]
round1_df['Mean Absorbance'] = round1_replicates.mean(axis=1)
round1_df['Std Absorbance'] = round1_replicates.std(axis=1)
round1_df['Round Number'] = 1

round1_df

Unnamed: 0,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,1,0.271,0.315,0.314,0.3,0.02512,1
1,2,0.159,0.271,0.452,0.294,0.147848,1
2,3,0.171,0.159,0.426,0.252,0.150808,1
3,4,0.319,0.293,0.325,0.312333,0.01701,1
4,5,0.547,0.278,0.31,0.378333,0.146943,1
5,6,0.17,0.318,0.219,0.235667,0.075395,1
6,7,0.231,0.593,0.139,0.321,0.240008,1
7,8,0.119,0.317,0.321,0.252333,0.115487,1
8,9,0.104,0.333,0.303,0.246667,0.12446,1
9,10,0.174,0.275,0.432,0.293667,0.130009,1


In [344]:
# Pair each tested formulation with its measured results

# Row 31 of round1_totest is the appended all-zero blank row; remove it so
# the formulations line up with the result rows
chosen_round1 = (
    round1_totest.reset_index(drop=True)
    .drop([31])
    .reset_index(drop=True)
)

round1_df = round1_df.reset_index(drop=True)

round1_combined_df = pd.concat([chosen_round1, round1_df], axis=1)
# The CSV is written before "Relative SD" is added, so the file omits it
round1_combined_df.to_csv("round_1_results.csv", index=False)
# Relative standard deviation, as a percentage of the mean
round1_combined_df["Relative SD"] = (
    round1_combined_df["Std Absorbance"].div(round1_combined_df["Mean Absorbance"]).mul(100)
)
round1_combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,5,5,5,0,5,1,0.271,0.315,0.314,0.3,0.02512,1,8.373238
1,5,4,3,4,1,2,0.159,0.271,0.452,0.294,0.147848,1,50.2884
2,5,4,5,5,2,3,0.171,0.159,0.426,0.252,0.150808,1,59.844375
3,5,4,5,0,5,4,0.319,0.293,0.325,0.312333,0.01701,1,5.446041
4,5,4,0,5,1,5,0.547,0.278,0.31,0.378333,0.146943,1,38.839639
5,4,4,4,4,1,6,0.17,0.318,0.219,0.235667,0.075395,1,31.992016
6,4,5,5,3,3,7,0.231,0.593,0.139,0.321,0.240008,1,74.768951
7,3,4,4,5,2,8,0.119,0.317,0.321,0.252333,0.115487,1,45.767783
8,4,4,2,3,4,9,0.104,0.333,0.303,0.246667,0.12446,1,50.456825
9,4,4,0,5,2,10,0.174,0.275,0.432,0.293667,0.130009,1,44.270933


In [346]:
# Add the round 1 results to the master results sheet.
# This cell is safe to re-run: the drop tolerates an already-removed column,
# and rows from an earlier run of this cell are replaced rather than duplicated.

# "Sample Number" is not a master-sheet column
round1_combined_df = round1_combined_df.drop(columns="Sample Number", errors="ignore")

# Keep every row that does not belong to round 1, then append round 1
other_rounds = master_results[master_results['Round Number'] != 1]
master_results = pd.concat([other_rounds, round1_combined_df], axis=0, ignore_index=True)
master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0,
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0,
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0,
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0,
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
122,5,5,5,2,4,0.454,0.145,0.574,0.391,0.22133,1,56.606156
123,5,5,0,2,3,0.36,0.19,0.39,0.313333,0.107858,1,34.422744
124,5,4,5,4,3,0.459,0.156,0.283,0.299333,0.152159,1,50.832597
125,4,4,0,3,4,0.45,0.175,0.485,0.37,0.169779,1,45.886289


## Round 1 Bayesian Optimisation

In [348]:
# Define the search domain: five discrete variables, each with levels 0-5.
# The entries are listed in the same order as the columns extracted for X
# (T20, T80, P188, DMSO, PG) so that each label names the column it describes.
# The previous listing had the T80 and T20 labels swapped relative to X.
domain = [
    {'name': excipient, 'type': 'discrete', 'domain': (0, 1, 2, 3, 4, 5)}
    for excipient in ('T20', 'T80', 'P188', 'DMSO', 'PG')
]

In [349]:
# Objective values: the mean absorbance of every row in the master sheet
# (seed plus round 1), negated and shaped as one column, i.e. (n_rows, 1)
round1_y_init = np.array(-master_results['Mean Absorbance']).reshape(-1, 1)

# Inputs: the excipient levels of the same rows
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round1_x_init = np.array(master_results[columns_to_extract])

# Working copies that are extended with the next batch
round1_x_step, round1_y_step = round1_x_init.copy(), round1_y_init.copy()

# Record the smallest (negated) objective value after this round.
# Re-running this cell appends another entry to y_min.
y_min.append(round1_y_init.min())
y_min

[-0.3743666666666667, -0.6113333333333333]

In [350]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the optimiser from the existing observations only: no objective
# function is passed (f=None), just the X / Y data collected so far.
round1_bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=round1_x_step,
    Y=round1_y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [351]:
# Ask the optimiser for the next batch of formulations, cast to integers
round1_x_next = round1_bo_step.suggest_next_locations().astype(int)

# Append the new batch below all previously collected x values.
# Only x is extended here; re-running the cell appends the batch again.
round1_x_step = np.concatenate([round1_x_step, round1_x_next], axis=0)

round1_x_next

array([[5, 5, 5, 5, 5],
       [4, 4, 2, 5, 2],
       [5, 4, 3, 1, 0],
       [4, 3, 0, 5, 0],
       [4, 3, 0, 3, 2],
       [4, 5, 5, 5, 5],
       [5, 2, 1, 3, 0],
       [5, 4, 4, 2, 0],
       [5, 1, 1, 3, 4],
       [5, 5, 0, 4, 1],
       [4, 4, 0, 5, 0],
       [4, 3, 2, 5, 4],
       [4, 4, 5, 0, 2],
       [5, 2, 5, 5, 1],
       [5, 4, 5, 5, 5],
       [2, 4, 3, 4, 3],
       [4, 1, 3, 2, 4],
       [5, 3, 1, 1, 2],
       [5, 0, 1, 1, 2],
       [5, 4, 1, 4, 1],
       [5, 3, 3, 0, 1],
       [4, 3, 1, 3, 4],
       [0, 5, 3, 5, 5],
       [5, 0, 5, 2, 0],
       [4, 2, 2, 1, 5],
       [4, 4, 5, 4, 1],
       [0, 3, 4, 4, 5],
       [5, 0, 1, 4, 5],
       [3, 5, 5, 4, 5],
       [5, 3, 5, 0, 1],
       [1, 5, 5, 5, 2]])

In [352]:
## Lay out the suggested formulations in the format the robot reads
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round2_totest = pd.DataFrame(round1_x_next, columns=column_names)

# Append one all-zero (blank) row after the suggested formulations
round2_totest.loc[len(round2_totest)] = [0 for _ in column_names]

# Stack three copies of the list and export without the index
round2_totest_robot = pd.concat(
    [round2_totest, round2_totest, round2_totest],
    ignore_index=True,
)
round2_totest_robot.to_csv('round2_totest_robot.csv', index=False)
round2_totest_robot

Unnamed: 0,T20,T80,P188,DMSO,PG
0,5,5,5,5,5
1,4,4,2,5,2
2,5,4,3,1,0
3,4,3,0,5,0
4,4,3,0,3,2
...,...,...,...,...,...
91,5,0,1,4,5
92,3,5,5,4,5
93,5,3,5,0,1
94,1,5,5,5,2


# Round 2

## Plate Processing

In [353]:
# Load the round 2 plate (';'-separated, first five lines skipped), index it
# by the "Content" label and discard the "Well" column
round2_plate = (
    pd.read_csv("round2.CSV", sep=';', skiprows=5)
    .set_index("Content")
    .drop("Well", axis=1)
)

# Wavelength (nm) at which absorbance is read
wavelength = 435

round2_plate

Unnamed: 0_level_0,Blank corrected based on Raw Data (Abs Spectrum),Blank corrected based on Raw Data (Abs Spectrum).1,Blank corrected based on Raw Data (Abs Spectrum).2,Blank corrected based on Raw Data (Abs Spectrum).3,Blank corrected based on Raw Data (Abs Spectrum).4,Blank corrected based on Raw Data (Abs Spectrum).5,Blank corrected based on Raw Data (Abs Spectrum).6,Blank corrected based on Raw Data (Abs Spectrum).7,Blank corrected based on Raw Data (Abs Spectrum).8,Blank corrected based on Raw Data (Abs Spectrum).9,...,Blank corrected based on Raw Data (Abs Spectrum).148,Blank corrected based on Raw Data (Abs Spectrum).149,Blank corrected based on Raw Data (Abs Spectrum).150,Blank corrected based on Raw Data (Abs Spectrum).151,Blank corrected based on Raw Data (Abs Spectrum).152,Blank corrected based on Raw Data (Abs Spectrum).153,Blank corrected based on Raw Data (Abs Spectrum).154,Blank corrected based on Raw Data (Abs Spectrum).155,Blank corrected based on Raw Data (Abs Spectrum).156,Unnamed: 159
Content,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wavelength [nm],220.000,225.000,230,235,240,245,250,255,260,265,...,960.000,965.000,970.000,975.000,980.000,985.000,990.000,995.000,1000.000,
Sample X1,0.090,-0.102,-0.367,-0.148,-0.068,n.a.,n.a.,n.a.,-0.026,-0.186,...,0.008,0.007,0.006,0.007,0.006,0.008,0.006,0.007,0.008,
Sample X9,-0.037,-0.163,-0.208,0.188,-0.019,n.a.,n.a.,n.a.,-0.085,-0.054,...,0.007,0.007,0.006,0.006,0.005,0.008,0.005,0.007,0.007,
Sample X17,-0.095,-0.115,0.017,0.414,0.024,n.a.,n.a.,-0.008,-0.038,-0.073,...,0.010,0.008,0.008,0.010,0.008,0.010,0.008,0.010,0.010,
Sample X25,-0.095,-0.058,-0.090,0.096,-0.114,n.a.,n.a.,-0.020,-0.148,-0.090,...,0.008,0.007,0.007,0.007,0.006,0.008,0.006,0.008,0.008,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Blank B,,,,,,,,,,,...,,,,,,,,,,
Sample X70,0.057,-0.015,n.a.,0.310,-0.034,n.a.,n.a.,n.a.,n.a.,0.081,...,0.013,0.013,0.014,0.014,0.014,0.015,0.014,0.015,0.015,
Sample X78,0.271,0.359,n.a.,n.a.,0.155,n.a.,n.a.,n.a.,n.a.,0.002,...,0.006,0.005,0.004,0.005,0.004,0.006,0.004,0.005,0.005,
Sample X86,0.110,-0.036,0.110,0.156,0.137,n.a.,n.a.,n.a.,-0.038,-0.024,...,0.016,0.017,0.016,0.016,0.017,0.017,0.016,0.017,0.016,


In [354]:
# Extract the readings at the chosen wavelength, sorted by formulation number
round2_readings = new_process_plate(round2_plate, wavelength)

# Discard positions 93-95, which hold the blank rows after sorting
round2_absorbance = np.delete(round2_readings, [93, 94, 95])
round2_absorbance

array([0.21, 0.258, 0.441, 0.541, 0.464, 0.231, 0.433, 0.44, 0.222, 0.213,
       0.25, 0.179, 0.224, 0.538, 0.493, 0.242, 0.226, 0.242, 0.353,
       0.456, 0.48, 0.259, 0.575, 0.237, 0.224, 0.287, 0.307, 0.351,
       0.578, 0.355, 0.436, 0.13, 0.132, 0.173, 0.214, 0.336, 0.197,
       0.296, 0.203, 0.242, 0.238, 0.277, 0.224, 0.302, 0.152, 0.448,
       0.271, 0.242, 0.264, 0.271, 0.777, 0.666, 0.437, 0.917, 0.461,
       0.234, 0.221, 0.18, 0.271, 0.239, 0.3, 0.214, 1.165, 0.851, 0.634,
       0.884, 0.521, 0.355, 0.822, 0.551, 0.192, 0.249, 0.123, 0.153,
       0.246, 0.194, 0.301, 0.221, 0.157, 0.235, 0.198, 0.404, 0.273,
       0.197, 0.392, 0.242, 0.173, 0.129, 0.158, 0.269, 0.323, 0.195,
       0.248], dtype=object)

In [355]:
# Arrange the readings into one row per sample with three replicate columns
sample_numbers = range(1, 32)

# Positions 0-30, 31-61 and 62-92 of the readings are replicates 1, 2 and 3
replicate_windows = {
    'Rep 1 Absorbance': slice(0, 31),
    'Rep 2 Absorbance': slice(31, 62),
    'Rep 3 Absorbance': slice(62, 93),
}

round2_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    **{label: round2_absorbance[window] for label, window in replicate_windows.items()},
})

# Row-wise summary statistics across the replicates, plus the round label
round2_replicates = round2_df[list(replicate_windows)]
round2_df['Mean Absorbance'] = round2_replicates.mean(axis=1)
round2_df['Std Absorbance'] = round2_replicates.std(axis=1)
round2_df['Round Number'] = 2

round2_df

Unnamed: 0,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,1,0.21,0.13,1.165,0.501667,0.575854,2
1,2,0.258,0.132,0.851,0.413667,0.383946,2
2,3,0.441,0.173,0.634,0.416,0.231515,2
3,4,0.541,0.214,0.884,0.546333,0.335032,2
4,5,0.464,0.336,0.521,0.440333,0.094744,2
5,6,0.231,0.197,0.355,0.261,0.083162,2
6,7,0.433,0.296,0.822,0.517,0.272875,2
7,8,0.44,0.203,0.551,0.398,0.177761,2
8,9,0.222,0.242,0.192,0.218667,0.025166,2
9,10,0.213,0.238,0.249,0.233333,0.018448,2


In [356]:
# Pair each tested formulation with its measured results

# Row 31 of round2_totest is the appended all-zero blank row; remove it so
# the formulations line up with the result rows
chosen_round2 = (
    round2_totest.reset_index(drop=True)
    .drop([31])
    .reset_index(drop=True)
)

round2_df = round2_df.reset_index(drop=True)

round2_combined_df = pd.concat([chosen_round2, round2_df], axis=1)
# The CSV is written before "Relative SD" is added, so the file omits it
round2_combined_df.to_csv("round_2_results.csv", index=False)
# Relative standard deviation, as a percentage of the mean
round2_combined_df["Relative SD"] = (
    round2_combined_df["Std Absorbance"].div(round2_combined_df["Mean Absorbance"]).mul(100)
)
round2_combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,5,5,5,5,5,1,0.21,0.13,1.165,0.501667,0.575854,2,114.78826
1,4,4,2,5,2,2,0.258,0.132,0.851,0.413667,0.383946,2,92.815248
2,5,4,3,1,0,3,0.441,0.173,0.634,0.416,0.231515,2,55.652543
3,4,3,0,5,0,4,0.541,0.214,0.884,0.546333,0.335032,2,61.323705
4,4,3,0,3,2,5,0.464,0.336,0.521,0.440333,0.094744,2,21.516316
5,4,5,5,5,5,6,0.231,0.197,0.355,0.261,0.083162,2,31.863024
6,5,2,1,3,0,7,0.433,0.296,0.822,0.517,0.272875,2,52.780547
7,5,4,4,2,0,8,0.44,0.203,0.551,0.398,0.177761,2,44.663587
8,5,1,1,3,4,9,0.222,0.242,0.192,0.218667,0.025166,2,11.508894
9,5,5,0,4,1,10,0.213,0.238,0.249,0.233333,0.018448,2,7.906339


In [358]:
# Add the round 2 results to the master results sheet.
# This cell is safe to re-run: the drop tolerates an already-removed column,
# and rows from an earlier run of this cell are replaced rather than duplicated.

# "Sample Number" is not a master-sheet column
round2_combined_df = round2_combined_df.drop(columns="Sample Number", errors="ignore")

# Keep every row that does not belong to round 2, then append round 2
other_rounds = master_results[master_results['Round Number'] != 2]
master_results = pd.concat([other_rounds, round2_combined_df], axis=0, ignore_index=True)
master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0,
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0,
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0,
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0,
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
153,0,3,4,4,5,0.307,0.18,0.158,0.215,0.08043,2,37.409346
154,5,0,1,4,5,0.351,0.271,0.269,0.297,0.046776,2,15.749516
155,3,5,5,4,5,0.578,0.239,0.323,0.38,0.176542,2,46.458363
156,5,3,5,0,1,0.355,0.3,0.195,0.283333,0.081292,2,28.691173


## Round 2 Bayesian Optimisation

In [359]:
# Define the search domain: five discrete variables, each with levels 0-5.
# The entries are listed in the same order as the columns extracted for X
# (T20, T80, P188, DMSO, PG) so that each label names the column it describes.
# The previous listing had the T80 and T20 labels swapped relative to X.
domain = [
    {'name': excipient, 'type': 'discrete', 'domain': (0, 1, 2, 3, 4, 5)}
    for excipient in ('T20', 'T80', 'P188', 'DMSO', 'PG')
]

In [360]:
# Objective values: the mean absorbance of every row in the master sheet
# (all rounds so far), negated and shaped as one column, i.e. (n_rows, 1)
round2_y_init = np.array(-master_results['Mean Absorbance']).reshape(-1, 1)

# Inputs: the excipient levels of the same rows
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round2_x_init = np.array(master_results[columns_to_extract])

# Working copies that are extended with the next batch
round2_x_step, round2_y_step = round2_x_init.copy(), round2_y_init.copy()

# Record the smallest (negated) objective value after this round.
# Re-running this cell appends another entry to y_min.
y_min.append(round2_y_init.min())
y_min

[-0.3743666666666667, -0.6113333333333333, -0.628]

In [361]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the optimiser from the existing observations only: no objective
# function is passed (f=None), just the X / Y data collected so far.
round2_bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=round2_x_step,
    Y=round2_y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [362]:
# Ask the optimiser for the next batch of formulations, cast to integers
round2_x_next = round2_bo_step.suggest_next_locations().astype(int)

# Append the new batch below all previously collected x values.
# Only x is extended here; re-running the cell appends the batch again.
round2_x_step = np.concatenate([round2_x_step, round2_x_next], axis=0)

round2_x_next

array([[5, 4, 3, 5, 4],
       [5, 4, 0, 5, 0],
       [5, 3, 1, 5, 4],
       [4, 4, 3, 1, 3],
       [5, 3, 3, 4, 3],
       [5, 4, 5, 1, 2],
       [5, 0, 1, 4, 2],
       [4, 4, 5, 3, 0],
       [0, 5, 4, 5, 5],
       [3, 4, 5, 0, 3],
       [5, 5, 2, 5, 3],
       [5, 3, 5, 4, 2],
       [5, 0, 5, 5, 2],
       [5, 2, 2, 0, 4],
       [4, 3, 1, 5, 2],
       [5, 2, 4, 5, 5],
       [5, 1, 0, 5, 2],
       [5, 1, 2, 1, 1],
       [5, 5, 1, 3, 5],
       [4, 3, 1, 0, 3],
       [3, 5, 5, 5, 4],
       [2, 4, 3, 5, 1],
       [5, 2, 1, 5, 0],
       [4, 1, 3, 3, 5],
       [4, 2, 2, 3, 1],
       [3, 0, 0, 5, 4],
       [5, 0, 0, 3, 0],
       [5, 5, 0, 5, 1],
       [5, 4, 3, 5, 2],
       [4, 4, 1, 1, 1],
       [5, 5, 5, 3, 2]])

In [363]:
## Lay out the suggested formulations in the format the robot reads
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round3_totest = pd.DataFrame(round2_x_next, columns=column_names)

# Append one all-zero (blank) row after the suggested formulations
round3_totest.loc[len(round3_totest)] = [0 for _ in column_names]

# Stack three copies of the list and export without the index
round3_totest_robot = pd.concat(
    [round3_totest, round3_totest, round3_totest],
    ignore_index=True,
)
round3_totest_robot.to_csv('round3_totest_robot.csv', index=False)
round3_totest_robot

Unnamed: 0,T20,T80,P188,DMSO,PG
0,5,4,3,5,4
1,5,4,0,5,0
2,5,3,1,5,4
3,4,4,3,1,3
4,5,3,3,4,3
...,...,...,...,...,...
91,5,5,0,5,1
92,5,4,3,5,2
93,4,4,1,1,1
94,5,5,5,3,2


# Round 3

## Plate processing

In [364]:
# Load the round 3 plate (';'-separated, first five lines skipped), index it
# by the "Content" label and discard the "Well" column
round3_plate = (
    pd.read_csv("round3.CSV", sep=';', skiprows=5)
    .set_index("Content")
    .drop("Well", axis=1)
)

# Wavelength (nm) at which absorbance is read
wavelength = 435

round3_plate

Unnamed: 0_level_0,Blank corrected based on Raw Data (Abs Spectrum),Blank corrected based on Raw Data (Abs Spectrum).1,Blank corrected based on Raw Data (Abs Spectrum).2,Blank corrected based on Raw Data (Abs Spectrum).3,Blank corrected based on Raw Data (Abs Spectrum).4,Blank corrected based on Raw Data (Abs Spectrum).5,Blank corrected based on Raw Data (Abs Spectrum).6,Blank corrected based on Raw Data (Abs Spectrum).7,Blank corrected based on Raw Data (Abs Spectrum).8,Blank corrected based on Raw Data (Abs Spectrum).9,...,Blank corrected based on Raw Data (Abs Spectrum).148,Blank corrected based on Raw Data (Abs Spectrum).149,Blank corrected based on Raw Data (Abs Spectrum).150,Blank corrected based on Raw Data (Abs Spectrum).151,Blank corrected based on Raw Data (Abs Spectrum).152,Blank corrected based on Raw Data (Abs Spectrum).153,Blank corrected based on Raw Data (Abs Spectrum).154,Blank corrected based on Raw Data (Abs Spectrum).155,Blank corrected based on Raw Data (Abs Spectrum).156,Unnamed: 159
Content,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wavelength [nm],220,225,230,235,240,245,250,255,260,265,...,960.000,965.000,970.000,975.000,980.000,985.000,990.0000,995.000,1000.000000,
Sample X1,0.078,0.135,0.121,0.052,n.a.,n.a.,n.a.,n.a.,0.151,0.055,...,0.079,0.078,0.078,0.080,0.082,0.094,0.0620,0.097,0.092000,
Sample X9,0.364,0.107,0.115,-0.088,0.075,n.a.,n.a.,n.a.,0.101,n.a.,...,-0.005,-0.004,-0.005,-0.004,-0.005,-0.005,-0.0020,-0.006,-0.005000,
Sample X17,-0.350,0.503,0.016,-0.037,n.a.,n.a.,n.a.,n.a.,0.156,0.034,...,-0.002,-0.003,-0.004,-0.002,-0.002,-0.002,-0.0009,-0.004,-0.000667,
Sample X25,-0.319,0.371,0.374,n.a.,0.104,n.a.,n.a.,n.a.,n.a.,0.042,...,0.008,0.007,0.006,0.008,0.007,0.009,0.0080,0.008,0.009000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Blank B,,,,,,,,,,,...,,,,,,,,,,
Sample X70,0.289,0.661,0.265,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,-0.005,-0.004,-0.004,-0.005,-0.005,-0.005,-0.0030,-0.006,-0.004000,
Sample X78,-0.052,0.164,0.181,-0.046,n.a.,n.a.,n.a.,n.a.,n.a.,0.032,...,-0.006,-0.005,-0.005,-0.006,-0.006,-0.006,-0.0030,-0.008,-0.006000,
Sample X86,0.040,0.144,0.020,-0.167,-0.045,n.a.,n.a.,n.a.,0.124,0.056,...,0.004,0.006,0.005,0.005,0.006,0.006,0.0060,0.006,0.006000,


In [365]:
# Process the plate at the selected wavelength, then remove the blank
# entries held at positions 93, 94 and 95 of the result
round3_absorbance = np.delete(
    new_process_plate(round3_plate, wavelength),
    [93, 94, 95],
)
round3_absorbance

array([1.051, 0.608, 0.62, 0.618, 0.794, 1.068, 0.707, 0.488, 0.696,
       0.458, 0.541, 0.812, 0.803, 0.634, 0.447, 0.406, 0.695, 0.558,
       0.671, 0.718, 1.287, 1.178, 0.524, 0.654, 0.67, 0.475, 0.565,
       0.745, 0.32, 0.606, 0.299, 0.834, 0.554, 0.877, 0.695, 0.642,
       0.882, 0.845, 0.51, 0.454, 0.408, 0.32, 0.461, 0.637, 0.526, 0.391,
       0.528, 0.388, 0.359, 0.732, 0.531, 0.332, 0.452, 0.283, 0.382,
       0.512, 0.397, 0.385, 0.461, 0.191, 0.36, 0.315, 0.614, 0.387,
       0.334, 0.444, 0.408, 0.582, 0.628, 0.475, 0.456, 0.231, 0.474,
       0.398, 0.423, 0.382, 0.271, 0.28, 0.353, 0.421, 0.587, 0.437,
       0.498, 0.449, 0.352, 0.325, 1.491, 0.508, 0.866, 0.666, 0.292,
       0.436, 0.459], dtype=object)

In [366]:
# Lay the 93 readings out as 31 samples x 3 replicates
sample_numbers = range(1, 32)

round3_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    'Rep 1 Absorbance': round3_absorbance[:31],
    'Rep 2 Absorbance': round3_absorbance[31:62],
    'Rep 3 Absorbance': round3_absorbance[62:93],
})

# Per-sample mean and standard deviation across the three replicate columns
round3_df['Mean Absorbance'] = (
    round3_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].mean(axis='columns')
)
round3_df['Std Absorbance'] = (
    round3_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].std(axis='columns')
)
round3_df['Round Number'] = 3

round3_df

Unnamed: 0,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,1,1.051,0.834,0.614,0.833,0.218502,3
1,2,0.608,0.554,0.387,0.516333,0.115214,3
2,3,0.62,0.877,0.334,0.610333,0.271629,3
3,4,0.618,0.695,0.444,0.585667,0.128586,3
4,5,0.794,0.642,0.408,0.614667,0.194446,3
5,6,1.068,0.882,0.582,0.844,0.245218,3
6,7,0.707,0.845,0.628,0.726667,0.109829,3
7,8,0.488,0.51,0.475,0.491,0.017692,3
8,9,0.696,0.454,0.456,0.535333,0.139145,3
9,10,0.458,0.408,0.231,0.365667,0.119274,3


In [367]:
# Pair each tested formulation with its measured absorbances

# Row 31 of the worklist is the blank input, so it is dropped before joining
chosen_round3 = (
    round3_totest
    .reset_index(drop=True)
    .drop(index=[31])
    .reset_index(drop=True)
)

round3_df = round3_df.reset_index(drop=True)

round3_combined_df = pd.concat([chosen_round3, round3_df], axis='columns')
# Note: the CSV is written before "Relative SD" is added, so the file does not contain it
round3_combined_df.to_csv("round_3_results.csv", index=False)
round3_combined_df["Relative SD"] = (
    round3_combined_df["Std Absorbance"]
    .div(round3_combined_df["Mean Absorbance"])
    .mul(100)
)
round3_combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,5,4,3,5,4,1,1.051,0.834,0.614,0.833,0.218502,3,26.230698
1,5,4,0,5,0,2,0.608,0.554,0.387,0.516333,0.115214,3,22.313937
2,5,3,1,5,4,3,0.62,0.877,0.334,0.610333,0.271629,3,44.505031
3,4,4,3,1,3,4,0.618,0.695,0.444,0.585667,0.128586,3,21.955475
4,5,3,3,4,3,5,0.794,0.642,0.408,0.614667,0.194446,3,31.634418
5,5,4,5,1,2,6,1.068,0.882,0.582,0.844,0.245218,3,29.054297
6,5,0,1,4,2,7,0.707,0.845,0.628,0.726667,0.109829,3,15.114035
7,4,4,5,3,0,8,0.488,0.51,0.475,0.491,0.017692,3,3.603219
8,0,5,4,5,5,9,0.696,0.454,0.456,0.535333,0.139145,3,25.992218
9,3,4,5,0,3,10,0.458,0.408,0.231,0.365667,0.119274,3,32.618285


In [369]:
# Add the round 3 results to the master results sheet.
# errors="ignore" lets this cell be re-run after "Sample Number" has already been dropped.
round3_combined_df = round3_combined_df.drop("Sample Number", axis=1, errors="ignore")

# Discard any round 3 rows left by an earlier run of this cell, so that a
# re-run replaces them instead of appending duplicates to the master sheet.
master_results = master_results[master_results["Round Number"] != 3]

master_results = pd.concat([master_results, round3_combined_df], axis=0, ignore_index=True)
master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0,
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0,
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0,
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0,
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
184,5,0,0,3,0,0.565,0.385,0.866,0.605333,0.243023,3,40.147024
185,5,5,0,5,1,0.745,0.461,0.666,0.624,0.146584,3,23.491097
186,5,4,3,5,2,0.32,0.191,0.292,0.267667,0.067855,3,25.350649
187,4,4,1,1,1,0.606,0.36,0.436,0.467333,0.125958,3,26.952425


## Bayesian optimisation

In [370]:
# Define the search domain: five discrete variables, each taking a level 0-5.
# Entries are matched to the columns of X by position, so they are listed in
# the same order as the excipient columns used to build X
# (T20, T80, P188, DMSO, PG); each name then labels the right column.
domain =[{'name': 'T20', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'T80', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'P188', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'DMSO', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'PG', 'type': 'discrete', 'domain': (0,1,2,3,4,5)}]

In [371]:
# Initial x and y data. The optimiser is built without a maximize flag and
# GPyOpt minimises by default, so the mean absorbance is negated: minimising
# -absorbance searches for the highest absorbance.
round3_y_init=np.array(-master_results['Mean Absorbance'])

# Reshape into an (n_samples, 1) column vector
round3_y_init=round3_y_init.reshape(len(round3_y_init),1)

# Extract excipient concs from dataframe
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round3_x_init = np.array(master_results[columns_to_extract])

round3_x_step = round3_x_init.copy()
round3_y_step = round3_y_init.copy()

# Record the lowest objective value seen so far. y_min holds one entry per
# round (index 0 = seed data), so round 3 belongs in slot 3. Overwriting the
# slot when it already exists keeps the list from growing if this cell is re-run.
round3_best = round3_y_init.min()
if len(y_min) > 3:
    y_min[3] = round3_best
else:
    y_min.append(round3_best)
y_min

[-0.3743666666666667, -0.6113333333333333, -0.628, -0.891]

In [372]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the Bayesian optimiser from the existing observations only (f=None)
round3_bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=round3_x_step,
    Y=round3_y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [373]:
# Ask the optimiser for the next batch of locations and cast them to integers
round3_x_next = round3_bo_step.suggest_next_locations().astype(int)

# Append the suggested points to the working x array (only x is extended here)
round3_x_step = np.concatenate((round3_x_step, round3_x_next), axis=0)
round3_x_next

array([[5, 4, 4, 4, 1],
       [5, 1, 0, 5, 0],
       [4, 4, 3, 3, 4],
       [5, 1, 2, 4, 1],
       [4, 4, 5, 5, 1],
       [5, 0, 0, 5, 1],
       [4, 1, 3, 5, 0],
       [4, 3, 1, 2, 0],
       [5, 1, 0, 2, 1],
       [5, 5, 3, 5, 5],
       [2, 4, 3, 5, 3],
       [5, 3, 2, 3, 4],
       [5, 5, 0, 1, 0],
       [3, 4, 5, 1, 1],
       [4, 3, 1, 0, 2],
       [5, 3, 4, 2, 1],
       [4, 2, 2, 4, 2],
       [5, 0, 2, 0, 5],
       [5, 5, 2, 2, 5],
       [5, 0, 0, 5, 5],
       [5, 0, 2, 4, 2],
       [5, 1, 1, 2, 0],
       [5, 4, 0, 2, 2],
       [5, 1, 2, 5, 4],
       [0, 3, 4, 5, 0],
       [5, 2, 4, 5, 2],
       [4, 3, 3, 4, 4],
       [4, 4, 1, 4, 4],
       [5, 2, 5, 3, 2],
       [4, 1, 2, 3, 4],
       [3, 0, 3, 3, 1]])

In [374]:
## Build the round 4 worklist for the robot
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round4_totest = pd.DataFrame(data=round3_x_next, columns=column_names)

# Append one all-zero row, which acts as the blank
round4_totest.loc[round4_totest.shape[0]] = [0 for _ in column_names]

# Stack three copies so that every row (blank included) is listed in triplicate
round4_totest_robot = pd.concat([round4_totest for _ in range(3)], ignore_index=True)
round4_totest_robot.to_csv('round4_totest_robot.csv', index=False)
round4_totest_robot

Unnamed: 0,T20,T80,P188,DMSO,PG
0,5,4,4,4,1
1,5,1,0,5,0
2,4,4,3,3,4
3,5,1,2,4,1
4,4,4,5,5,1
...,...,...,...,...,...
91,4,4,1,4,4
92,5,2,5,3,2
93,4,1,2,3,4
94,3,0,3,3,1


# Round 4

## Plate Processing

In [375]:
# Load the round 4 plate export (semicolon-delimited, first 5 lines skipped),
# index it by "Content" and discard the "Well" column
round4_plate = (
    pd.read_csv("round4.CSV", sep=';', skiprows=5)
    .set_index("Content")
    .drop(columns="Well")
)

# Wavelength handed to new_process_plate when the plate is processed
wavelength = 435

round4_plate

Unnamed: 0_level_0,Blank corrected based on Raw Data (Abs Spectrum),Blank corrected based on Raw Data (Abs Spectrum).1,Blank corrected based on Raw Data (Abs Spectrum).2,Blank corrected based on Raw Data (Abs Spectrum).3,Blank corrected based on Raw Data (Abs Spectrum).4,Blank corrected based on Raw Data (Abs Spectrum).5,Blank corrected based on Raw Data (Abs Spectrum).6,Blank corrected based on Raw Data (Abs Spectrum).7,Blank corrected based on Raw Data (Abs Spectrum).8,Blank corrected based on Raw Data (Abs Spectrum).9,...,Blank corrected based on Raw Data (Abs Spectrum).148,Blank corrected based on Raw Data (Abs Spectrum).149,Blank corrected based on Raw Data (Abs Spectrum).150,Blank corrected based on Raw Data (Abs Spectrum).151,Blank corrected based on Raw Data (Abs Spectrum).152,Blank corrected based on Raw Data (Abs Spectrum).153,Blank corrected based on Raw Data (Abs Spectrum).154,Blank corrected based on Raw Data (Abs Spectrum).155,Blank corrected based on Raw Data (Abs Spectrum).156,Unnamed: 159
Content,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wavelength [nm],220,225,230,235,240,245,250,255,260,265,...,960.000,965.000,970.000,975.000,980.000,985.000,990.000,995.000,1000.000,
Sample X1,-0.762,-0.627,-0.489,3.271,3.329,n.a.,n.a.,3.482,3.467,n.a.,...,0.033,0.033,0.033,0.034,0.034,0.041,0.027,0.042,0.040,
Sample X9,0.166,-0.173,-0.025,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.166,0.155,0.161,0.154,0.168,0.205,0.100,0.222,0.194,
Sample X17,-0.744,-0.448,-0.370,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,3.476,...,0.062,0.060,0.061,0.062,0.064,0.077,0.045,0.080,0.074,
Sample X25,-0.135,-0.410,-0.028,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.164,0.151,0.155,0.150,0.163,0.207,0.088,0.226,0.195,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Blank B,,,,,,,,,,,...,,,,,,,,,,
Sample X70,-0.091,0.333,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.128,0.124,0.127,0.125,0.130,0.150,0.097,0.157,0.144,
Sample X78,0.267,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.012,0.013,0.014,0.014,0.014,0.016,0.012,0.017,0.016,
Sample X86,0.380,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.005,0.006,0.006,0.006,0.005,0.008,0.004,0.008,0.007,


In [376]:
# Process the plate at the selected wavelength, then remove the blank
# entries held at positions 93, 94 and 95 of the result
round4_absorbance = np.delete(
    new_process_plate(round4_plate, wavelength),
    [93, 94, 95],
)
round4_absorbance

array([0.165, 0.617, 0.469, 0.296, 0.488, 0.528, 0.416, 0.266, 0.609,
       1.449, 1.298, 0.855, 0.678, 0.953, 0.819, 0.734, 0.125, 0.292,
       0.233, 0.275, 0.424, 0.187, 0.207, 0.2, 0.429, 0.896, 0.519, 0.78,
       0.609, 0.418, 0.261, 0.229, 0.408, 0.425, 0.418, 0.345, 0.561,
       0.345, 0.432, 0.264, 0.425, 0.258, 0.32, 0.35, 0.335, 0.406, 0.386,
       0.313, 0.4, 0.664, 0.42, 0.537, 0.429, 0.733, 0.236, 0.31, 0.338,
       0.223, 0.315, 0.447, 0.393, 0.384, 0.346, 0.57, 0.557, 0.523, 0.57,
       0.374, 0.699, 0.43, 0.327, 0.332, 0.307, 0.338, 0.39, 0.274, 0.464,
       0.258, 0.64, 0.414, 0.643, 0.551, 0.708, 0.438, 0.655, 0.402,
       0.566, 0.784, 0.44, 0.463, 0.5, 0.501, 0.967], dtype=object)

In [377]:
# Lay the 93 readings out as 31 samples x 3 replicates
sample_numbers = range(1, 32)

round4_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    'Rep 1 Absorbance': round4_absorbance[:31],
    'Rep 2 Absorbance': round4_absorbance[31:62],
    'Rep 3 Absorbance': round4_absorbance[62:93],
})

# Per-sample mean and standard deviation across the three replicate columns
round4_df['Mean Absorbance'] = (
    round4_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].mean(axis='columns')
)
round4_df['Std Absorbance'] = (
    round4_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].std(axis='columns')
)
round4_df['Round Number'] = 4

round4_df

Unnamed: 0,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,1,0.165,0.229,0.346,0.246667,0.091784,4
1,2,0.617,0.408,0.57,0.531667,0.109646,4
2,3,0.469,0.425,0.557,0.483667,0.067211,4
3,4,0.296,0.418,0.523,0.412333,0.113606,4
4,5,0.488,0.345,0.57,0.467667,0.11387,4
5,6,0.528,0.561,0.374,0.487667,0.099811,4
6,7,0.416,0.345,0.699,0.486667,0.187281,4
7,8,0.266,0.432,0.43,0.376,0.095268,4
8,9,0.609,0.264,0.327,0.4,0.18372,4
9,10,1.449,0.425,0.332,0.735333,0.6198,4


In [378]:
# Pair each tested formulation with its measured absorbances

# Row 31 of the worklist is the blank input, so it is dropped before joining
chosen_round4 = (
    round4_totest
    .reset_index(drop=True)
    .drop(index=[31])
    .reset_index(drop=True)
)

round4_df = round4_df.reset_index(drop=True)

round4_combined_df = pd.concat([chosen_round4, round4_df], axis='columns')
# Note: the CSV is written before "Relative SD" is added, so the file does not contain it
round4_combined_df.to_csv("round_4_results.csv", index=False)
round4_combined_df["Relative SD"] = (
    round4_combined_df["Std Absorbance"]
    .div(round4_combined_df["Mean Absorbance"])
    .mul(100)
)
round4_combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,5,4,4,4,1,1,0.165,0.229,0.346,0.246667,0.091784,4,37.209797
1,5,1,0,5,0,2,0.617,0.408,0.57,0.531667,0.109646,4,20.623148
2,4,4,3,3,4,3,0.469,0.425,0.557,0.483667,0.067211,4,13.896163
3,5,1,2,4,1,4,0.296,0.418,0.523,0.412333,0.113606,4,27.551991
4,4,4,5,5,1,5,0.488,0.345,0.57,0.467667,0.11387,4,24.348498
5,5,0,0,5,1,6,0.528,0.561,0.374,0.487667,0.099811,4,20.467154
6,4,1,3,5,0,7,0.416,0.345,0.699,0.486667,0.187281,4,38.482485
7,4,3,1,2,0,8,0.266,0.432,0.43,0.376,0.095268,4,25.337245
8,5,1,0,2,1,9,0.609,0.264,0.327,0.4,0.18372,4,45.929974
9,5,5,3,5,5,10,1.449,0.425,0.332,0.735333,0.6198,4,84.288337


In [380]:
 # Add results to master results sheet
# errors="ignore" lets this cell be re-run after "Sample Number" has already been dropped.
round4_combined_df = round4_combined_df.drop("Sample Number", axis=1, errors="ignore")

# Discard any round 4 rows left by an earlier run of this cell, so that a
# re-run replaces them instead of appending duplicates to the master sheet.
master_results = master_results[master_results["Round Number"] != 4]

master_results = pd.concat([master_results, round4_combined_df], axis=0, ignore_index=True)
master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0,
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0,
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0,
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0,
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
215,4,3,3,4,4,0.519,0.223,0.44,0.394,0.153268,4,38.900442
216,4,4,1,4,4,0.78,0.315,0.463,0.519333,0.237563,4,45.743901
217,5,2,5,3,2,0.609,0.447,0.5,0.518667,0.082597,4,15.924952
218,4,1,2,3,4,0.418,0.393,0.501,0.437333,0.056536,4,12.927467


## Bayesian optimisation

In [381]:
# Define the search domain: five discrete variables, each taking a level 0-5.
# Entries are matched to the columns of X by position, so they are listed in
# the same order as the excipient columns used to build X
# (T20, T80, P188, DMSO, PG); each name then labels the right column.
domain =[{'name': 'T20', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'T80', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'P188', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'DMSO', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'PG', 'type': 'discrete', 'domain': (0,1,2,3,4,5)}]

In [382]:
# Initial x and y data. The optimiser is built without a maximize flag and
# GPyOpt minimises by default, so the mean absorbance is negated: minimising
# -absorbance searches for the highest absorbance.
round4_y_init=np.array(-master_results['Mean Absorbance'])

# Reshape into an (n_samples, 1) column vector
round4_y_init=round4_y_init.reshape(len(round4_y_init),1)

# Extract excipient concs from dataframe
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round4_x_init = np.array(master_results[columns_to_extract])

round4_x_step = round4_x_init.copy()
round4_y_step = round4_y_init.copy()

# Record the lowest objective value seen so far. y_min holds one entry per
# round (index 0 = seed data), so round 4 belongs in slot 4. Overwriting the
# slot when it already exists keeps the list from growing if this cell is re-run.
round4_best = round4_y_init.min()
if len(y_min) > 4:
    y_min[4] = round4_best
else:
    y_min.append(round4_best)
y_min

[-0.3743666666666667, -0.6113333333333333, -0.628, -0.891, -0.891]

In [383]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the Bayesian optimiser from the existing observations only (f=None)
round4_bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=round4_x_step,
    Y=round4_y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [384]:
# Ask the optimiser for the next batch of locations and cast them to integers
round4_x_next = round4_bo_step.suggest_next_locations().astype(int)

# Append the suggested points to the working x array (only x is extended here)
round4_x_step = np.concatenate((round4_x_step, round4_x_next), axis=0)
round4_x_next

array([[5, 5, 4, 5, 5],
       [5, 4, 5, 4, 0],
       [5, 0, 0, 5, 0],
       [2, 4, 3, 5, 5],
       [5, 2, 3, 2, 0],
       [4, 3, 1, 2, 1],
       [4, 3, 1, 3, 5],
       [4, 2, 2, 5, 3],
       [5, 0, 2, 3, 4],
       [3, 4, 5, 1, 5],
       [5, 5, 4, 5, 3],
       [5, 3, 3, 2, 2],
       [5, 1, 1, 5, 4],
       [4, 4, 1, 4, 5],
       [5, 1, 2, 5, 2],
       [5, 5, 1, 0, 4],
       [5, 3, 1, 4, 5],
       [5, 0, 1, 2, 1],
       [4, 3, 3, 4, 5],
       [4, 1, 2, 4, 4],
       [4, 5, 0, 1, 2],
       [5, 5, 3, 2, 4],
       [0, 3, 4, 5, 2],
       [5, 3, 5, 0, 5],
       [5, 1, 3, 1, 5],
       [5, 0, 1, 5, 4],
       [3, 1, 1, 5, 2],
       [4, 5, 0, 4, 5],
       [5, 4, 1, 1, 3],
       [4, 2, 3, 5, 2],
       [4, 3, 2, 4, 4]])

In [385]:
## Build the round 5 worklist for the robot
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round5_totest = pd.DataFrame(data=round4_x_next, columns=column_names)

# Append one all-zero row, which acts as the blank
round5_totest.loc[round5_totest.shape[0]] = [0 for _ in column_names]

# Stack three copies so that every row (blank included) is listed in triplicate
round5_totest_robot = pd.concat([round5_totest for _ in range(3)], ignore_index=True)
round5_totest_robot.to_csv('round5_totest_robot.csv', index=False)
round5_totest_robot

Unnamed: 0,T20,T80,P188,DMSO,PG
0,5,5,4,5,5
1,5,4,5,4,0
2,5,0,0,5,0
3,2,4,3,5,5
4,5,2,3,2,0
...,...,...,...,...,...
91,4,5,0,4,5
92,5,4,1,1,3
93,4,2,3,5,2
94,4,3,2,4,4


# Round 5

## Plate Processing

In [386]:
# Load the round 5 plate export (semicolon-delimited, first 5 lines skipped),
# index it by "Content" and discard the "Well" column
round5_plate = (
    pd.read_csv("round5.CSV", sep=';', skiprows=5)
    .set_index("Content")
    .drop(columns="Well")
)

# Wavelength handed to new_process_plate when the plate is processed
wavelength = 435

round5_plate

Unnamed: 0_level_0,Blank corrected based on Raw Data (Abs Spectrum),Blank corrected based on Raw Data (Abs Spectrum).1,Blank corrected based on Raw Data (Abs Spectrum).2,Blank corrected based on Raw Data (Abs Spectrum).3,Blank corrected based on Raw Data (Abs Spectrum).4,Blank corrected based on Raw Data (Abs Spectrum).5,Blank corrected based on Raw Data (Abs Spectrum).6,Blank corrected based on Raw Data (Abs Spectrum).7,Blank corrected based on Raw Data (Abs Spectrum).8,Blank corrected based on Raw Data (Abs Spectrum).9,...,Blank corrected based on Raw Data (Abs Spectrum).148,Blank corrected based on Raw Data (Abs Spectrum).149,Blank corrected based on Raw Data (Abs Spectrum).150,Blank corrected based on Raw Data (Abs Spectrum).151,Blank corrected based on Raw Data (Abs Spectrum).152,Blank corrected based on Raw Data (Abs Spectrum).153,Blank corrected based on Raw Data (Abs Spectrum).154,Blank corrected based on Raw Data (Abs Spectrum).155,Blank corrected based on Raw Data (Abs Spectrum).156,Unnamed: 159
Content,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wavelength [nm],220,225,230,235,240,245,250,255,260,265,...,960.000,965.000,970.000,975.000,980.000,985.000,990.000,995.000,1000.000,
Sample X1,-0.847,2.724,3.281,3.341,3.348,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.008,0.006,0.006,0.007,0.007,0.009,0.006,0.009,0.009,
Sample X9,-0.908,2.746,3.427,3.429,n.a.,n.a.,n.a.,n.a.,n.a.,3.332,...,0.002,0.002,0.002,0.003,0.002,0.004,0.003,0.004,0.004,
Sample X17,-0.530,2.839,3.305,n.a.,3.432,n.a.,n.a.,n.a.,n.a.,3.487,...,0.091,0.086,0.088,0.088,0.091,0.107,0.066,0.113,0.102,
Sample X25,-1.033,2.459,3.262,3.427,3.484,n.a.,n.a.,n.a.,n.a.,3.464,...,0.116,0.107,0.110,0.107,0.117,0.145,0.067,0.157,0.138,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Blank B,,,,,,,,,,,...,,,,,,,,,,
Sample X70,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.006,0.005,0.006,0.005,0.006,0.008,0.006,0.007,0.008,
Sample X78,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.656,0.638,0.650,0.633,0.658,0.710,0.560,0.737,0.698,
Sample X86,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,n.a.,...,0.011,0.010,0.011,0.011,0.010,0.010,0.011,0.012,0.010,


In [387]:
# Process the plate at the selected wavelength, then remove the blank
# entries held at positions 93, 94 and 95 of the result
round5_absorbance = np.delete(
    new_process_plate(round5_plate, wavelength),
    [93, 94, 95],
)
round5_absorbance

array([0.38, 0.456, 0.249, 0.328, 0.219, 0.286, 0.42, 0.278, 0.085, 0.11,
       0.2, 0.298, 0.165, 0.222, 0.079, 0.082, 0.372, 0.547, 0.35, 0.153,
       0.322, 0.554, 0.157, 0.204, 0.312, 0.291, 0.225, 0.249, 0.128,
       0.297, 0.113, 0.325, 0.587, 0.413, 0.153, 0.242, 0.176, 0.186,
       0.12, 0.238, 0.691, 0.592, 0.3, 0.199, 1.023, 0.209, 0.226, 0.197,
       0.217, 0.293, 0.104, 0.113, 0.122, 0.143, 0.125, 0.33, 0.242,
       0.116, 0.12, 0.145, 0.247, 0.225, 0.184, 0.12, 0.31, 0.122, 0.238,
       0.243, 0.286, 0.235, 0.203, 1.828, 1.016, 1.13, 1.437, 0.192, 0.33,
       0.928, 0.215, 0.24, 0.227, 0.082, 0.177, 0.144, 0.076, 0.117,
       0.162, 0.207, 0.163, 0.181, 0.12, 0.188, 0.157], dtype=object)

In [388]:
# Lay the 93 readings out as 31 samples x 3 replicates
sample_numbers = range(1, 32)

round5_df = pd.DataFrame({
    'Sample Number': sample_numbers,
    'Rep 1 Absorbance': round5_absorbance[:31],
    'Rep 2 Absorbance': round5_absorbance[31:62],
    'Rep 3 Absorbance': round5_absorbance[62:93],
})

# Per-sample mean and standard deviation across the three replicate columns
round5_df['Mean Absorbance'] = (
    round5_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].mean(axis='columns')
)
round5_df['Std Absorbance'] = (
    round5_df.loc[:, 'Rep 1 Absorbance':'Rep 3 Absorbance'].std(axis='columns')
)
round5_df['Round Number'] = 5

round5_df

Unnamed: 0,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number
0,1,0.38,0.325,0.184,0.296333,0.101096,5
1,2,0.456,0.587,0.12,0.387667,0.240882,5
2,3,0.249,0.413,0.31,0.324,0.082891,5
3,4,0.328,0.153,0.122,0.201,0.111072,5
4,5,0.219,0.242,0.238,0.233,0.012288,5
5,6,0.286,0.176,0.243,0.235,0.055435,5
6,7,0.42,0.186,0.286,0.297333,0.117411,5
7,8,0.278,0.12,0.235,0.211,0.081688,5
8,9,0.085,0.238,0.203,0.175333,0.080164,5
9,10,0.11,0.691,1.828,0.876333,0.873866,5


In [389]:
# Pair each tested formulation with its measured absorbances

# Row 31 of the worklist is the all-zero blank row, so it is dropped before joining
chosen_round5 = (
    round5_totest
    .reset_index(drop=True)
    .drop(index=[31])
    .reset_index(drop=True)
)

round5_df = round5_df.reset_index(drop=True)

round5_combined_df = pd.concat([chosen_round5, round5_df], axis='columns')
# Note: the CSV is written before "Relative SD" is added, so the file does not contain it
round5_combined_df.to_csv("round_5_results.csv", index=False)
round5_combined_df["Relative SD"] = (
    round5_combined_df["Std Absorbance"]
    .div(round5_combined_df["Mean Absorbance"])
    .mul(100)
)
round5_combined_df

Unnamed: 0,T20,T80,P188,DMSO,PG,Sample Number,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,5,5,4,5,5,1,0.38,0.325,0.184,0.296333,0.101096,5,34.115522
1,5,4,5,4,0,2,0.456,0.587,0.12,0.387667,0.240882,5,62.136476
2,5,0,0,5,0,3,0.249,0.413,0.31,0.324,0.082891,5,25.583795
3,2,4,3,5,5,4,0.328,0.153,0.122,0.201,0.111072,5,55.259726
4,5,2,3,2,0,5,0.219,0.242,0.238,0.233,0.012288,5,5.273908
5,4,3,1,2,1,6,0.286,0.176,0.243,0.235,0.055435,5,23.589211
6,4,3,1,3,5,7,0.42,0.186,0.286,0.297333,0.117411,5,39.487991
7,4,2,2,5,3,8,0.278,0.12,0.235,0.211,0.081688,5,38.714897
8,5,0,2,3,4,9,0.085,0.238,0.203,0.175333,0.080164,5,45.721149
9,3,4,5,1,5,10,0.11,0.691,1.828,0.876333,0.873866,5,99.718484


In [391]:
# Add the round 5 results to the master results sheet.
# errors="ignore" lets this cell be re-run after "Sample Number" has already been dropped.
round5_combined_df = round5_combined_df.drop("Sample Number", axis=1, errors="ignore")

# Discard any round 5 rows left by an earlier run of this cell, so that a
# re-run replaces them instead of appending duplicates to the master sheet.
master_results = master_results[master_results["Round Number"] != 5]

master_results = pd.concat([master_results, round5_combined_df], axis=0, ignore_index=True)
master_results.to_csv('master_results.csv', index=False)
master_results

Unnamed: 0,T20,T80,P188,DMSO,PG,Rep 1 Absorbance,Rep 2 Absorbance,Rep 3 Absorbance,Mean Absorbance,Std Absorbance,Round Number,Relative SD
0,4,3,1,2,2,0.2357,0.1137,0.2577,0.202367,0.077571,0,
1,5,2,3,4,2,0.1957,0.3127,0.2997,0.269367,0.064127,0,
2,1,4,2,3,1,0.1407,0.2477,0.0847,0.1577,0.082819,0,
3,2,3,4,2,4,0.1297,0.3887,0.1207,0.213033,0.152198,0,
4,1,2,3,3,2,0.1327,0.1867,0.0767,0.132033,0.055003,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
246,3,1,1,5,2,0.225,0.116,0.163,0.168,0.054672,5,32.542707
247,4,5,0,4,5,0.249,0.12,0.181,0.183333,0.064532,5,35.19908
248,5,4,1,1,3,0.128,0.145,0.12,0.131,0.012767,5,9.745912
249,4,2,3,5,2,0.297,0.247,0.188,0.244,0.054562,5,22.361431


## Bayesian Optimisation

In [392]:
# Define the search domain: five discrete variables, each taking a level 0-5.
# Entries are matched to the columns of X by position, so they are listed in
# the same order as the excipient columns used to build X
# (T20, T80, P188, DMSO, PG); each name then labels the right column.
domain =[{'name': 'T20', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'T80', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'P188', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'DMSO', 'type': 'discrete', 'domain': (0,1,2,3,4,5)},
        {'name': 'PG', 'type': 'discrete', 'domain': (0,1,2,3,4,5)}]

In [393]:
# Initial x and y data. The optimiser is built without a maximize flag and
# GPyOpt minimises by default, so the mean absorbance is negated: minimising
# -absorbance searches for the highest absorbance.
round5_y_init=np.array(-master_results['Mean Absorbance'])

# Reshape into an (n_samples, 1) column vector
round5_y_init=round5_y_init.reshape(len(round5_y_init),1)

# Extract excipient concs from dataframe
columns_to_extract = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round5_x_init = np.array(master_results[columns_to_extract])

round5_x_step = round5_x_init.copy()
round5_y_step = round5_y_init.copy()

# Record the lowest objective value seen so far. y_min holds one entry per
# round (index 0 = seed data), so round 5 belongs in slot 5. Overwriting the
# slot when it already exists keeps the list from growing if this cell is re-run.
round5_best = round5_y_init.min()
if len(y_min) > 5:
    y_min[5] = round5_best
else:
    y_min.append(round5_best)
y_min

[-0.3743666666666667, -0.6113333333333333, -0.628, -0.891, -0.891, -0.891]

In [394]:
# Fix NumPy's global seed before building the optimiser
np.random.seed(234567)

# Build the Bayesian optimiser from the existing observations only (f=None)
round5_bo_step = GPyOpt.methods.BayesianOptimization(
    f=None,
    domain=domain,
    X=round5_x_step,
    Y=round5_y_step,
    acquisition_type='EI',
    evaluator_type='thompson_sampling',
    batch_size=31,
    de_duplication=True,
)

In [395]:
# Ask the optimiser for the next batch of locations and cast them to integers
round5_x_next = round5_bo_step.suggest_next_locations().astype(int)

# Append the suggested points to the working x array (only x is extended here)
round5_x_step = np.concatenate((round5_x_step, round5_x_next), axis=0)
round5_x_next

array([[5, 5, 0, 3, 4],
       [4, 3, 1, 4, 0],
       [5, 5, 1, 5, 5],
       [4, 3, 1, 5, 3],
       [4, 4, 4, 1, 5],
       [5, 3, 5, 5, 1],
       [5, 1, 3, 5, 2],
       [3, 4, 5, 2, 3],
       [5, 2, 4, 0, 1],
       [4, 5, 0, 3, 3],
       [5, 3, 0, 5, 1],
       [5, 0, 1, 5, 2],
       [5, 4, 4, 5, 2],
       [3, 0, 3, 4, 0],
       [4, 1, 2, 5, 3],
       [5, 0, 3, 1, 2],
       [0, 3, 4, 5, 3],
       [5, 1, 1, 2, 4],
       [5, 5, 4, 3, 0],
       [5, 3, 4, 0, 4],
       [4, 1, 4, 1, 0],
       [5, 3, 2, 3, 1],
       [5, 1, 2, 3, 1],
       [4, 0, 3, 5, 0],
       [4, 3, 2, 3, 5],
       [5, 1, 3, 3, 0],
       [4, 4, 2, 1, 0],
       [5, 4, 2, 0, 3],
       [3, 3, 3, 5, 1],
       [4, 2, 5, 5, 1],
       [3, 1, 1, 5, 5]])

In [396]:
## Build the round 6 worklist for the robot
column_names = ['T20', 'T80', 'P188', 'DMSO', 'PG']
round6_totest = pd.DataFrame(data=round5_x_next, columns=column_names)

# Append one all-zero row, which acts as the blank
round6_totest.loc[round6_totest.shape[0]] = [0 for _ in column_names]

# Stack three copies so that every row (blank included) is listed in triplicate
round6_totest_robot = pd.concat([round6_totest for _ in range(3)], ignore_index=True)
round6_totest_robot.to_csv('round6_totest_robot.csv', index=False)
round6_totest_robot

Unnamed: 0,T20,T80,P188,DMSO,PG
0,5,5,0,3,4
1,4,3,1,4,0
2,5,5,1,5,5
3,4,3,1,5,3
4,4,4,4,1,5
...,...,...,...,...,...
91,5,4,2,0,3
92,3,3,3,5,1
93,4,2,5,5,1
94,3,1,1,5,5
