# notebooks.smart_microfluidics_models

## Setup

In [1]:
# Fetch the project repository; the dataset loaded later is read from smart_microfluidics/data/.
!git clone https://github.com/leonardoLavagna/smart_microfluidics

Cloning into 'smart_microfluidics'...
remote: Enumerating objects: 353, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (44/44), done.[K
remote: Total 353 (delta 17), reused 0 (delta 0), pack-reused 308 (from 2)[K
Receiving objects: 100% (353/353), 2.50 MiB | 8.86 MiB/s, done.
Resolving deltas: 100% (203/203), done.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle
import numpy as np

## Get Data

In [3]:
# Location of the cleaned dataset inside the cloned repository.
file_path = "smart_microfluidics/data/cleaned_data.csv"

# The CSV is decoded as Latin-1 instead of pandas' default UTF-8.
data = pd.read_csv(
    filepath_or_buffer=file_path,
    encoding="latin1",
)

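## Binary classifier for output

No classifier is trained in this section: it only keeps the rows with `OUTPUT == 1`, which are the rows the regressors below are fitted on.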

In [4]:
# Keep only the rows flagged OUTPUT == 1; every model below is fitted on this subset.
# The dataset path is already defined in the data-loading cell, so it is not repeated here.
yes_data = data[data["OUTPUT"] == 1]

## Random forest regressor

In [5]:
# Forward model: predict SIZE and PDI from the remaining columns.
# OUTPUT equals 1 on every row of yes_data and is dropped together with the two targets.
features = yes_data.drop(columns=["SIZE", "PDI", "OUTPUT"])
targets = yes_data[["SIZE", "PDI"]]

# Object-typed (string) columns are one-hot encoded with the first level dropped;
# every other column is passed through unchanged.
categorical_columns = features.select_dtypes(include=["object"]).columns
preprocessor = ColumnTransformer(
    transformers=[("cat", OneHotEncoder(drop="first"), categorical_columns)],
    remainder="passthrough",
)

# 80/20 split with a fixed seed. Only the training part is used in this cell;
# X_test / y_test are not scored here.
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)
model.fit(X_train, y_train)

# Persist the fitted pipeline (preprocessing + regressor) so it can be reloaded for inference.
pickle_file_path = "random_forest_model.pkl"
with open(pickle_file_path, "wb") as file:
    pickle.dump(model, file)

print(f"Model saved to {pickle_file_path}")

Model saved to random_forest_model.pkl


In [6]:
# Reload the pipeline written by the training cell.
# pickle.load can execute arbitrary code, so only unpickle files produced by this notebook.
with open("random_forest_model.pkl", "rb") as file:
    loaded_model = pickle.load(file)

# One candidate formulation. The keys must match the training column names exactly,
# including the trailing space in "TFR ".
# There is no "OUTPUT" entry: the random-forest features were built with OUTPUT dropped,
# so the model never saw that column.
input_data = pd.DataFrame({
    "ML": ["HSPC"],
    "CHIP": ["Micromixer"],
    "TLP": [5.0],
    "ESM": [0.0],
    "HSPC": [3.75],
    "CHOL": [0.0],
    "PEG": [1.25],
    "TFR ": [1.0],
    "FRR": [3.0],
    "BUFFER": ["PBS"],
})

# The result has one row per input row and two columns: SIZE, then PDI.
predictions = loaded_model.predict(input_data)
print("Predicted SIZE and PDI:", predictions)


Predicted SIZE and PDI: [[115.56333333   0.34353444]]


## XGBoost

In [7]:
# Forward model with XGBoost: predict SIZE and PDI from the remaining columns.
# OUTPUT equals 1 on every row of yes_data, so it carries no information for the regressor;
# it is dropped here exactly as in the random-forest cell.
features = yes_data.drop(columns=["SIZE", "PDI", "OUTPUT"])
targets = yes_data[["SIZE", "PDI"]]

# Object-typed (string) columns are one-hot encoded with the first level dropped;
# every other column is passed through unchanged.
categorical_columns = features.select_dtypes(include=["object"]).columns
preprocessor = ColumnTransformer(
    transformers=[("cat", OneHotEncoder(drop="first"), categorical_columns)],
    remainder="passthrough",
)

# 80/20 split with a fixed seed. Only the training part is used in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

# MultiOutputRegressor fits one XGBRegressor per target column (SIZE and PDI).
xgboost_model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", MultiOutputRegressor(XGBRegressor(n_estimators=100, random_state=42))),
    ]
)
xgboost_model.fit(X_train, y_train)

# Persist the fitted pipeline so it can be reloaded for inference.
pickle_file_path = "xgboost_model.pkl"
with open(pickle_file_path, "wb") as file:
    pickle.dump(xgboost_model, file)
print(f"Model saved to {pickle_file_path}")

Model saved to xgboost_model.pkl


In [8]:
# Reload the XGBoost pipeline from disk.
# pickle.load can execute arbitrary code, so only unpickle files produced by this notebook.
with open("xgboost_model.pkl", "rb") as handle:
    loaded_model = pickle.load(handle)

# A single formulation expressed as one record. The keys must match the training
# column names exactly, including the trailing space in "TFR ".
# OUTPUT is set to 1, the value shared by all rows of the training subset.
input_data = pd.DataFrame([
    {
        "ML": "HSPC",
        "CHIP": "Micromixer",
        "TLP": 5.0,
        "ESM": 0.0,
        "HSPC": 3.75,
        "CHOL": 0.0,
        "PEG": 1.25,
        "TFR ": 1.0,
        "FRR": 3.0,
        "BUFFER": "PBS",
        "OUTPUT": 1,
    }
])

# The result has one row per input row and two columns: SIZE, then PDI.
predictions = loaded_model.predict(input_data)
print("Predicted SIZE and PDI:", predictions)


Predicted SIZE and PDI: [[118.00202     0.3360173]]


## Inverse problem

In [9]:
# Inverse problem: the roles of inputs and outputs are swapped relative to the forward
# models. The regressor maps a measured (SIZE, PDI) pair to six numeric settings.
numerical_columns = ["ESM", "HSPC", "CHOL", "PEG", "TFR ", "FRR"]
targets = yes_data[["SIZE", "PDI"]]
features = yes_data[numerical_columns]

# `targets` is passed first, so X_* holds SIZE/PDI and y_* holds the six settings.
X_train, X_test, y_train, y_test = train_test_split(
    targets,
    features,
    test_size=0.2,
    random_state=42,
)

# One XGBRegressor is fitted per output column; fit() returns the fitted estimator.
inverse_model = MultiOutputRegressor(
    XGBRegressor(n_estimators=100, random_state=42)
).fit(X_train, y_train)

# Persist the fitted inverse model for the inference cell.
pickle_file_path = "inverse_xgboost_model.pkl"
with open(pickle_file_path, "wb") as handle:
    pickle.dump(inverse_model, handle)

print(f"Inverse model saved to {pickle_file_path}")


Inverse model saved to inverse_xgboost_model.pkl


In [10]:
# Reload the inverse model written by the training cell.
# pickle.load can execute arbitrary code, so only unpickle files produced by this notebook.
with open("inverse_xgboost_model.pkl", "rb") as file:
    inverse_model = pickle.load(file)

# Desired outcome to invert: one (SIZE, PDI) pair.
input_data = pd.DataFrame({"SIZE": [118.0], "PDI": [0.33]})

predictions = inverse_model.predict(input_data)
# Every value below 0.5 -- which includes every negative prediction -- is reported as 0.
# The sign is intentionally NOT flipped with np.abs first: doing so would turn a
# prediction such as -2.0 into 2.0 instead of treating it as "none".
predictions = np.where(predictions < 0.5, 0, predictions)
print("Predicted values for ESM, HSPC, CHOL, PEG, TFR, and FRR:", predictions)

Predicted values for ESM, HSPC, CHOL, PEG, TFR, and FRR: [[0.        3.7620564 0.        1.2572409 0.9628239 3.0061107]]
