This notebook follows the AutoGluon tabular quick-start tutorial: https://auto.gluon.ai/stable/tutorials/tabular/tabular-quick-start.html

In [None]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install pandas numpy autogluon ipywidgets git+https://github.com/Ramprasad-Group/psmiles.git

import pandas as pd
import numpy as np
from psmiles import PolymerSmiles as PS
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor

# Download the polymer dataset and keep only the PSMILES string ("smiles")
# and the regression target ("value").
data_url = "https://raw.githubusercontent.com/kuennethgroup/colab_tutorials/main/lecture2/data/polymers_tend_to_crystalize.json"
df = pd.read_json(data_url)
df = df[["smiles", "value"]]

# Compute one fingerprint per polymer with the psmiles package and stack the
# results row-wise into a 2-D array (one row per entry of df).
# NOTE(review): assumes PS(x).fingerprint() returns a fixed-length 1-D numeric
# vector for every PSMILES string -- confirm against the psmiles docs.
fps = np.vstack(df["smiles"].apply(lambda x: PS(x).fingerprint()).to_list())
# TODO: Scale fps with scaler from sklearn

# Reuse df's index for the fingerprint frame: pd.concat(axis=1) aligns rows on
# index labels, so a fresh RangeIndex would mismatch (and pad with NaN) whenever
# the index loaded from the JSON file is not exactly 0..n-1.
fps = pd.DataFrame(fps, index=df.index)

# Fingerprint columns (named 0..k-1) first, followed by "smiles" and "value".
df = pd.concat((fps, df), axis=1)

# Drop the smiles column so only the fingerprint features and the target remain.
df = df.drop(columns="smiles")
# Optional: cast every column to float32
# df = df.astype(np.float32)

# Optional: remove columns that are zero in every row
# df = df.loc[:, (df!=0).any(axis=0)]

# Optional: normalize the tendency to crystallize
# df['value'] = df['value'] / 100


# Hold out 20 % of the rows as a test set; the fixed random_state keeps the
# split identical between runs. Both partitions are then shown for inspection.
df_train, df_test = train_test_split(df, random_state=42, test_size=0.2)
display(df_train, df_test)

# Set up an AutoGluon regressor for the "value" column, then train it on the
# training split with the "best_quality" preset and a time_limit of 500.
predictor = TabularPredictor(label="value", problem_type="regression")
predictor = predictor.fit(
    df_train,
    time_limit=500,
    presets="best_quality",
)


In [None]:
import matplotlib.pyplot as plt

# Score the trained predictor on the held-out test set and predict the target
# from the feature columns only (the label column is dropped before predict).
metrics = predictor.evaluate(df_test, silent=True)
y_pred = predictor.predict(df_test.drop(columns=["value"]))

# Parity plot: predicted vs. true values. The figure is created only after the
# model calls so a failure there does not leave an empty figure behind.
fig, ax = plt.subplots()
ax.plot(y_pred, df_test["value"], "o")
# Dashed diagonal marks perfect agreement between prediction and truth.
# NOTE(review): the [0, 100] range assumes the target is on a 0-100 scale --
# adjust if "value" is normalized upstream.
ax.plot([0, 100], [0, 100], "k--")
# Annotate with the R2 score reported by predictor.evaluate (axes coordinates).
ax.text(0.1, 0.9, f"R2 = {metrics['r2']:.3f}", transform=ax.transAxes)
ax.set_title("Testdata set")
ax.set_ylabel("truth")
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_xlabel("pred");