In [None]:
from tpot import TPOTRegressor
from sklearn.linear_model import LassoCV
import numpy as np
import pandas as pd

In [None]:
# Load the pickled DataFrame that the rest of the notebook uses as its feature table.
# NOTE(review): unpickling can execute arbitrary code, so "cached_df.pkl" must come
# from a trusted source. The path is relative to the notebook's working directory.
X = pd.read_pickle("cached_df.pkl")
# Plain-text dump of the loaded frame for a quick sanity check.
print(X)

In [None]:
# Remove the four columns listed below, then keep only those remaining columns
# that contain at least one non-zero entry (all-zero columns are discarded).
X = X.drop(columns=["material", "formula", "structure", "composition"]).loc[
    :, lambda frame: frame.ne(0).any(axis=0)
]

In [None]:
# Functionalizing features
# Expand the feature table with transformed copies of every base column.
# Each entry maps a label to a callable; the callable must be vectorised, i.e.
# accept a whole DataFrame and return a DataFrame of the same shape.
# Derived columns are named "<label>(<original column>)".
functions = {
    "squared": lambda x: x**2,
    "cubed": lambda x: x**3
}

# Transform base features only: columns produced by an earlier run of this cell
# are excluded, so re-running the cell does not stack squared(squared(...))
# columns or duplicate the derived ones.
derived_prefixes = tuple(f"{key}(" for key in functions)
is_base_column = [not str(col).startswith(derived_prefixes) for col in X.columns]
X_copy = X.loc[:, is_base_column].copy()

feature_frames = [X_copy]
for key, value in functions.items():
    # Apply the transform to the whole frame at once instead of going through
    # the deprecated, per-element DataFrame.applymap.
    # NOTE(review): for integer columns this uses fixed-width arithmetic, so very
    # large values could overflow where per-element Python ints would not.
    tmp_df = value(X_copy)
    tmp_df.columns = [f"{key}({col})" for col in X_copy.columns]
    feature_frames.append(tmp_df)

# One concat at the end rather than growing X inside the loop.
X = pd.concat(feature_frames, axis=1)
pd.set_option("display.max_columns", 400)
print(X)

In [None]:
# Hard-coded regression targets; each value is paired positionally with a row of X.
Y_arr = np.array([0.42, 0.94, 0.76, 0.37, 0.25, 0.91, 0.71])
# Plain NumPy view of the feature table for the estimators below.
X_arr = X.to_numpy()

# Fail here, with a clear message, if the target list and the feature table have
# drifted apart, rather than later inside an estimator's fit().
if X_arr.shape[0] != Y_arr.shape[0]:
    raise ValueError(
        f"X has {X_arr.shape[0]} rows but Y_arr has {Y_arr.shape[0]} targets"
    )

In [None]:
# Lasso regularization
# Cross-validated Lasso (5 folds) over 100 log-spaced alpha candidates between
# 1e-6 and 1e6; random_state is fixed at 10.
lasso = LassoCV(
    alphas=np.logspace(-6, 6, 100),
    cv=5,
    random_state=10,
).fit(X_arr, Y_arr)
# Selected alpha on the first line, fitted coefficients on the following line(s).
print(lasso.alpha_, lasso.coef_, sep="\n")

In [None]:
# Pearson correlation
# Pairwise Pearson correlation between the columns of X as it stands at this point
# in the notebook; the result is a square DataFrame indexed by column name on both axes.
corr = X.corr(method="pearson")
# Plain-text dump of the correlation matrix.
print(corr)

In [None]:
# Pipeline search with TPOT, scored by R^2 (30 generations, population of 10).
# The search is stochastic, so it is seeded for reproducible results on
# Restart & Run All; 10 matches the seed already used for LassoCV.
tpot = TPOTRegressor(
    generations=30,
    population_size=10,
    scoring='r2',
    random_state=10,
)
tpot.fit(X_arr,Y_arr)


In [None]:
# Export the result of the TPOT search fitted above to a Python file; the path is
# relative to the notebook's working directory.
tpot.export('best_ml_algo.py')