In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from cosy.losses import trace_norm_loss, MaskedMeanSquaredError
from cosy.models import CosyNetMultiInput

In [2]:
# Location of the dataset CSV. Set the DESULF_DATA_CSV environment variable to point at a
# local copy; the fallback is the path this notebook was originally written against.
DATA_CSV = os.environ.get(
    "DESULF_DATA_CSV",
    "/Users/tompope/Documents/python_enviroment/SoftwareDevelopment/soft_parameter_sharing/data/desulferization_data_w_tobacc.csv",
)

df = pd.read_csv(DATA_CSV)
df.head()

Unnamed: 0.1,Unnamed: 0,framework,CO2_tp_lod,N2_tp_lod,SO2_tp_lod,RDF_electronegativity_2.00,RDF_electronegativity_2.00.1,RDF_electronegativity_2.01,RDF_electronegativity_2.03,RDF_electronegativity_2.04,...,RDF_vdWaalsVolume_28.53,RDF_vdWaalsVolume_29.01,RDF_vdWaalsVolume_29.51,RDF_vdWaalsVolume_30.00,PLD,LCD,density,VF,SAv,SAg
0,0,GIQXUA_clean,1.918899,0.01752,2.747258,0.000605,0.000606,0.000607,0.000609,0.000614,...,0.0,0.0,0.0,0.0,3.8687,4.89334,1.79283,0.13844,2675.75,1492.47
1,1,JALPAQ_clean,0.050226,0.027788,0.008087,0.000861,0.00087,0.000888,0.000916,0.000954,...,0.0,0.0,0.0,0.0,2.90553,3.9755,1.66088,0.01992,841.869,506.881
2,2,SAQQEH_clean,2.006302,0.249367,0.533491,0.000771,0.000781,0.000802,0.000833,0.000876,...,0.0,0.0,0.0,0.0,9.13279,10.56525,0.914389,0.35942,2030.21,2220.29
3,3,BEFNEH_clean,1.036156,0.033717,1.343768,0.000899,0.000899,0.0009,0.000903,0.00091,...,0.0,0.0,0.0,0.0,3.33494,4.2694,2.16564,0.0729,1864.99,861.172
4,4,GELVID_clean,0.552087,0.064768,0.027879,0.00082,0.000831,0.000852,0.000884,0.000928,...,0.0,0.0,0.0,0.0,2.43534,3.61901,1.57413,0.02776,1334.15,847.547


In [3]:
# Data preparation
#   - shuffle the rows and drop the leftover CSV index column
#   - split into train / validation / test partitions
#   - extract the feature matrices and the per-gas targets
#   - standardise the features with statistics from the training partition only

SEED = 2020  # one seed for the shuffle and both splits, so a fresh run reproduces the partitions
FIRST_FEATURE_COL = 4  # columns before this position are never used as features
N_EXCLUDED_FROM_CO2 = 6  # the CO2 network does not see the last six feature columns


def _as_float32(values):
    """Return the values of a pandas DataFrame/Series as a float32 NumPy array."""
    return values.to_numpy().astype(np.float32)


# Seeded shuffle (the original shuffle was unseeded, which made the seeded splits below
# differ from run to run). The drop is not in-place and tolerates the column already
# being gone, so re-running this cell no longer raises KeyError.
# NOTE: re-running without re-running the load cell reshuffles the already shuffled frame.
df = (
    df.sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
# df = df[df.framework.str.startswith('str_')]

# Features are selected by position, so make sure no target can end up inside X.
target_cols = {'CO2_tp_lod', 'N2_tp_lod', 'SO2_tp_lod'}
assert target_cols <= set(df.columns[:FIRST_FEATURE_COL]), (
    "target columns must sit before the first feature column, otherwise they leak into X"
)

train_df_full, test_df = train_test_split(df, test_size=0.2, random_state=SEED)
train_df, val_df = train_test_split(train_df_full, test_size=0.1, random_state=SEED)

X_train = _as_float32(train_df.iloc[:, FIRST_FEATURE_COL:])
y_CO2 = _as_float32(train_df.CO2_tp_lod)
y_N2 = _as_float32(train_df.N2_tp_lod)
y_SO2 = _as_float32(train_df.SO2_tp_lod)

X_val = _as_float32(val_df.iloc[:, FIRST_FEATURE_COL:])
y_CO2_val = _as_float32(val_df.CO2_tp_lod)
y_N2_val = _as_float32(val_df.N2_tp_lod)
y_SO2_val = _as_float32(val_df.SO2_tp_lod)

X_test = _as_float32(test_df.iloc[:, FIRST_FEATURE_COL:])
y_CO2_test = _as_float32(test_df.CO2_tp_lod)
y_N2_test = _as_float32(test_df.N2_tp_lod)
y_SO2_test = _as_float32(test_df.SO2_tp_lod)

# y_CO2[100:1000] = -1

# Fit the scaler on the training partition only; validation/test reuse its statistics.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

# The CO2 inputs are the same features minus the trailing columns. They used to be sliced
# from the raw frame and were never standardised; StandardScaler works column by column,
# so slicing the scaled matrices gives exactly the scaled version of that subset.
X_train_CO2 = X_train[:, :-N_EXCLUDED_FROM_CO2]
X_val_CO2 = X_val[:, :-N_EXCLUDED_FROM_CO2]
X_test_CO2 = X_test[:, :-N_EXCLUDED_FROM_CO2]

In [4]:
# Preview the frame after the shuffle and column drop performed in the data-preparation cell
df.head()

Unnamed: 0,framework,CO2_tp_lod,N2_tp_lod,SO2_tp_lod,RDF_electronegativity_2.00,RDF_electronegativity_2.00.1,RDF_electronegativity_2.01,RDF_electronegativity_2.03,RDF_electronegativity_2.04,RDF_electronegativity_2.07,...,RDF_vdWaalsVolume_28.53,RDF_vdWaalsVolume_29.01,RDF_vdWaalsVolume_29.51,RDF_vdWaalsVolume_30.00,PLD,LCD,density,VF,SAv,SAg
0,str_m3_o6_o22_f0_pcu_sym_105_faps_mpnn_charges,0.140591,0.137672,0.009904,0.000454,0.000461,0.000477,0.000501,0.000537,0.000586,...,0.0,0.0,0.0,0.0,8.81252,11.6177,0.640023,0.4243,2224.3,3475.34
1,str_m28_o16_wji_out_mpnn_charges,0.331812,0.306155,0.328909,0.000685,0.000698,0.000724,0.000765,0.000821,0.000895,...,0.002088,0.0011,0.000501,0.000158,24.32915,25.51339,0.261124,0.72232,1446.19,5538.32
2,str_m2_o3_o10_f0_pcu_sym_100_faps_mpnn_charges,1.200102,0.268655,1.815579,0.000711,0.000722,0.000744,0.000778,0.000825,0.000885,...,0.0,0.0,0.0,0.0,8.24056,9.49506,0.559317,0.423,2704.99,4836.23
3,str_m9_o6_o23_f0_sra_sym_99_faps_mpnn_charges,0.360538,0.214823,0.16656,0.000673,0.000683,0.000703,0.000733,0.000775,0.00083,...,0.0,0.0,0.0,0.0,10.97711,11.50512,0.598658,0.4438,2290.26,3825.66
4,str_m29_o83_o111_f0_pts_sym_24_faps_mpnn_charges,0.659675,0.246471,4.576828,0.000663,0.000675,0.000699,0.000737,0.00079,0.00086,...,0.0,0.0,0.0,0.0,6.8778,9.76524,0.666541,0.38238,2471.08,3707.32


In [5]:
# Feed-forward regressor over the full feature matrix `X_train`.
HIDDEN_UNITS = (1200, 400, 500)  # widths of the hidden Dense layers, in order
DROPOUT_RATE = 0.1  # dropout applied after every hidden layer

# `shape` has to be a tuple: `(n)` is just the int `n`, whereas `(n,)` is the
# one-element tuple that the Keras `Input` API documents.
input_ = tf.keras.Input(shape=(X_train.shape[1],))
x = input_
for units in HIDDEN_UNITS:
    x = tf.keras.layers.Dense(units, activation='relu')(x)
    x = tf.keras.layers.Dropout(DROPOUT_RATE)(x)
# Single-unit regression head; the ReLU keeps predictions non-negative
x = tf.keras.layers.Dense(1, activation='relu')(x)

model = tf.keras.Model(inputs=input_, outputs=x)

Metal device set to: Apple M1


2023-02-20 12:50:18.695465: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-20 12:50:18.695562: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Feed-forward regressor over the reduced CO2 feature matrix `X_train_CO2`;
# same layer stack as the full-feature network, only the input width differs.

# `shape` has to be a tuple: `(n)` is just the int `n`, whereas `(n,)` is the
# one-element tuple that the Keras `Input` API documents.
input_ = tf.keras.Input(shape=(X_train_CO2.shape[1],))
x = input_
for width in (1200, 400, 500):  # hidden Dense widths, each followed by 10% dropout
    x = tf.keras.layers.Dense(width, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.1)(x)
# Single-unit regression head; the ReLU keeps predictions non-negative
x = tf.keras.layers.Dense(1, activation='relu')(x)

modelco2 = tf.keras.Model(inputs=input_, outputs=x)

In [7]:
# One Keras config per task network, in the order used for inputs/targets later on:
# full-feature net, CO2 net, full-feature net.
# get_config() is called separately for the first and third entry so they do not share a dict.
task_configs = [
    model.get_config(),
    modelco2.get_config(),
    model.get_config(),
]

# NOTE(review): the meaning of `scalar` and `layer_cutoff` is defined by the cosy package
# and is not visible in this notebook - confirm against its documentation.
cosy_model = CosyNetMultiInput(
    model_config=task_configs,
    number_models=len(task_configs),
    scalar=1.0,
    layer_cutoff=-1,
)

In [8]:
# Adam optimiser with a fixed learning rate
opt = tf.keras.optimizers.Adam(learning_rate=0.00025)

# A separate masked-MSE loss instance for every named model output
output_names = ('output_1', 'output_2', 'output_3')
losses = {name: MaskedMeanSquaredError() for name in output_names}

# R^2 is tracked as a metric alongside the losses
R2 = tfa.metrics.RSquare()

cosy_model.compile(optimizer=opt, loss=losses, metrics=[R2])

In [9]:
# Print the layer/parameter summary of the combined model.
# NOTE(review): the saved output of this cell is a ValueError ("Expected a symbolic Tensor
# for the metric value") that names a lambda inside CosyNetMultiInput.call - the cause
# appears to be in the cosy package rather than in this notebook; confirm.
cosy_model.summary()

ValueError: Expected a symbolic Tensor for the metric value, received: <function CosyNetMultiInput.call.<locals>.<lambda> at 0x2d0601160>

In [None]:
# Inputs and targets are listed per task in the same order: N2, CO2, SO2.
# The first and third task use the full feature matrix, the second the CO2 subset.
train_inputs = [X_train, X_train_CO2, X_train]
train_targets = [y_N2, y_CO2, y_SO2]
val_inputs = [X_val, X_val_CO2, X_val]
val_targets = [y_N2_val, y_CO2_val, y_SO2_val]

cosy_model.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(val_inputs, val_targets),
    batch_size=32,
    epochs=4,
    verbose=1,
)

In [None]:
# Retrieve the models exposed by the combined network via its get_models() accessor
models = cosy_model.get_models()

In [None]:
# Held-out evaluation: run the compiled Keras evaluation first, then compute an independent
# scikit-learn R^2 per task. `r2_score` is imported in the notebook's import cell.
test_inputs = [X_test, X_test_CO2, X_test]

cosy_model.evaluate(test_inputs, [y_N2_test, y_CO2_test, y_SO2_test])
predictions = cosy_model.predict(test_inputs)

# Flatten each task's predictions to 1-D so they line up with the 1-D target vectors
pred1, pred2, pred3 = (np.array(predictions[i]).reshape(-1) for i in range(3))

# Printed in task order: N2, CO2, SO2
for y_true, y_pred in ((y_N2_test, pred1), (y_CO2_test, pred2), (y_SO2_test, pred3)):
    print(r2_score(y_true, y_pred))

In [None]:
# Scratch: use the N2 training targets as the example vector for the masking steps below
y = y_N2

In [None]:
# Show the shape of the example target vector
np.shape(y)

In [None]:
# Display the N2 training targets
y_N2

In [None]:
# Element-wise boolean mask: True where the target differs from -1.
# NOTE(review): -1 is presumably the sentinel for a missing label (cf. the commented-out
# `y_CO2[100:1000] = -1` in the data-preparation cell) - confirm.
masker = tf.not_equal(y, -1)

In [None]:
# Display the boolean mask
masker

In [None]:
# Keep only the entries of y where the mask is True (rebinds y to the filtered tensor)
y = tf.boolean_mask(y, masker)

In [None]:
# Display the masked target tensor
y

In [None]:
# Fetch the models from the combined network again before inspecting their weights
models = cosy_model.get_models()

In [None]:
# Weights of the layer at index 1 of the third model returned by get_models()
models[2].layers[1].weights

In [None]:
# Collect the weight list of every entry in cosy_model.task_nets.layers, then walk the
# lists position by position so each step sees the weights that share a position.
weights_per_layer = [layer.weights for layer in cosy_model.task_nets.layers]

for position, weight_group in enumerate(zip(*weights_per_layer)):
    # The second entry's name decides whether this position holds kernels
    second_is_kernel = "kernel" in weight_group[1].name
    # Position 0 is never printed
    if second_is_kernel and position > 0:
        print(weight_group)