# WC prediction with structure data (Zeo++)

## Module Import

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] ="0"
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras

## Data Load

In [None]:
# Read the dataset using the "structure" column as the row index, then
# shuffle all rows (frac=1) with a fixed seed so the order is reproducible.
data = pd.read_csv("zeo_gcmc.csv", index_col="structure").sample(
    frac=1, random_state=43
)
data

## Filling in missing data

In [None]:
# Inspect the five structures whose rows are patched by hand further down.
data.loc[
    [
        'acs+N422+E45',
        'bnn+N242+E19',
        'hms+N495+N133+E16+E187',
        'msw+N144+E38',
        'pcu+N283+E27',
    ],
    :
]

In [None]:
# Fill in the structural descriptors for five structures by hand.
#
# The assignment must go through a single `.loc[row, columns]` call.
# The chained form `data.loc[row][columns] = ...` first materialises the row
# as a separate Series and then assigns into that temporary object, so the
# update is not guaranteed to reach `data` (and never does under pandas
# Copy-on-Write).
missing_value_columns = ['Density', 'AV_Volume_fraction', 'AV_cm^3/g', 'Di', 'Df', 'Dif']

# Maps structure name -> values, listed in the same order as
# `missing_value_columns`.
missing_structure_values = {
    'acs+N422+E45': [0.649936, 0.45384, 0.698284, 12.37427, 10.62515, 12.34505],
    'bnn+N242+E19': [0.694804, 0.41954, 0.603825, 12.09847, 9.95645, 12.09793],
    'hms+N495+N133+E16+E187': [0.507084, 0.52928, 1.04377, 12.066059, 10.14879, 12.36303],
    'msw+N144+E38': [0.788979, 0.40974, 0.519330, 8.97779, 7.51442, 8.97779],
    'pcu+N283+E27': [1.00712, 0.22842, 0.226804, 6.78506, 4.90377, 6.76609],
}

for structure_name, filled_values in missing_structure_values.items():
    data.loc[structure_name, missing_value_columns] = filled_values

In [None]:
# Re-display the same five structures to check the values written above.
data.loc[
    [
        'acs+N422+E45',
        'bnn+N242+E19',
        'hms+N495+N133+E16+E187',
        'msw+N144+E38',
        'pcu+N283+E27',
    ],
    :
]

In [None]:
# Set the target value for two structures by hand; each value is written as
# the difference of two numbers, exactly as in the original notebook.
#
# A single `.loc[row, column]` call is used so the write lands in `data`
# itself. The chained form `data.loc[row]['gcmc_result'] = ...` assigns into
# a temporary row Series and may leave `data` unchanged.
data.loc['acs+N422+E45', 'gcmc_result'] = 188.6036337274 - 26.9378540502
data.loc['bnn+N242+E19', 'gcmc_result'] = 200.0324719640 - 35.1899809715

# Display the hand-patched rows to confirm the update.
data.loc[['acs+N422+E45', 'bnn+N242+E19', 'hms+N495+N133+E16+E187', 'msw+N144+E38', 'pcu+N283+E27'], :]

## Data split and normalization

In [None]:
from sklearn import preprocessing

# Target is the "gcmc_result" column; every other column is a feature.
Y = data["gcmc_result"]
X = data.drop(columns=["gcmc_result"])

# Separate min-max scalers for features and target, so the target scaler can
# later invert predictions on its own.
# NOTE(review): both scalers are fit on all rows, i.e. before the train/test
# slicing done in the next cell -- confirm this is intended.
min_max_scaler_for_X = preprocessing.MinMaxScaler()
min_max_scaler_for_Y = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler_for_X.fit_transform(X.values)
# The scaler expects a 2-D array, hence the reshape to a single column.
y_scaled = min_max_scaler_for_Y.fit_transform(Y.values.reshape(-1, 1))

In [None]:
# The first 50000 (already shuffled) rows form the training set; the
# remainder is held out for testing.
X_train_scaled, X_test_scaled = x_scaled[:50000], x_scaled[50000:]
y_train_scaled, y_test_scaled = y_scaled[:50000], y_scaled[50000:]

# Report the resulting array shapes.
print(X_train_scaled.shape, y_train_scaled.shape)
print(X_test_scaled.shape, y_test_scaled.shape)

## Build and train model

In [None]:
import tensorflow.keras as keras

# Fully connected regressor: 19 inputs -> 1000 (ReLU) -> 256 (ReLU) -> 1.
model = keras.models.Sequential()
model.add(keras.layers.Dense(1000, activation="relu", input_shape=[19]))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dense(1))

# Use `learning_rate`; `lr` is a deprecated alias that newer Keras releases
# no longer accept.
opt = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(loss="mse", optimizer=opt)

# Train on the scaled training split.
model.fit(X_train_scaled, y_train_scaled, batch_size=32, epochs=4)


In [None]:
# Map the held-out targets back from the [0, 1] scaled range to the original
# range of the target column.
y_test = min_max_scaler_for_Y.inverse_transform(y_test_scaled)

# Run the model on the whole test set in one call, then undo the target
# scaling on the predictions as well.
y_pred_scaled = model(X_test_scaled).numpy()
y_pred = min_max_scaler_for_Y.inverse_transform(y_pred_scaled)

In [None]:
# Parity plot: predicted values on the x-axis, true values on the y-axis.
fig, ax = plt.subplots(figsize=(8, 8))

ax.scatter(y_pred, y_test, s=0.5, alpha=0.5, color="blue")
# Dashed diagonal marks perfect agreement between prediction and truth.
ax.plot([0, 250], [0, 250], color="black", ls="--")

ax.set_xlabel("Prediction WC (cc/cc)", fontsize=30)
ax.set_ylabel("Real WC (cc/cc)", fontsize=30)

# Both axes are clipped to the same 0-200 window.
ax.set_xlim([0, 200])
ax.set_ylim([0, 200])

plt.show()