In [0]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [0]:
from google.colab import auth
auth.authenticate_user()

from google.colab import drive
drive.mount('/content/gdrive')

In [4]:
from google.colab import files
uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
  name=fn, length=len(uploaded[fn])))

Saving CoulombVector_Coupling to CoulombVector_Coupling (2)
User uploaded file "CoulombVector_Coupling" with length 66643288 bytes


In [0]:
import io

molecule_dataframe = pd.read_csv(io.StringIO(uploaded['CoulombVector_Coupling'].decode('utf-8')), sep=",")

molecule_dataframe = molecule_dataframe.reindex(np.random.permutation(
molecule_dataframe.index))


In [0]:
lenval = int(len(molecule_dataframe)*0.8)
lentrain = int(len(molecule_dataframe)*0.8*0.8)


In [0]:
columns = []

for i in range(1,13):
  for j in range(13,25):
    if i == 1 and j == 13:
      columns.append(str(f'{i:2d}')+'-'+str(f'{j:2d}'))
    else:
      columns.append(' '+str(f'{i:2d}')+'-'+str(f'{j:2d}'))

Feature = np.zeros((len(molecule_dataframe),len(columns)))

for i in range(len(columns)):
  for sample in range(len(molecule_dataframe)):
    Feature[sample][i] = molecule_dataframe[columns[i]][sample]


In [0]:
Target = np.zeros((len(molecule_dataframe)))
for sample in range(len(molecule_dataframe)):
  Target[sample] = molecule_dataframe[" Coupling(eV)"][sample] * 1000


In [0]:
training_images, training_labels, validation_images, validation_labels, test_images, test_labels = Feature[:lentrain], Target[:lentrain], Feature[lentrain:lenval], Target[lentrain:lenval], Feature[lenval:], Target[lenval:]


In [10]:
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
lin_reg.fit(Feature, Target)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
some_data = test_images[:5]
some_labels = test_labels[:5]
print("예측: ", lin_reg.predict(some_data))
print("레이블: ", list(some_labels))

예측:  [-0.00674066  0.01827606  0.01564413  0.01089832  0.01072623]
레이블:  [0.319643, -0.32650890000000005, -0.01253727, 0.18616310000000003, 0.3391186]


In [12]:
from sklearn.metrics import mean_squared_error
coupling_predictions = lin_reg.predict(training_images)
lin_mse = mean_squared_error(training_labels, coupling_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

0.8626964482472821

In [13]:
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor()
tree_reg.fit(training_images, training_labels)

coupling_predictions = tree_reg.predict(training_images)
tree_mse = mean_squared_error(training_labels, coupling_predictions)
tree_rmse = np.sqrt(tree_mse)
tree_rmse

4.198326340242779e-05

In [0]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(tree_reg, test_images, test_labels,
                        scoring="neg_mean_squared_error", cv=10)
tree_rmse_scores = np.sqrt(-scores)

In [18]:
def display_scores(scores):
  print("Scores:", scores)
  print("Mean:", scores.mean())
  print("Standard deviation:", scores.std())
  
display_scores(tree_rmse_scores)

Scores: [1.39006117 1.21297426 1.06583887 1.19112134 1.11985684 1.51529264
 1.50373965 1.43202042 1.41298101 1.55470695]
Mean: 1.3398593150038116
Standard deviation: 0.16788832527757003


In [0]:
lin_scores = cross_val_score(lin_reg, training_images, training_labels,
                            scoring="neg_mean_squared_error", cv=10)

lin_rmse_scores = np.sqrt(-lin_scores)
display_scores(lin_rmse_scores)

In [0]:
%matplotlib inline

fig = plt.figure(figsize=(15,10))
ax = plt.axes()

p1 = plt.scatter(range(lentrain), training_labels, s=1)
plt.plot(range(lentrain), np.zeros((lentrain)))
plt.show()

In [0]:
from keras import backend as K

def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) 

class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epochs, logs={}):
    if (logs.get("loss")<0.01):
      print("\nReached 1% loss!")
      self.model.stop_training = True

callbacks=myCallback()
 
model = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(loss='mean_squared_error',
              optimizer='adam',
              use_bias=True,
              kernel_initializer='RandomNormal',
              bias_initializer='RandomNormal',
              )

history = model.fit(training_images, 
          training_labels, 
          batch_size=100,
          verbose=0,
          epochs=1000,
          validation_data=(validation_images, validation_labels),
          callbacks=[callbacks],
          shuffle=True,
          steps_per_epoch=50
          )


nlen = len(test_images)

y = [model.predict(test_images)]
y = np.array(y).reshape(nlen)

In [0]:
diff = np.subtract(y, test_labels)
print(max(abs(diff)))

fig = plt.figure(figsize=(30,10))
ax = plt.axes()

p1 = plt.scatter(range(nlen), test_labels, c='r', marker='o', label='true value', s=1)
p2 = plt.scatter(range(nlen), y, c='b', marker='^', label='hypothesis', s=1)
#p3 = plt.plot(range(nlen), diff, linewidth=0.5)
plt.plot(range(nlen), np.zeros((nlen)))
plt.legend([p1,p2],['true value', 'hypothesis'])
plt.show()