In [25]:
import torch
import scipy.io as io
import plotly.offline as py
import plotly.graph_objs as go
import json
import numpy as np
py.init_notebook_mode(connected=True)

In [26]:
# Load the per-atom data set and build one feature row for every atom that has
# a segregation-energy record.
mat = io.loadmat('data_Mg_GBperatom_seg_2Al_dump.mat')
length_A = mat['A'].shape[1]
# Only the first 30 entries of A are used; the bound is clamped to the number of
# entries actually present so a shorter file cannot index past the end.
n_entries = min(30, length_A)
if n_entries == 0:
    raise ValueError("mat['A'] contains no entries to load")

# Collect per-entry arrays and concatenate once after the loop instead of
# re-copying the growing arrays on every iteration.
descriptor_blocks = []
segE_blocks = []
for i in range(n_entries):
    segE = mat['A']['Eseg'][0,i]
    # Rows whose first column is zero are treated as invalid and dropped.
    n1 = segE[:,0] != 0
    # Boolean row selection already returns a 2-D array. No squeeze is applied:
    # squeezing would collapse a single surviving row to 1-D and break the
    # column indexing below.
    segE = segE[n1,:]
    # First column holds 1-based atom IDs; convert to 0-based row indices.
    atom_ID = segE[:,0].astype(int) - 1

    descriptor = mat['A']['peratom'][0,i][0,0]
    # Stack all per-atom descriptor fields side by side, then keep only the
    # atoms that have a segregation-energy row.
    descriptor_temp = np.concatenate([descriptor['pos'],descriptor['pe'],descriptor['cna'],descriptor['centro_fnn'],
                                descriptor['centro_snn'],descriptor['coord'],descriptor['f'],descriptor['stress'],
                                descriptor['voronoi']], axis = 1)[atom_ID]
    descriptor_blocks.append(descriptor_temp)
    segE_blocks.append(segE)

descriptor_all = np.concatenate(descriptor_blocks, axis = 0)
segE_all = np.concatenate(segE_blocks)

# Re-express the third position column as |x - min(x) - 20|.
# NOTE(review): the meaning of the constant 20 is not documented here - confirm.
descriptor_all[:,2] = np.abs(descriptor_all[:,2] - np.min(descriptor_all[:,2]) - 20)
# The slices below assume 'pos' and 'f' contribute 3 columns each and
# 'pe', 'cna', 'centro_fnn', 'centro_snn', 'coord' one column each, so that
# 8:11 is 'f' and 11:14 is the first three 'stress' columns - TODO confirm.
sigma_H = np.sum(descriptor_all[:,11:14], axis = 1)/3
f_mag = np.linalg.norm(descriptor_all[:,8:11], axis = 1, ord = 2)

# Final feature matrix: raw descriptors plus the two derived columns.
feature = np.concatenate([descriptor_all, sigma_H[:,np.newaxis], f_mag[:,np.newaxis]], axis = 1)

In [27]:
# Regression target: second column of the segregation-energy table.
y_true = segE_all[:,1]
# The first three feature columns are coordinates; they are dropped because the
# position itself should not matter.
feature1 = feature[:,3:]
feature_space = feature1
# Random shuffle with a fixed seed so the split is reproducible.
np.random.seed(10)
idx0 = np.random.permutation(np.arange(len(feature_space)))
feature_space = feature_space[idx0]
y_true = y_true[idx0]
# Divide the data into a training part (first 80%, later split further by
# cross-validation) and a held-out part (remaining 20%).
n_train = int(len(feature_space)*.8)
X_train = feature_space[:n_train]
# Standardisation statistics come from the training rows only and are then
# applied unchanged to the held-out rows.
X_mean = np.mean(X_train, axis = 0)
X_std = np.std(X_train, axis = 0)
# A constant feature has zero standard deviation; dividing by it would produce
# NaN/inf. Use 1 for such columns so they standardise to exactly zero.
X_std = np.where(X_std == 0, 1.0, X_std)
X_train = (X_train - X_mean)/X_std
X_temp = feature_space[n_train:]
X_temp = (X_temp - X_mean)/X_std

Y_train = y_true[:n_train]
# Target statistics are computed, but the targets themselves are left
# unnormalised.
Y_mean = np.mean(Y_train, axis = 0)
Y_std = np.std(Y_train, axis = 0)
Y_temp = y_true[n_train:]

In [28]:
from sklearn.model_selection import KFold

# Ten-fold splitter over the training rows. Everything except the fold count is
# left at KFold's defaults.
n_folds = 10
kf = KFold(n_splits = n_folds)
# Returns the number of folds; the value is not used here.
kf.get_n_splits(X_train)
print(kf)

KFold(n_splits=10, random_state=None, shuffle=False)


In [29]:
# K-fold cross-validation of a one-hidden-layer network.
# For every fold a fresh model is trained on the training part, and two numbers
# are recorded: the last training loss and the sum of squared residuals on the
# held-out part of the fold.
final_train_loss = []
final_test_loss = []

# Settings shared by every fold (hoisted out of the loop).
H = 10                 # hidden-layer width
D_out = 1              # single regression output
learning_rate = 1e-4
# Summed (not averaged) squared error, so the loss and its gradient grow with
# the number of rows in the fold.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Seed torch so the random weight initialisation, and therefore the per-fold
# results, are reproducible when this cell is re-run.
torch.manual_seed(10)

for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape

    # float32 tensors; targets reshaped to a column to match the model output.
    X_Fold = torch.from_numpy(X_Fold).float()
    y = torch.from_numpy(Y_Fold).float()
    y = y.view(y.shape[0],1)

    # Fresh network and optimiser for every fold.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9)

    # Full-batch gradient descent.
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        # Stop as soon as the loss is NaN/inf rather than running the remaining
        # iterations on a loss that is no longer finite.
        if not torch.isfinite(loss):
            print('training diverged (non-finite loss) at iteration', t)
            break

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Loss from the last forward pass (computed before the final update step).
    final_train_loss.append(loss.item())

    # Evaluate on the held-out part of the fold; no gradients are needed.
    X_test = torch.from_numpy(X_tFold).float()
    with torch.no_grad():
        Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.numpy().squeeze()
    Y_test = Y_tFold.squeeze()
    # NOTE(review): this rebinds Y_mean (previously the training-target mean)
    # and the value is not used inside this cell; kept in case a later cell
    # reads it - confirm and rename or remove.
    Y_mean = np.mean(Y_test)
    # Sum of squared residuals on the held-out rows.
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))

    final_test_loss.append(SS_res)

0 328.9358215332031
1000 1.5024324655532837
2000 1.377386450767517
3000 1.3248027563095093
4000 1.2713215351104736
5000 1.2182213068008423
6000 1.1990654468536377
7000 1.18455970287323
8000 1.173247218132019
9000 1.1575406789779663
10000 1.1434290409088135
11000 1.1325981616973877
12000 1.1268744468688965
13000 1.1203646659851074
14000 1.1104103326797485
15000 1.1022005081176758
16000 1.0986500978469849
17000 1.0963833332061768
18000 1.0941534042358398
19000 1.0907702445983887
20000 1.085808515548706
21000 1.0840224027633667
22000 1.0830028057098389
23000 1.0798755884170532
24000 1.0759516954421997
25000 1.0751113891601562
26000 1.0741500854492188
27000 1.073286533355713
28000 1.0723596811294556
29000 1.0717436075210571
30000 1.0713862180709839
31000 1.070942759513855
32000 1.0707045793533325
33000 1.0703009366989136
34000 1.0697503089904785
35000 1.0694440603256226
36000 1.0691699981689453
37000 1.0686297416687012
38000 1.0678865909576416
39000 1.0670009851455688
40000 1.0661637783050

In [30]:
# Per-fold sum of squared residuals on the held-out part of each fold, one entry
# per KFold split. A nan entry means that fold's residual sum was not a number -
# presumably because training diverged for that fold; TODO confirm.
final_test_loss

[0.127491426718297,
 0.16109929434034975,
 0.29907708129382277,
 0.193603272480451,
 0.23271422366934097,
 nan,
 0.1336420043511888,
 0.1953853043498497,
 0.1668634993372556,
 0.21009098989289027]