In [1]:
import torch
import scipy.io as io
import plotly.offline as py
import plotly.graph_objs as go
import json
import numpy as np
py.init_notebook_mode(connected=True)

In [2]:
mat = io.loadmat('data_Mg_GBperatom_seg_2Al_dump.mat')
length_A = mat['A'].shape[1]
for i in range(30):
    segE = mat['A']['Eseg'][0,i]
    #check whether this is a valid data?
    n1 = segE[:,0] != 0 
    segE = np.squeeze(segE[n1,:])
    atom_ID = segE[:,0].astype(int) - 1

    descriptor = mat['A']['peratom'][0,i][0,0]
    descriptor_temp = np.concatenate([descriptor['pos'],descriptor['pe'],descriptor['cna'],descriptor['centro_fnn'],
                                descriptor['centro_snn'],descriptor['coord'],descriptor['f'],descriptor['stress'],
                                descriptor['voronoi']], axis = 1)
    if i == 0:
        descriptor_all = descriptor_temp[atom_ID]
        segE_all = segE
    else:
        descriptor_temp = descriptor_temp[atom_ID]
        descriptor_all = np.concatenate([descriptor_all, descriptor_temp], axis = 0)
        segE_all = np.concatenate([segE_all, segE])

descriptor_all[:,2] = abs(descriptor_all[:,2]-min(descriptor_all[:,2])-20)
sigma_H = np.sum(descriptor_all[:,11:14], axis = 1)/3
f_mag = np.linalg.norm(descriptor_all[:,8:11], axis = 1, ord = 2)

feature = np.concatenate([descriptor_all, sigma_H[:,np.newaxis], f_mag[:,np.newaxis]], axis = 1)

In [3]:
y_true = segE_all[:,1]
#We should consider the coordinates don't matter.
feature1 = feature[:,3:]
#feature1 = feature1/np.max(feature1)
#feature_space = np.concatenate([np.ones((feature1.shape[0],1)), feature1], axis = 1)
feature_space = feature1
#random shuffle
np.random.seed(10)
idx0 = np.random.permutation(np.arange(len(feature_space)))
feature_space = feature_space[idx0]
y_true = y_true[idx0]
#divide data into training, crossvalildation and test sets.
X_train = feature_space[0:int(len(feature_space)*.8)]
X_mean = np.mean(X_train, axis = 0)
X_std = np.std(X_train, axis = 0)
X_train = (X_train - X_mean)/X_std
X_temp = feature_space[int(len(feature_space)*.8):]
X_temp = (X_temp - X_mean)/X_std

Y_train = y_true[0:int(len(feature_space)*.8)]
Y_mean = np.mean(Y_train, axis = 0)
Y_std = np.std(Y_train, axis = 0)
#Y_train = (Y_train - Y_mean)/Y_std
Y_temp = y_true[int(len(feature_space)*.8):]

In [4]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10)
kf.get_n_splits(X_train)
print(kf)

KFold(n_splits=10, random_state=None, shuffle=False)


In [19]:
final_train_loss = []
final_test_loss = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss.append(SS_res)

0 580.4072875976562
1000 1.9340736865997314
2000 1.5570257902145386
3000 1.4114975929260254
4000 1.3262901306152344
5000 1.2512733936309814
6000 1.2011654376983643
7000 1.1770473718643188
8000 1.1541459560394287
9000 1.1417940855026245
10000 1.1319756507873535
11000 1.1266846656799316
12000 1.1213407516479492
13000 1.117064356803894
14000 1.1120972633361816
15000 1.108014464378357
16000 1.1012574434280396
17000 1.092694878578186
18000 1.0842070579528809
19000 1.0803924798965454
20000 1.0772202014923096
21000 1.074794054031372
22000 1.0722591876983643
23000 1.0702048540115356
24000 1.0678569078445435
25000 1.0627574920654297
26000 1.0581117868423462
27000 1.0541460514068604
28000 1.052486777305603
29000 1.051162600517273
30000 1.0502525568008423
31000 1.0491118431091309
32000 1.0485020875930786
33000 1.0482139587402344
34000 1.0473995208740234
35000 1.0471656322479248
36000 1.047059416770935
37000 1.046992301940918
38000 1.0469330549240112
39000 1.0468775033950806
40000 1.04682612419128

In [20]:
final_test_loss

[0.14331576659515383,
 0.14628443188602547,
 0.13625654959026584,
 0.20291350474222894,
 0.1507180346947643,
 0.15943295377337316,
 0.16299302553456183,
 0.12223330020834222,
 0.15725164757659488,
 0.1409652329180137]

In [21]:
np.array(final_test_loss).mean()

0.1522364447519324

In [22]:
final_train_loss1 = []
final_test_loss1 = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 0.01)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss1.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss1.append(SS_res)

0 385.7215270996094
1000 1.666097640991211
2000 1.4850571155548096
3000 1.3952443599700928
4000 1.3493565320968628
5000 1.3228366374969482
6000 1.303407073020935
7000 1.2833843231201172
8000 1.267615795135498
9000 1.247214674949646
10000 1.2201436758041382
11000 1.1911227703094482
12000 1.1646966934204102
13000 1.1407461166381836
14000 1.1229567527770996
15000 1.109235167503357
16000 1.1038117408752441
17000 1.0975847244262695
18000 1.0909792184829712
19000 1.0816383361816406
20000 1.070539951324463
21000 1.053195834159851
22000 1.0400701761245728
23000 1.0333606004714966
24000 1.0283876657485962
25000 1.025251865386963
26000 1.021652340888977
27000 1.0151294469833374
28000 1.0082088708877563
29000 1.0023434162139893
30000 0.9977775812149048
31000 0.993628978729248
32000 0.9908286929130554
33000 0.9881576299667358
34000 0.9860020875930786
35000 0.9842310547828674
36000 0.9801193475723267
37000 0.9791978597640991
38000 0.9784767627716064
39000 0.977285623550415
40000 0.9763675332069397


In [23]:
np.array(final_test_loss1).mean()

0.1450656739888288

In [5]:
final_train_loss2 = []
final_test_loss2 = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 0.1)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss2.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss2.append(SS_res)

0 1660.513671875
1000 2.0985360145568848
2000 1.7101393938064575
3000 1.53839111328125
4000 1.4189053773880005
5000 1.3603640794754028
6000 1.313180923461914
7000 1.2781507968902588
8000 1.231299877166748
9000 1.2077304124832153
10000 1.1903127431869507
11000 1.1775238513946533
12000 1.1671949625015259
13000 1.1565577983856201
14000 1.1454455852508545
15000 1.1309856176376343
16000 1.1105691194534302
17000 1.0966110229492188
18000 1.0743680000305176
19000 1.0639904737472534
20000 1.0583674907684326
21000 1.0558218955993652
22000 1.0548961162567139
23000 1.0544359683990479
24000 1.0541654825210571
25000 1.053175449371338
26000 1.0530487298965454
27000 1.0529755353927612
28000 1.052897572517395
29000 1.0528194904327393
30000 1.0523841381072998
31000 1.0520613193511963
32000 1.0512303113937378
33000 1.051163911819458
34000 1.05116605758667
35000 1.0511744022369385
36000 1.051188349723816
37000 1.0511494874954224
38000 1.0511605739593506
39000 1.0511722564697266
40000 1.0511772632598877
41

In [6]:
np.array(final_test_loss2).mean()

0.13746027828097668

In [8]:
X_test = torch.from_numpy(X_temp)
X_test = X_test.float()
Y_test_pred = model(X_test)
Y_test_pred = Y_test_pred.detach().numpy()
Y_test_pred = Y_test_pred.squeeze()
Y_test = Y_temp.squeeze()
Y_mean = np.mean(Y_test)
SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))


In [11]:
SS_tot = np.sum(np.power(Y_test - Y_test.mean(), 2))
R_squared = 1-SS_res/SS_tot

In [12]:
R_squared

0.9483332209623662

In [45]:
final_train_loss3 = []
final_test_loss3 = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 1)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss3.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss3.append(SS_res)

0 448.3594055175781
1000 1.6326431035995483
2000 1.3756413459777832
3000 1.248448371887207
4000 1.209316372871399
5000 1.193574070930481
6000 1.174582600593567
7000 1.153003215789795
8000 1.144581913948059
9000 1.1342933177947998
10000 1.11874520778656
11000 1.114279866218567
12000 1.113101840019226
13000 1.1123229265213013
14000 1.1118204593658447
15000 1.1115964651107788
16000 1.1117087602615356
17000 1.1117768287658691
18000 1.1117855310440063
19000 1.1117876768112183
20000 1.111789345741272
21000 1.1117942333221436
22000 1.1118024587631226
23000 1.1117929220199585
24000 1.1117963790893555
25000 1.1117907762527466
26000 1.1117935180664062
27000 1.1117963790893555
28000 1.1117922067642212
29000 1.1117957830429077
30000 1.1117980480194092
31000 1.1117926836013794
32000 1.1117969751358032
33000 1.1117931604385376
34000 1.1117976903915405
35000 1.1117981672286987
36000 1.111801266670227
37000 1.1117959022521973
38000 1.1117948293685913
39000 1.1117959022521973
40000 1.1117949485778809
4

In [44]:
np.array(final_test_loss3).mean()

0.14415752304036222

In [37]:
final_train_loss4 = []
final_test_loss4 = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 2)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss4.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss4.append(SS_res)

0 580.4205932617188
1000 1.6138620376586914
2000 1.4482874870300293
3000 1.2672202587127686
4000 1.2290006875991821
5000 1.199912190437317
6000 1.158446192741394
7000 1.1196818351745605
8000 1.1020140647888184
9000 1.0994009971618652
10000 1.098821759223938
11000 1.0986483097076416
12000 1.0985554456710815
13000 1.0985263586044312
14000 1.0985167026519775
15000 1.0985026359558105
16000 1.0985068082809448
17000 1.0985099077224731
18000 1.0985215902328491
19000 1.0985149145126343
20000 1.0985172986984253
21000 1.0985134840011597
22000 1.0985223054885864
23000 1.0985172986984253
24000 1.0985177755355835
25000 1.0985242128372192
26000 1.098511815071106
27000 1.098515272140503
28000 1.0985150337219238
29000 1.0985171794891357
30000 1.0985150337219238
31000 1.0985252857208252
32000 1.0985187292099
33000 1.098512887954712
34000 1.0985101461410522
35000 1.0985134840011597
36000 1.0985145568847656
37000 1.0985103845596313
38000 1.0985170602798462
39000 1.0985159873962402
40000 1.098518967628479

In [38]:
np.array(final_test_loss4).mean()

0.14553661824816305

In [41]:
final_train_loss5 = []
final_test_loss5 = []
for train_index, test_index in kf.split(X_train):
    X_Fold = X_train[train_index]
    X_tFold = X_train[test_index]
    Y_Fold = Y_train[train_index]
    Y_tFold = Y_train[test_index]
    (N, D_in) = X_Fold.shape
    H = 10
    D_out = 1

    X_Fold = torch.from_numpy(X_Fold)
    X_Fold = X_Fold.float()
    y = torch.from_numpy(Y_Fold)
    y = y.float()
    y = y.view(y.shape[0],1)
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )

    loss_fn = torch.nn.MSELoss(reduction='sum')
    learning_rate = 0.5e-4
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 0.001)
    
    for t in range(50000):
        y_pred = model(X_Fold)

        loss = loss_fn(y_pred, y)
        if t%1000 == 0:
            print(t, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()
        
    final_train_loss5.append(loss.item())
    
    X_test = torch.from_numpy(X_tFold)
    X_test = X_test.float()
    Y_test_pred = model(X_test)
    Y_test_pred = Y_test_pred.detach().numpy()
    Y_test_pred = Y_test_pred.squeeze()
    Y_test = Y_tFold.squeeze()
    Y_mean = np.mean(Y_test)
    SS_res = np.sum(np.power(Y_test_pred - Y_test, 2))
    
    final_test_loss5.append(SS_res)

0 507.3868713378906
1000 1.6394224166870117
2000 1.4414353370666504
3000 1.3141032457351685
4000 1.2668482065200806
5000 1.2456190586090088
6000 1.2376717329025269
7000 1.2318289279937744
8000 1.224199891090393
9000 1.2163296937942505
10000 1.209450602531433
11000 1.2036579847335815
12000 1.1987675428390503
13000 1.1954026222229004
14000 1.1927059888839722
15000 1.1899595260620117
16000 1.186320185661316
17000 1.171823501586914
18000 1.1614502668380737
19000 1.1529793739318848
20000 1.144784927368164
21000 1.1399708986282349
22000 1.134496808052063
23000 1.1278811693191528
24000 1.119404911994934
25000 1.1119036674499512
26000 1.104179859161377
27000 1.0858913660049438
28000 1.0815588235855103
29000 1.0768496990203857
30000 1.0722092390060425
31000 1.0705841779708862
32000 1.0696285963058472
33000 1.0687506198883057
34000 1.0680444240570068
35000 1.0672287940979004
36000 1.066635012626648
37000 1.0655672550201416
38000 1.0649309158325195
39000 1.0641779899597168
40000 1.063334822654724

In [42]:
np.array(final_test_loss5).mean()

0.1470468269341275