In [None]:
import numpy as np
from numpy import random
from numpy import math
import matplotlib  
import matplotlib.pyplot as plt 

# copy
import copy


#Import Keras and TF
import tensorflow as tf
from tensorflow import keras

from tensorflow.python.keras.models import clone_model
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Add, Dense, Activation, Flatten, Conv2D, Conv1D, MaxPooling2D, Dropout,BatchNormalization, Input, concatenate, Lambda
from tensorflow.python.keras.callbacks import Callback
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator


from numpy import linalg as LA
from tensorflow.python.keras import backend as K

In [None]:
# Load the pre-generated datasets from disk and build the train/test split.

# Forward slashes are accepted by Windows as well as POSIX systems, so the
# paths do not depend on the Windows-only '\\' separator.
Path_trivial = 'data_A_expended/trivial.npy'
Path_0 = 'data_A_expended/C_0.npy'
Path_1 = 'data_A_expended/C_1.npy'
Path_random = 'data_A_expended/C_random.npy'

# data with label 0
data_0 = np.load(Path_trivial)

# data with label 1
data_1 = np.load(Path_1)

# Both classes are sliced with the same indices below, so a shape mismatch
# would otherwise only show up as an obscure broadcasting error.
if data_0.shape != data_1.shape:
    raise ValueError(
        'data_0 and data_1 must have the same shape, got {} and {}'.format(data_0.shape, data_1.shape)
    )

# the training dataset: the first 95% of the samples of each class
N_train = round(0.95 * data_0.shape[0])

N_train_all = 2 * N_train

# label-0 samples first, label-1 samples second
train_data = np.concatenate((data_0[:N_train], data_1[:N_train]), axis=0).astype(float)
train_label = np.concatenate((np.zeros(N_train, dtype=float), np.ones(N_train, dtype=float)))


# the test dataset: the remaining samples of each class
N_test = data_0.shape[0] - N_train

N_test_all = 2 * N_test

test_data = np.concatenate((data_0[N_train:], data_1[N_train:]), axis=0).astype(float)
test_label = np.concatenate((np.zeros(N_test, dtype=float), np.ones(N_test, dtype=float)))

print(train_data.shape, train_label.shape)
print(test_data.shape, test_label.shape)

In [None]:
def setup_network(input_shape=None, activation='relu', l2=0.0):
    """Build the convolutional network used in this notebook.

    The model is one 2x2 convolution followed by 1x1 convolutions that reduce
    the channel count from 512 down to 1, a Flatten layer, and a Lambda layer
    that sums the flattened values of each sample into a single number.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input sample (without the batch axis). Defaults to
        ``(data_0.shape[1], data_0.shape[2], data_0.shape[3])`` taken from
        the module-level array ``data_0``.
    activation : str, optional
        Activation of the hidden convolution layers. The final convolution
        is always linear.
    l2 : float, optional
        Factor passed to ``regularizers.l2`` for every convolution kernel.
        The default 0.0 means no effective regularization.

    Returns
    -------
    Model
        The (uncompiled) Keras model; its output has shape ``(batch, 1)``.
    """
    if input_shape is None:
        input_shape = (data_0.shape[1], data_0.shape[2], data_0.shape[3])

    input1 = Input(shape=input_shape)

    # (filters, kernel size, layer name) of the hidden convolution layers,
    # in the order in which they are stacked.
    hidden_layers = (
        (512, (2, 2), 'conv1_1'),
        (256, (1, 1), 'conv2_1'),
        (128, (1, 1), 'conv3_1'),
        (64, (1, 1), 'conv4_1'),
        (32, (1, 1), 'conv5_1'),
    )

    features = input1
    for filters, kernel_size, layer_name in hidden_layers:
        features = Conv2D(
            filters,
            kernel_size=kernel_size,
            padding='valid',
            activation=activation,
            kernel_regularizer=regularizers.l2(l2),
            name=layer_name,
        )(features)

    # single-channel linear map: one number per remaining grid position
    conv_all_1 = Conv2D(
        1,
        kernel_size=(1, 1),
        padding='valid',
        activation='linear',
        kernel_regularizer=regularizers.l2(l2),
        name='conv2_all_1',
    )(features)

    flat = Flatten()(conv_all_1)

    # sum layer: add up all per-position values of a sample, keep shape (batch, 1)
    dense1 = Lambda( lambda x: tf.reshape(K.sum(x, axis = 1), (-1, 1)) , name='output1')(flat)

    model = Model(inputs=input1, outputs=dense1)

    return model


# Build the model with setup_network()'s defaults and print its layer summary.
network = setup_network()

network.summary()

In [None]:
# functions for doing 3d rotations: artificially expand the datasets during the training
def matrix_rot(phi, axis):
    """Return the 3x3 rotation matrix for angle ``phi`` (in radians).

    Parameters
    ----------
    phi : float
        Rotation angle in radians.
    axis : int
        Selects which matrix is returned:
        0 - rotation mixing the first two coordinates (about the third axis),
        1 - rotation mixing the first and third coordinates (about the second axis),
        2 - rotation mixing the last two coordinates (about the first axis).

    Returns
    -------
    numpy.ndarray
        Array of shape (3, 3).

    Raises
    ------
    ValueError
        If ``axis`` is not 0, 1 or 2 (previously ``None`` was returned
        silently in that case).
    """
    # numpy's own functions are used instead of the `numpy.math` alias,
    # which newer NumPy releases no longer provide.
    c = np.cos(phi)
    s = np.sin(phi)

    if axis == 0:
        return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])

    if axis == 1:
        return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])

    if axis == 2:
        return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])

    raise ValueError('axis must be 0, 1 or 2, got {!r}'.format(axis))

def rotate_random(states):
    """Apply one random 3D rotation to every vector stored in ``states``.

    Three angles are drawn uniformly from [-pi, pi) and the corresponding
    matrices from ``matrix_rot`` (axis 0, then 1, then 2) are applied in that
    order. The same rotation is used for all vectors of all samples.

    Parameters
    ----------
    states : numpy.ndarray
        Array whose last axis holds 3-component vectors, e.g. shape
        ``(n_samples, n_x, n_y, 3)``.

    Returns
    -------
    numpy.ndarray
        New float array of the same shape containing the rotated vectors;
        ``states`` itself is not modified.
    """
    # The three angles are drawn in a fixed order (axis 0, 1, 2) so that the
    # global random stream is consumed exactly as before.

    # matrix_rot(..., 0) mixes the first two components: rotation about the z-axis
    phi1 = 2 * np.pi * random.random() - np.pi
    matrix1 = matrix_rot(phi1, 0)

    # matrix_rot(..., 1): rotation about the y-axis
    phi2 = 2 * np.pi * random.random() - np.pi
    matrix2 = matrix_rot(phi2, 1)

    # matrix_rot(..., 2) mixes the last two components: rotation about the x-axis
    phi3 = 2 * np.pi * random.random() - np.pi
    matrix3 = matrix_rot(phi3, 2)

    # Compose once: v -> matrix3 . matrix2 . matrix1 . v
    rotation = np.matmul(matrix3, np.matmul(matrix2, matrix1))

    # Vectors sit on the last axis as rows, so multiply by the transpose;
    # this rotates every vector in one vectorised call instead of a
    # Python loop over samples and grid points.
    states_final = np.matmul(np.asarray(states, dtype=float), rotation.T)

    return states_final
    

In [None]:
# Two-stage training: 200 epochs with learning rate 0.0001, followed by
# 200 epochs with learning rate 0.00001.
N_EPOCHS_PER_STAGE = 200

for stage_learning_rate in (0.0001, 0.00001):

    # A fresh optimizer is created and the model re-compiled for each stage.
    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
    my_adam = keras.optimizers.Adam(learning_rate=stage_learning_rate)
    network.compile(loss='mae', optimizer=my_adam, metrics=['accuracy'])

    # fit() is called one epoch at a time so that the training data can be
    # re-rotated before every epoch if the augmentation below is enabled.
    for i in range(N_EPOCHS_PER_STAGE):
        print('Epoch: ', i)

        # rotate the states to artificially expand the training dataset - avoid overfitting (optional)
        # (when enabling this, pass train_data_rotated to fit() instead of train_data)
        #train_data_rotated = rotate_random(train_data)

        network.fit(train_data, train_label, validation_data=(test_data, test_label), batch_size=512, epochs=1, shuffle=True)
        

In [None]:
# Save the network
# Writes the trained model to the file named below (an .h5 file).
# NOTE(review): the path uses the Windows '\\' separator and presumably expects
# the 'networks' directory to exist already - confirm before running elsewhere.

filepath = 'networks\\A_2d.h5'

network.save(filepath)

In [None]:
# Reload the network from the file written by the save cell, replacing the
# in-memory `network` object.
# NOTE(review): the model contains a Lambda layer defined with an inline lambda
# that refers to `tf` and `K`; presumably those names must be imported in the
# session that loads the file - confirm.
from tensorflow.python.keras.models import load_model


# same path as used when saving
filepath = 'networks\\A_2d.h5'

network = load_model(filepath)

In [None]:
# Network output for every training sample, shown as a histogram with 100 bins.
chern_num = network.predict(train_data)  
plt.hist(chern_num, 100)

In [None]:
# Network output for every test sample, shown as a histogram with 100 bins.
# Note: this overwrites the `chern_num` computed for the training data.
chern_num = network.predict(test_data)  
plt.hist(chern_num, 100)

In [None]:
# create a dataset of random states (preprocessed in the same way as the training and test datasets)
def create_A_random(N_samples, N_k):
    """Create samples of random unit vectors on a periodic 2D grid.

    For each of the ``N_k x N_k`` grid points of every sample, three
    components are drawn uniformly from [-0.5, 0.5) and the resulting vector
    is normalised to unit length. The grid is then closed periodically: the
    extra last row and column repeat the first row and column.

    Parameters
    ----------
    N_samples : int
        Number of samples to generate.
    N_k : int
        Number of independent grid points per direction.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N_samples, N_k + 1, N_k + 1, 3)``.
    """
    data = np.zeros((N_samples, N_k + 1, N_k + 1, 3), dtype = float)

    # Draw all components at once. The block is filled in
    # (sample, i_x, i_y, component) order, i.e. the random stream is consumed
    # in the same order as drawing h_x, h_y, h_z point by point.
    h = random.random((N_samples, N_k, N_k, 3)) - 0.5

    # length of every vector
    E = (h[..., 0]**2 + h[..., 1]**2 + h[..., 2]**2)**0.5

    data[:, :N_k, :N_k, :] = h / E[..., np.newaxis]

    # periodic boundary conditions: copy the first row into the last row,
    # then the first column into the last column. The second copy also fills
    # the corner (N_k, N_k) with the value at (0, 0), so no separate corner
    # assignments are needed.
    data[:, N_k, :, :] = data[:, 0, :, :]
    data[:, :, N_k, :] = data[:, :, 0, :]

    return data

# interpolate from (N_k + 1, N_k + 1)  to (2*N_k + 1, 2*N_k + 1) k-points
def expend(data):
    """Refine every sample from (N_k + 1) x (N_k + 1) to (2*N_k + 1) x (2*N_k + 1) grid points.

    The original points are placed on the even indices of the finer grid.
    Every new point is filled with ``interpolate`` of two already known
    even-index neighbours: the two neighbours along x, the two neighbours
    along y, or (for cell centres) the two corners (2i, 2j) and
    (2i + 2, 2j + 2) of the surrounding cell.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape ``(n_samples, N_k + 1, N_k + 1, 3)``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, 2*N_k + 1, 2*N_k + 1, 3)``.
    """
    cells = np.shape(data)[1] - 1
    points = cells + 1
    fine_points = 2 * cells + 1

    refined = np.zeros((data.shape[0], fine_points, fine_points, 3), dtype = float)

    for sample in range(data.shape[0]):
        # view into the output for the current sample
        grid = refined[sample]

        # 1) original points go to the even/even positions
        for a in range(points):
            for b in range(points):
                grid[2*a, 2*b, :] = data[sample, a, b, :]

        # 2) edge midpoints; both kinds only read even/even positions,
        #    so they can be filled in the same pass
        for a in range(cells):
            for b in range(points):
                # midpoint between neighbours along the first grid axis
                grid[2*a + 1, 2*b, :] = interpolate(grid[2*a, 2*b, :], grid[2*a + 2, 2*b, :])
                # midpoint between neighbours along the second grid axis
                grid[2*b, 2*a + 1, :] = interpolate(grid[2*b, 2*a, :], grid[2*b, 2*a + 2, :])

        # 3) cell centres from the two corners on the main diagonal of the cell
        for a in range(cells):
            for b in range(cells):
                grid[2*a + 1, 2*b + 1, :] = interpolate(grid[2*a, 2*b, :], grid[2*a + 2, 2*b + 2, :])

    return refined


# interpolation between two vectors      
def interpolate(H1, H2):
    """Return the sum ``H1 + H2`` rescaled to unit length.

    Parameters
    ----------
    H1, H2 : numpy.ndarray
        The two vectors to interpolate between.

    Returns
    -------
    numpy.ndarray
        ``(H1 + H2) / |H1 + H2|``. If the sum is the zero vector
        (``H1 == -H2``) the norm is zero and the division is by zero.
    """
    direction = H1 + H2
    length = np.linalg.norm(direction)
    return direction / length
 


# Generate 1000 random samples with N_k = 10 (an 11 x 11 grid including the
# periodic boundary) and refine them with expend() to 21 x 21 grid points.
N_samples = 1000
N_k = 10
data_random = expend(create_A_random(N_samples, N_k))

In [None]:
# evaluate on a dataset of random states (observe quantization of the output)
# The network output for every random sample is shown as a histogram with 100 bins.
winding_num = network.predict(data_random)  
plt.hist(winding_num, 100)