In [None]:
# Start the visdom server first: run "python -m visdom.server" in a terminal.

In [None]:
# %reset -f

In [None]:
############ HYPER PARAMETER SETTING ############

# Architecture to train; the model-selection cell accepts 'CNN', 'NN' or 'Linear'.
ML_model = 'Linear'

# Optimisation settings.
how_many_epochs = 25
learning_rate = 0.001
batch_size = 200

# The training set is sized as a whole number of mini-batches (165 of them).
num_train_images = 165 * batch_size

In [None]:
########################################################################

In [None]:
import pandas as pd
import numpy as np
import sys
import time
import math
import random
 
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib import colors
from matplotlib.ticker import PercentFormatter

In [None]:
# np.set_printoptions(threshold=sys.maxsize)

In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn.functional as F

In [None]:
import visdom

# Client handle for the visdom server; every plot in this notebook goes through it.
vis = visdom.Visdom()
# No window id is given, so this targets the "main" environment as a whole
# (presumably to clear plots left by earlier runs - confirm against the visdom docs).
vis.close(env="main")

In [None]:
def loss_tracker(loss_plot, loss_value, num):
    """Append one point to an existing visdom line plot.

    Args:
        loss_plot: handle of the window to extend (forwarded as ``win``).
        loss_value: Tensor holding the y-value(s) to append.
        num: Tensor holding the matching x-value(s).
    """
    new_point = dict(X=num, Y=loss_value)
    vis.line(win=loss_plot, update='append', **new_point)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed every random number generator the notebook can draw from. The train/test
# split and the per-epoch batch order are produced with np.random.shuffle, so
# seeding torch alone would leave the results different on every run.
SEED = 777
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device == 'cuda':
    torch.cuda.manual_seed_all(SEED)

In [None]:
# Load sheet 'Sheet1' of the workbook that holds the path data.
df = pd.read_excel('paths_data.xlsx', sheet_name='Sheet1')
# Plain NumPy copy of the table; all later slicing is done on this array.
df_np=df.to_numpy()

In [None]:
# Sizes derived from the loaded table: one row per sample.
how_many_images = df_np.shape[0]
last_column_index = df_np.shape[1] - 1

# Whatever is not used for training is held out for testing.
num_test_images = how_many_images - num_train_images

# The last two columns are not part of a path, so a path has this many entries.
number_of_points = last_column_index - 1

# Labels are read from the final column.
Label_total = df_np[:, last_column_index]

# Path entries: every column except the last two, as integers.
paths = df_np[:, :-2].astype(np.int32)

In [None]:
# Display the path matrix as a quick visual sanity check.
paths

In [None]:
# Ascending copy of the first row of `paths`; its order defines the index
# assigned to each value in the next cell.
# NOTE(review): presumably every row contains the same set of values - confirm.
sorted_first_path=np.sort(paths[0])

In [None]:
# Map each value of the sorted first path to its position in that sorted order.
hell_dict = {value: position for position, value in enumerate(sorted_first_path)}

In [None]:
# Translate every path entry through hell_dict, working on a flat view and
# restoring the original 2-D shape at the end.
# NOTE: `paths` is overwritten here, so re-running this cell without first
# re-running the cell that builds `paths` would look up already-translated values.
tmp_paths = paths.reshape(-1)
new_paths = np.fromiter(
    (hell_dict[entry] for entry in tmp_paths),
    dtype=np.float64,
    count=len(tmp_paths),
)

paths = new_paths.astype(np.int32).reshape(paths.shape)

In [None]:
# One-hot encode every path: entry [i, j, paths[i, j]] is set to 1, the rest stay 0.
MATRIX_3d_total = np.zeros((how_many_images, number_of_points, number_of_points))

# Broadcasting an (images, 1) index against a (1, points) index addresses every
# (image, position) pair at once; `paths` supplies the matching third index.
image_axis = np.arange(how_many_images)[:, np.newaxis]
position_axis = np.arange(last_column_index - 1)[np.newaxis, :]
MATRIX_3d_total[image_axis, position_axis, paths] = 1

# PyTorch expects (batch size, channel, height, width), so insert a channel axis of size 1.
MATRIX_3d_total_reshaped = MATRIX_3d_total.reshape(
    how_many_images, 1, number_of_points, number_of_points
)

In [None]:
# We now have MATRIX_3d_total and corresponding label

In [None]:
# Shuffle the sample indices, then cut them into a training and a test part.
total_indices = np.arange(how_many_images)
np.random.shuffle(total_indices)
train_indices = total_indices[:num_train_images]
test_indices = total_indices[num_train_images:]

# Gather the matrices and labels for each part and wrap them as torch tensors.
MATRIX_3d_TRAINING, MATRIX_3d_TESTING = (
    torch.from_numpy(MATRIX_3d_total_reshaped[subset])
    for subset in (train_indices, test_indices)
)
label_TRAINING, label_TESTING = (
    torch.from_numpy(Label_total[subset])
    for subset in (train_indices, test_indices)
)

In [None]:
# # Debugging
# np.set_printoptions(threshold=sys.maxsize)
# label_TESTING[32]
# MATRIX_3d_TESTING[32,:,:,0]
# paths[test_indices[32]]

In [None]:
# Report the shapes of the four tensors produced by the split.
for _split_tensor in (MATRIX_3d_TRAINING, MATRIX_3d_TESTING, label_TRAINING, label_TESTING):
    print(_split_tensor.shape)

In [None]:
########################## PREPROCESSING DONE ##########################

In [None]:
class CNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a two-layer classifier.

    Input layout follows the PyTorch convention (batch size, channel, height,
    width) with a single channel and a square ``num_points`` x ``num_points``
    image. The output has two logits per sample.

    Args:
        num_points: side length of the square input. When omitted, the
            notebook-level ``number_of_points`` is used, which keeps the
            original ``CNN()`` call working unchanged.
    """

    def __init__(self, num_points=None):
        super().__init__()

        if num_points is None:
            num_points = number_of_points
        num_points = int(num_points)

        self.layer1 = self._conv_stage(1, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.layer3 = self._conv_stage(64, 128)

        # Each of the three MaxPool2d(2) layers halves the side length (rounding
        # down), so the feature map entering fc1 has side num_points // 8.
        pooled_side = num_points // 8
        self.fc1 = nn.Linear(pooled_side * pooled_side * 128, 800)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(800, 2, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 same-padding convolution + ReLU + 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

    def forward(self, x):
        """Return the (batch size, 2) logits for a (batch size, 1, H, W) input."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)

        # Collapse everything but the batch axis before the dense layers.
        out = torch.flatten(out, start_dim=1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [None]:
class NN(nn.Module):
    """Fully connected network with one hidden layer of 800 ReLU units.

    The input is flattened per sample, so it must contain
    ``num_points * num_points`` values per sample. The output has two logits
    per sample.

    Args:
        num_points: side length of the square input. When omitted, the
            notebook-level ``number_of_points`` is used, which keeps the
            original ``NN()`` call working unchanged.
    """

    def __init__(self, num_points=None):
        super().__init__()

        if num_points is None:
            num_points = number_of_points
        num_points = int(num_points)

        self.fc1 = torch.nn.Linear(num_points * num_points, 800, bias=True)
        self.relu = nn.ReLU()
        self.fc2 = torch.nn.Linear(800, 2, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Return the (batch size, 2) logits for ``x``."""
        # flatten (unlike view) also accepts non-contiguous inputs.
        out = torch.flatten(x, start_dim=1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [None]:
class Linear(nn.Module):
    """Single linear layer mapping the flattened input to two logits.

    Args:
        num_points: side length of the square input. When omitted, the
            notebook-level ``number_of_points`` is used, which keeps the
            original ``Linear()`` call working unchanged.
    """

    def __init__(self, num_points=None):
        super().__init__()

        if num_points is None:
            num_points = number_of_points
        num_points = int(num_points)

        self.fc1 = torch.nn.Linear(num_points * num_points, 2, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)

    def forward(self, x):
        """Return the (batch size, 2) logits for ``x``."""
        # flatten (unlike view) also accepts non-contiguous inputs.
        out = torch.flatten(x, start_dim=1)
        out = self.fc1(out)
        return out

In [None]:
# Instantiate the architecture named by ML_model and move it to the chosen device.
available_models = {'CNN': CNN, 'NN': NN, 'Linear': Linear}

if ML_model not in available_models:
    raise NameError('Select proper ML model!!')

model = available_models[ML_model]().to(device)

In [None]:
# Smoke test: push one dummy sample through the model and print the output shape.
# torch.zeros replaces torch.Tensor(sizes), which hands back uninitialised memory
# (possibly NaN/inf); no_grad avoids building an autograd graph that is never used.
value = torch.zeros(1, 1, number_of_points, number_of_points, device=device)
with torch.no_grad():
    print(model(value).shape)

In [None]:
# Cross-entropy over the model's two output logits; the training loop feeds it int64 labels.
criterion = nn.CrossEntropyLoss().to(device)
# Adam over all model parameters, using the learning rate from the hyperparameter cell.
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [None]:
# Create the visdom loss window seeded with a single zero value; the handle kept in
# loss_plt is what loss_tracker later passes as `win` when appending points.
loss_plt = vis.line(Y=torch.Tensor(1).zero_(),opts=dict(title='loss_tracker', legend=['loss'], showlegend=True))

In [None]:
# training

# Positions into the training tensors; reshuffled at the start of every epoch.
remainder = np.arange(num_train_images)
total_batch = num_train_images // batch_size
epoch = 0

print('Learning Started!')

training_start_time = time.time()

model.train()

# `<` instead of an `==` exit test, so how_many_epochs = 0 trains nothing
# rather than looping forever.
while epoch < how_many_epochs:
    avg_cost = 0.0

    np.random.shuffle(remainder)

    for i in range(total_batch):
        current_batch_indices = remainder[i*batch_size:(i+1)*batch_size]

        batch_xs = (MATRIX_3d_TRAINING[current_batch_indices,:,:,:]).to(device=device, dtype=torch.float)
        batch_ys = (label_TRAINING[current_batch_indices]).to(device=device, dtype=torch.int64)

        optimizer.zero_grad()
        hypothesis = model(batch_xs)

        cost = criterion(hypothesis, batch_ys)
        cost.backward()
        optimizer.step()

        # .item() yields a plain Python float. Accumulating the tensor itself
        # would keep every batch's autograd graph referenced for the whole
        # epoch and leave avg_cost as a (possibly CUDA) tensor.
        avg_cost += cost.item() / total_batch

    print('[Epoch:{}] cost = {}'.format(epoch+1, avg_cost))
    loss_tracker(loss_plt, torch.Tensor([avg_cost]), torch.Tensor([epoch]))

    epoch = epoch + 1

training_end_time = time.time()
print('Learning Finished!')
print('Elapsed time: {}(s)'.format(training_end_time-training_start_time))

In [None]:
########################## POSTPROCESSING ##########################

In [None]:
prediction_start_time = time.time()

# Score every sample (training and test alike) in a single forward pass.
all_the_matrices = torch.from_numpy(MATRIX_3d_total_reshaped).to(device=device, dtype=torch.float)

# Inference mode for any layer that behaves differently during training.
model.eval()
with torch.no_grad():
    prediction = model(all_the_matrices)
    # Softmax probability of class index 1, one value per sample.
    PROBS_GOOD_tensor = F.softmax(prediction, dim=1)[:,1]

# .numpy() only works on CPU tensors, so move the result off the GPU first;
# without .cpu() this line raises whenever device == 'cuda'.
PROBS_GOOD = PROBS_GOOD_tensor.cpu().numpy()
# Sample indices ordered from highest to lowest probability; mergesort is
# stable, so ties keep their original relative order.
predicted_ranking = np.argsort(-PROBS_GOOD, kind='mergesort').astype(np.int32) # from the best one to the worst one

prediction_end_time = time.time()

print(prediction_end_time-prediction_start_time)

In [None]:
# Persist the predicted ranking; the file name encodes model, training-set size and epoch count.
filepath = 'RANKING_{}_{}_{}.xlsx'.format(ML_model, num_train_images, how_many_epochs)
df_ranking = pd.DataFrame(predicted_ranking)

df_ranking.to_excel(filepath, index=False)

In [None]:
########################## DRAWING ##########################

In [None]:
# Re-read the ranking written above instead of reusing the in-memory array.
excel_to_read = 'RANKING_{}_{}_{}.xlsx'.format(ML_model, num_train_images, how_many_epochs)

df_ranking = pd.read_excel(excel_to_read, sheet_name='Sheet1')  # for robustness

# Reference ranking 0, 1, 2, ... with one entry per row of the data table.
actual_rank = np.arange(df_np.shape[0]).astype(np.int32)

# First column of the sheet: the predicted ranking (same as 'predicted_ranking' just above).
big_bro = df_ranking.to_numpy()[:, 0]

In [None]:
t0 = time.time()

big_bro_list = list(big_bro)

t1 = time.time()

# Position of each sample index inside the predicted ranking. Built once, so
# every lookup below is O(1); calling big_bro_list.index(k) for each k rescans
# the list every time and makes the whole step quadratic.
# setdefault keeps the first occurrence, matching what list.index returns.
# A sample index missing from the ranking raises KeyError here (list.index
# raised ValueError).
rank_of = {}
for rank, sample_index in enumerate(big_bro_list):
    rank_of.setdefault(sample_index, rank)

predicted_rank_4_train_indices = [rank_of[k] for k in train_indices]
predicted_rank_4_test_indices = [rank_of[k] for k in test_indices]

t2 = time.time()

# Time to convert the ranking to a list, then time to resolve all ranks.
print(t1-t0)
print(t2-t1)

In [None]:
# All data

plt.figure(num=None, figsize=(8, 6), dpi=200, facecolor='w', edgecolor='k')

# (x values, y values, colour, legend label) for each of the two subsets.
train_series = (predicted_rank_4_train_indices, train_indices, 'gray', 'Training data (gray)')
test_series = (predicted_rank_4_test_indices, test_indices, 'blue', 'Test data (blue)')

# Draw the larger subset first so the smaller one ends up on top of it.
if len(train_indices) < len(test_indices):
    draw_order = (test_series, train_series)
else:
    draw_order = (train_series, test_series)

for x_values, y_values, colour, legend_label in draw_order:
    plt.scatter(x_values, y_values, c=colour, s=0.4, label=legend_label)

# Diagonal on which predicted rank equals actual rank.
plt.plot(actual_rank, actual_rank, color='red', linewidth=3, label='Reference line')

nice_title = '{} Model ({} training data)'.format(ML_model, num_train_images)

plt.title(nice_title)
plt.xlabel('Predicted rank')
plt.ylabel('Actual rank')
plt.grid()
plt.legend()

total_rows = df_np.shape[0]
plt.xlim(0, total_rows)
plt.ylim(0, total_rows)

name = 'RANKING_{}_{}_{}.eps'.format(ML_model, num_train_images, how_many_epochs)

plt.savefig(name, dpi=1000)

In [None]:
# Top 100

plt.figure(num=None, figsize=(8, 6), dpi=200, facecolor='w', edgecolor='k')

# Same two subsets as the full plot, with larger markers for the close-up.
closeup_series = (
    (predicted_rank_4_train_indices, train_indices, 'gray', 'Training data (gray)'),
    (predicted_rank_4_test_indices, test_indices, 'blue', 'Test data (blue)'),
)
for x_values, y_values, colour, legend_label in closeup_series:
    plt.scatter(x_values, y_values, c=colour, s=18, label=legend_label)

# Diagonal restricted to the first 100 ranks.
top_ranks = actual_rank[0:100]
plt.plot(top_ranks, top_ranks, color='red', linewidth=1, label='Reference line')

nice_title2 = '{} Model ({} training data, Closeup)'.format(ML_model, num_train_images)
plt.title(nice_title2)
plt.xlabel('Predicted rank')
plt.ylabel('Actual rank')
plt.grid()
plt.xlim(0, 99)
plt.ylim(0, 99)
plt.legend()

name = 'RANKING_TOP100_{}_{}_{}.eps'.format(ML_model, num_train_images, how_many_epochs)

plt.savefig(name, dpi=1000)

In [None]:
# Append the timings of this run to a cumulative text log.
config = '{}_{}_{}'.format(ML_model, num_train_images, how_many_epochs)

training_time = str(training_end_time - training_start_time)
prediction_time = str(prediction_end_time - prediction_start_time)

log_entry = ''.join([
    "\n----------\n",
    config, '\n',
    'Training time: ', '{}(s)'.format(training_time), '\n',
    'Prediction time: ', '{}(s)'.format(prediction_time), '\n',
])

with open("time_elapse.txt", "a") as f:
    f.write(log_entry)
    
    

In [None]:
# Export the predicted ranks of whichever subset is strictly smaller
# (the test subset when both have the same size).
if len(train_indices) < len(test_indices):
    subset_label, subset_ranks = 'Train', predicted_rank_4_train_indices
else:
    subset_label, subset_ranks = 'Test', predicted_rank_4_test_indices

file_name = '{}_indices_{}_{}_{}.xlsx'.format(
    subset_label, ML_model, num_train_images, how_many_epochs
)
datt = np.array(subset_ranks).astype(np.int32)
    

In [None]:
# Wrap the ranks chosen in the previous cell in a single-column frame ...
df_ind = pd.DataFrame(datt)
# ... and write them, without the index column, to the file name chosen there.
df_ind.to_excel(file_name, index=False)