### import

In [203]:
import torch
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import sys
import time

from torch import nn

### control line

In [204]:

# Seed value intended for reproducibility.
# NOTE(review): no visible cell passes this to torch/numpy — confirm it is applied elsewhere.
random_seed = 42


# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# device = "cpu"

# Which CNN backbone to use (uncomment exactly one).
# model_select_signal = 'resnet18'
model_select_signal = 'vgg16'
# model_select_signal = 'densenet121'
# model_select_signal = 'feature vec'


# Which dataset to use (uncomment exactly one).
data_select_signal = 'skin'
# data_select_signal = 'chest CT'
# data_select_signal = 'ocularDisease'

# Whether the input images are RGB.
isRGB = True
# isRGB = False

# Use the image feature vector as an additional (multi-modal) input.
enable_muti_modal_signal = True
# enable_muti_modal_signal = False

# Which hidden layer's output to use, counted from the end of the CNN model.
hidden_layer_selector = 34


In [205]:

# Folder that holds the trained models of each supported dataset.
model_folders = {
    'skin': "../model/HAM10000",
    'chest CT': "../model/CT chest",
    'ocularDisease': "../model/ocularDisease",
}
if data_select_signal not in model_folders:
    # Fail with a clear message instead of a NameError on model_folder_path below.
    raise ValueError(
        f"Unknown data_select_signal {data_select_signal!r}; "
        f"expected one of {sorted(model_folders)}"
    )
model_folder_path = model_folders[data_select_signal]


xgb_model_folder = model_folder_path + "/XGB/"
# Base name shared by the XGBoost model file and the generated VHDL file.
xgb_output_model_name = data_select_signal + "_" + model_select_signal + "_" + "layer" + str(hidden_layer_selector)

# The multi-modal model is stored under a separate "_MM" file name.
xgb_model_suffix = "_FPGA_MM" if enable_muti_modal_signal else "_FPGA"
xgb_model_path = xgb_model_folder + xgb_output_model_name + xgb_model_suffix + ".json"

vhd_file_name = xgb_output_model_name + ".vhd"


In [206]:
# Display the VHDL file name derived from the configuration above.
vhd_file_name

'skin_vgg16_layer34.vhd'

### Data preprocessing

set transform

In [207]:
# Preprocessing pipeline: resize, center-crop to 224, convert to a tensor,
# then normalize each channel with the mean/std values below.
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform_std = transforms.Compose(preprocessing_steps)

# Alias for the standard transform.
transform = transform_std

load dataset

In [208]:
import os
sys.path.append(os.path.abspath('..'))


from data.HAM10000.ham10000Dataloader import HAM10000DataProcessor
from data.chestCTData.chestCTDataloader import ChestCTDataProcessor
from data.ocularDisease.ocularDataloader import OcularDiseaseDataProcessor

# Data processor class for each supported dataset; only the selected one is instantiated.
data_processors = {
    'skin': HAM10000DataProcessor,
    'chest CT': ChestCTDataProcessor,
    'ocularDisease': OcularDiseaseDataProcessor,
}
processor_cls = data_processors.get(data_select_signal)
if processor_cls is None:
    raise ValueError('需要指定dataset類別')
dataContainer = processor_cls(transform=transform_std)

# Train/test dataloaders and the matching file name lists.
train_dataloader, test_dataloader = dataContainer.getDataloaders()
train_files, test_files = dataContainer.getDatasetFilenames()
# feature_vector_file_path = dataContainer.getFeatureVectorFilename()

# Number of target classes; used below to size the classifier heads.
num_classes = dataContainer.getNumClasses()


### load CNN and create feature data

load CNN model

In [209]:
from torchinfo import summary



#  ===================================
# Load the trained ResNet18 model.
resnet18 = models.resnet18(pretrained=True)
num_ftrs = resnet18.fc.in_features
# Replace the final fully connected layer so the output size is num_classes.
resnet18.fc = nn.Linear(num_ftrs, num_classes)
# map_location="cpu" lets a checkpoint that was saved on a GPU load on a
# CPU-only machine; the selected model is moved to `device` afterwards.
resnet18.load_state_dict(
    torch.load(model_folder_path + "/best_model_pretrain_Resnet18.pth", map_location="cpu")
)

#  ===================================
# Load the trained VGG16 model.
vgg16 = models.vgg16(pretrained=True)
# Drop the last fully connected layer of the original classifier ...
classifier = list(vgg16.classifier.children())[:-1]

# ... and append a new fully connected layer whose output size is the number of classes.
classifier.append(torch.nn.Linear(4096, num_classes))

# Replace the original classifier with the rebuilt one.
vgg16.classifier = torch.nn.Sequential(*classifier)

# Load the weights BEFORE wrapping the classifier below: wrapping nests the
# classifier one level deeper, which changes its state-dict keys.
vgg16.load_state_dict(
    torch.load(model_folder_path + "/best_model_pretrain_VGG16.pth", map_location="cpu")
)

# Put an nn.Flatten module in front of the classifier so that flattening is
# done by a layer (nn.Sequential) instead of the torch.flatten call.
new_classfier = nn.Sequential(
    nn.Flatten(),
    vgg16.classifier,
)

vgg16.classifier = new_classfier


#  ===================================
# 載入訓練好的densenet

# 加载预训练的 DenseNet121
# densenet121 = models.densenet121(pretrained=True)

# # Optimizer
# optimizer = torch.optim.SGD(densenet121.parameters(), lr = 0.001) # 選擇你想用的 optimizer
# # optimizer = torch.optim.Adam(model_densenet121.parameters(), lr =0.01)

# # Loss function
# loss_fn = nn.CrossEntropyLoss()                

# # 更換classifier的輸出
# densenet121.classifier = nn.Linear(densenet121.classifier.in_features, num_classes)


# densenet121.load_state_dict(torch.load(model_folder_path + "/best_model_pretrain_VGG16.pth"))


#  ========================================
# Empty model used when training on the feature vector only.
class EmptyModel(nn.Module):
    """Placeholder network that contributes no CNN features.

    ``forward`` ignores the content of its input and returns a float32 tensor
    of shape ``(batch_size, 0)``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return an empty ``(x.size(0), 0)`` float32 tensor on ``x``'s device."""
        # Allocate on the input's device so the output follows the input
        # (previously the tensor was always created on the CPU).
        return torch.empty((x.size(0), 0), dtype=torch.float32, device=x.device)




select which model would be used

In [210]:
# Decide which model to use.
model_0 = None

if model_select_signal == 'resnet18':
    model_0 = resnet18
elif model_select_signal == 'vgg16':
    model_0 = vgg16
elif model_select_signal == 'feature vec':
    model_0 = nn.Sequential(EmptyModel())
elif model_select_signal == 'densenet121':
    # NOTE: the DenseNet121 loading code is commented out in this notebook, so
    # this branch raises NameError until that code is re-enabled.
    model_0 = densenet121
else:
    # Fail with a clear message instead of AttributeError on None below.
    raise ValueError(f"Unknown model_select_signal: {model_select_signal!r}")


model_0 = model_0.to(device)

# Show the layer-by-layer summary for a single 3x224x224 input.
summary(model_0, input_size=[1,3,224,224])

# get var name# get var name
# model_0_name = [name for name, val in globals().items() if val == model_0][0]

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 7]                    --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

select hidden layer of CNN model as the final output

In [211]:
from helperFunction.helperFunctions import createDetailLayerVersions 
# detail version

list_of_models = createDetailLayerVersions(model_0)

# hidden_layer_selector counts from the end of the list, so it must lie in
# 1..len(list_of_models). A larger value would turn into a negative index and
# silently pick a different sub-model, so reject it explicitly.
num_layer_versions = len(list_of_models)
if not 1 <= hidden_layer_selector <= num_layer_versions:
    raise ValueError(
        f"hidden_layer_selector must be between 1 and {num_layer_versions}, "
        f"got {hidden_layer_selector}"
    )

# Each entry is indexable; element 0 is used as the truncated CNN model.
cnn_model = list_of_models[num_layer_versions - hidden_layer_selector][0]

總層數為: 44層


In [212]:
# Print the layer structure of the selected (truncated) CNN model.
print(cnn_model)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Calculate multi-modal vector (image feature & CNN feature)

In [213]:
from helperFunction.helperFunctions import dataloaderToFeatureData , calImgFeatureVector
# Containers for per-image feature vectors of the train / test sets.
# NOTE(review): every statement in this cell that fills them is commented out,
# so they stay empty — confirm whether they are still needed.
train_img_feature_vector = []
test_img_feature_vector = []

# def print_list_dimensions(lst):
#     dimensions = []
#     while isinstance(lst, list):
#         dimensions.append(len(lst))
#         lst = lst[0] if len(lst) > 0 else []
#     print("Dimensions:", " x ".join(map(str, dimensions)))

#     return


# if enable_muti_modal_signal:

#     for idx , (batch_data, label) in enumerate(train_dataloader):
#         for img in batch_data:
#             if idx == 1:
#                 print(batch_data)
#             pass
            # train_img_feature_vector.append(cnn_model(img) + calImgFeatureVector(img, isRGB=isRGB))

# else : 
#     for idx , (batch_data, label) in enumerate(train_dataloader):
#         for img in batch_data:
#             if idx == 1:
#                 print(batch_data)
#             pass
#             if idx == 10:
#                 break
            # train_img_feature_vector.append(cnn_model(img))
    
    # print_list_dimensions(train_img_feature_vector)

    # for idx , (batch_data, label) in enumerate(test_dataloader):
    #     for img in batch_data:
    #         test_img_feature_vector.append(calImgFeatureVector(img, isRGB=isRGB))

In [214]:
# @ unit test : check that the selected sub-model can run a forward pass
# on a single 3x224x224 input and show its per-layer output shapes.
summary(cnn_model, input_size=[1,3,224,224])

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1, 512, 7, 7]            --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

### Create feature vector

In [215]:
from helperFunction.helperFunctions import calImgFeatureVector
# data_iter = iter(train_dataloader)
data_iter = iter(test_dataloader)

img_feature_vector = []
img_label_vector = []

# Number of batches to convert into feature vectors.
num_feature_batches = 4

# Inference only: use evaluation mode and skip gradient tracking.
cnn_model.eval()
with torch.no_grad():
    # zip() stops at whichever iterable runs out first, so a dataloader with
    # fewer than num_feature_batches batches does not raise StopIteration.
    for _, (batch_data, batch_labels) in zip(range(num_feature_batches), data_iter):

        for sample_idx in range(batch_data.size(0)):
            data = batch_data[sample_idx].to(device)
            # Add a batch dimension of 1, run the CNN, then flatten to a 1-D vector.
            feature = cnn_model(torch.unsqueeze(data, 0))
            feature = torch.flatten(feature, start_dim=0, end_dim=-1)
            if enable_muti_modal_signal:
                # Concatenate the CNN features with the image feature vector;
                # the RGB flag comes from the configuration cell.
                img_feature_vector.append(
                    [*feature.tolist(), *calImgFeatureVector(data.to("cpu"), isRGB=isRGB)]
                )
            else:
                img_feature_vector.append(feature.tolist())

        # Collect the labels of this batch in the same order as the features.
        for ele in batch_labels.tolist():
            img_label_vector.append(ele)

# if enable_muti_modal_signal:
#     pass
# else : 
#     for data in first_batch_data :
#         train_img_feature_vector.append(cnn_model(datao))





In [216]:
# Debug check: `ele` is the loop variable left over from the label-collection
# loop in the previous cell, i.e. the last label that was appended.
print(ele)

1


In [217]:
# Number of collected labels.
len(img_label_vector)
# img_label_vector[0]

128

In [218]:
# Shape of the last flattened CNN feature computed in the extraction loop.
feature.shape

torch.Size([25088])

In [219]:
# Number of extracted feature vectors.
print(len(img_feature_vector))

128


In [220]:
# Length of a single feature vector.
print(len(img_feature_vector[0]))

25129


### import xgb_tree_on_vhdl

In [221]:
# Print the first feature vector in full (every element is written to the output).
print(img_feature_vector[0])
# print(fi)


[0.0, 0.6375657320022583, 0.47173184156417847, 0.6569995284080505, 0.0, 0.0, 1.0114192962646484, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7949013113975525, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8427569270133972, 1.0346118211746216, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9758139848709106, 1.2245975732803345, 0.0, 0.0, 0.06473009288311005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12909241020679474, 1.630709171295166, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.9580278396606445, 4.289695739746094, 0.7786515355110168, 2.8433899879455566, 3.2182509899139404, 0.7808862924575806, 0.7383480072021484, 1.0721583366394043, 0.0, 0.0, 0.0, 0.445088654756546, 0.0, 0.44646155834198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9547855854034424, 0.18279753625392914, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4635648727416

In [222]:
# @ unit test : check the predictions of the saved XGBoost model
import xgboost  as xgb

import json

# Read the model's JSON file to recover the feature names stored in it.
with open(xgb_model_path, 'r') as f:
    model_json = json.load(f)
feature_name = model_json['learner']["feature_names"]

# Restore the booster from the same file.
model_xgb_2 = xgb.Booster()
model_xgb_2.load_model(xgb_model_path)

# One column per extracted feature, named f0, f1, ...
column_names = ['f' + str(idx) for idx in range(len(img_feature_vector[0]))]
X_test_ori = pd.DataFrame(img_feature_vector, columns=column_names)

# feature_name = ['f1', 'f4', 'f0', 'f6', 'f5', 'f2', 'f3'] 

# Select the columns listed in the model file, in that order.
X_test = X_test_ori[feature_name]
dtest = xgb.DMatrix(X_test, label=img_label_vector)

# output_margin=True requests the raw margin scores.
y_pred = model_xgb_2.predict(dtest, output_margin=True)

print(y_pred)
print(img_label_vector)

# Plain-list copy of the selected feature columns.
data_list = X_test.values.tolist()


[[-2.0189404   3.6702979  -5.3376613  -5.365902   -2.2410352   0.9773396
  -5.8361344 ]
 [ 1.4674633   2.762826   -6.3489823  -5.6346745  -3.5727496  -4.6023326
  -6.1664505 ]
 [-0.16140279  4.8131094  -5.979347   -5.850036   -4.6832314  -3.7098424
  -6.1157665 ]
 [-3.406332    7.0984244  -6.610758   -6.324422   -5.303928   -5.72745
  -6.1664505 ]
 [-3.3824718  -2.979487   -2.6248467  -3.5121062   3.4502904   1.2571242
  -5.2586    ]
 [-4.967459    7.366323   -6.056444   -6.220767   -5.1456566  -3.8121982
  -5.9359965 ]
 [ 0.8708297   1.2077122  -2.6835425  -1.6673548  -2.0738878  -5.184292
  -5.7394376 ]
 [-3.395966    4.9782043  -5.9409266  -5.437029   -3.5890198  -4.483947
  -5.9359965 ]
 [-4.2053933   7.08639    -6.1122155  -6.187567   -4.921874   -5.350265
  -5.645543  ]
 [-0.5239506   2.8239207  -5.4085484  -5.7397475  -1.5064464  -5.6938477
  -6.1157665 ]
 [-0.13536203  5.308015   -5.10089    -4.5115027  -2.9231348  -5.259404
  -6.031693  ]
 [-0.8586777   7.33613    -6.076555   

In [223]:
# Plain-list inputs for xgb_manager: all feature columns in their original order.
X_test = X_test_ori.values.tolist()
y_test = img_label_vector

# Directory that receives the generated VHDL file.
# NOTE: machine-specific absolute path — adjust when running elsewhere.
vhdl_output_dir = "C:/Users/E/Desktop/decision tree to vhdl/FPGA_accelerator_for_GBDT/FPGA_VHDL_code_and_data/sim"
# Use the file name derived from the configuration (vhd_file_name) instead of a
# hard-coded one, so the output file follows the selected dataset/model/layer.
vhdl_file_path = vhdl_output_dir + "/" + vhd_file_name

import sys
# Make the xgb_to_vhdl package importable; skip if already added so that
# re-running the cell does not grow sys.path.
if './xgb_to_vhdl' not in sys.path:
    sys.path.append('./xgb_to_vhdl')

init xgb manager

In [224]:
from xgb_to_vhdl.xgb_manager import XGB_manager

# Load the model file selected by the configuration (xgb_model_path) rather than
# a hard-coded path, so the manager stays in sync with the settings above.
xgb_manager = XGB_manager(xgb_model_path)

# Display the maximum class weight reported by the manager.
xgb_manager.get_max_class_weight()

12.354073466000004

In [225]:
# Fixed-point format given as [integer part, fraction part].
q_format = [5, 10]
xgb_manager.set_q_foramt(q_format)

# Generate the VHDL labeling code for the test features and labels.
xgb_manager.create_vhdl_labeling_code(vhdl_file_path, X_test, y_test)

		Addr <=  "00000000000000";
		Trees_din <= x"01189f78";
		wait for Clk_period;
		Addr <=  "00000000000001";
		Trees_din <= x"fe131838";
		wait for Clk_period;
		Addr <=  "00000000000010";
		Trees_din <= x"5b1b5120";
		wait for Clk_period;
		Addr <=  "00000000000011";
		Trees_din <= x"b3130c10";
		wait for Clk_period;
		Addr <=  "00000000000100";
		Trees_din <= x"4d1b0d08";
		wait for Clk_period;
		Addr <=  "00000000000101";
		Trees_din <= x"26150104";
		wait for Clk_period;
		Addr <=  "00000000000110";
		Trees_din <= x"ff5401cd";
		wait for Clk_period;
		Addr <=  "00000000000111";
		Trees_din <= x"ffd601cd";
		wait for Clk_period;
		Addr <=  "00000000001000";
		Trees_din <= x"5200ce04";
		wait for Clk_period;
		Addr <=  "00000000001001";
		Trees_din <= x"010501cd";
		wait for Clk_period;
		Addr <=  "00000000001010";
		Trees_din <= x"ff7b01cd";
		wait for Clk_period;
		Addr <=  "00000000001011";
		Trees_din <= x"58067c08";
		wait for Clk_period;
		Addr <=  "00000000001100";
		Trees_din

In [226]:
# @ unittest : xgb_manager.eval()

# print(data_list[0])
# print(feature_name)
# print(xgb_manager.eval(feature=data_list[3]))

# xgb_manager_res = []
# xgb_manager_score = []
# for feature in data_list:
#     xgb_manager_res.append(np.argmax(xgb_manager.eval(feature)))
#     xgb_manager_score.append(xgb_manager.eval(feature))



In [227]:
# print(xgb_manager_res)
# print(img_label_vector)

# def count_common_elements(list1, list2):
#     # First convert both lists to sets
#     set1 = set(list1)
#     set2 = set(list2)
    
#     # Find the intersection of the two sets
#     common_elements = set1.intersection(set2)
    
#     # For each common element, add the smaller of its occurrence counts in the two lists
#     count = 0
#     for element in common_elements:
#         count += min(list1.count(element), list2.count(element))
    
#     return count

# res = count_common_elements(xgb_manager_res, img_label_vector)
# print(res)

In [228]:

# print(xgb_manager_score)

set q_format

In [229]:
# xgb_manager.set_q_foramt([5,10])

# vhdl_file_path = "./" + vhd_file_name

# # xgb_manager.gen(train_img_feature_vector, first_batch_labels.tolist())
# xgb_manager.create_vhdl_labeling_code(vhdl_file_path, X_test_ori.values.tolist(),img_label_vector) 