In [None]:
import pandas as pd
from numpy import mean
from numpy import std
import numpy as np
import timeit
from matplotlib import pyplot
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from keras.layers import Input, Dense, Flatten, Dropout
from keras.models import Model
import matplotlib.pyplot as plt
import json
SIZE = 27
from tensorflow.keras.callbacks import EarlyStopping
PADDING = 46

def load_data(data_train, data_val, data_test):
    """Read the training, validation and test CSV files.

    Args:
        data_train: Location of the training CSV, handed to ``pd.read_csv``.
        data_val: Location of the validation CSV.
        data_test: Location of the test CSV.

    Returns:
        A ``(train, val, test)`` tuple of DataFrames, in that order.
    """
    sources = (data_train, data_val, data_test)
    # Files are read in the same order as the arguments are given.
    return tuple(pd.read_csv(source) for source in sources)

def preprocess_data(data, max_values_dict):
    """Split a frame into max-scaled features and the encoded label column.

    Args:
        data: DataFrame containing the columns ``file_name``, ``label``,
            ``category_name`` and ``category_encoding`` plus feature columns.
        max_values_dict: Mapping of feature name to the divisor used for
            scaling. Features missing from the mapping are divided by 1.

    Returns:
        ``(x, y)`` where ``x`` is a new frame holding only the scaled feature
        columns and ``y`` is ``data['category_encoding']``.
    """
    target_column = 'category_encoding'
    features = data.drop(
        columns=['file_name', 'label', 'category_name', target_column]
    )

    for name in features.columns:
        divisor = max_values_dict.get(name, 1)
        # A recorded maximum of 0 cannot be divided by; the whole column is
        # set to 0 instead.
        features[name] = features[name] / divisor if divisor != 0 else 0

    return features, data[target_column]

data = []  # NOTE(review): not referenced by any code visible in this file


def _clean_labels(labels):
    """Return labels as an array with the malformed string '[0]' mapped to '0'."""
    labels = np.asarray(labels)
    # Only string/object labels can hold the '[0]' entry. Numeric arrays are
    # passed through so they are not needlessly converted to strings.
    if labels.dtype.kind in ('O', 'U', 'S'):
        labels = np.where(labels == '[0]', '0', labels)
    return labels


def _to_square_images(features, size, padding):
    """Append ``padding`` zero columns per row and reshape to (n, size, size, 1)."""
    values = np.asarray(features)
    n_rows, n_features = values.shape
    if n_features + padding != size * size:
        raise ValueError(
            f"{n_features} features + {padding} padding columns cannot be "
            f"reshaped to {size}x{size} (= {size * size} values per sample)"
        )
    padded = np.concatenate((values, np.zeros((n_rows, padding))), axis=1)
    return padded.reshape(n_rows, size, size, 1)


def prepare_data(train, val, test, max_values_dict, *, size=27, padding=46,
                 num_classes=30):
    """Turn the three raw splits into CNN-ready tensors.

    Each split goes through ``preprocess_data``. Its features are then
    zero-padded and reshaped to ``(n_samples, size, size, 1)``. Training and
    validation labels are one-hot encoded with ``to_categorical``. Test
    labels are returned exactly as ``preprocess_data`` produced them.

    Args:
        train, val, test: Raw DataFrames for the three splits.
        max_values_dict: Per-feature divisors forwarded to ``preprocess_data``.
        size: Side length of the square image each sample is reshaped to.
        padding: Number of zero columns appended to every sample.
        num_classes: Width of the one-hot label encoding.

    Returns:
        ``(x_train_deep, y_train_deep, x_val_deep, y_val_deep, x_test_deep,
        y_test)``.

    Raises:
        ValueError: If feature count plus ``padding`` differs from
            ``size * size``.
    """
    x_train, y_train = preprocess_data(train, max_values_dict)
    x_val, y_val = preprocess_data(val, max_values_dict)
    x_test, y_test = preprocess_data(test, max_values_dict)

    x_train_deep = _to_square_images(x_train, size, padding)
    x_val_deep = _to_square_images(x_val, size, padding)
    x_test_deep = _to_square_images(x_test, size, padding)

    # The '[0]' clean-up used to be applied to the training labels only;
    # validation labels get the same treatment so both encode identically.
    y_train_deep = to_categorical(_clean_labels(y_train), num_classes)
    y_val_deep = to_categorical(_clean_labels(y_val), num_classes)

    return x_train_deep, y_train_deep, x_val_deep, y_val_deep, x_test_deep, y_test

def build_CNN_model(SIZE, num_classes=30):
    """Build and compile the CNN classifier.

    Architecture: three stages of ``Conv2D`` (2x2 kernel, ReLU, same padding)
    followed by 2x2 max pooling, with 32, 32 and 64 filters. These are
    followed by ``Flatten``, ``Dense(1024)`` and ``Dense(512)`` (each with a
    0.3 dropout) and a softmax output layer.

    Args:
        SIZE: Side length of the square, single-channel input.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The model, compiled with categorical cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    deep_input = Input(shape=(SIZE, SIZE, 1))
    # The Input layer already fixes the input shape, so the first Conv2D does
    # not need an input_shape argument.
    conv1 = Conv2D(32, kernel_size=2, activation='relu', padding="same")(deep_input)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(32, kernel_size=2, activation='relu', padding="same")(pool1)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(64, kernel_size=2, activation='relu', padding="same")(pool2)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    flatten = Flatten()(pool3)
    deep = Dense(1024, activation='relu')(flatten)
    deep = Dropout(0.3)(deep)
    deep = Dense(512, activation='relu')(deep)
    deep = Dropout(0.3)(deep)
    output = Dense(num_classes, activation='softmax')(deep)
    model_CNN_PE = Model(inputs=deep_input, outputs=output)
    model_CNN_PE.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # An EarlyStopping callback was previously constructed here and then
    # discarded. A callback has no effect unless it is handed to
    # model.fit(..., callbacks=[...]), so the dead object was removed.
    return model_CNN_PE

def train_model(model, x_train, y_train, x_val, y_val, *, epochs=20,
                batch_size=32, callbacks=None):
    """Fit ``model`` on the training data while validating on ``(x_val, y_val)``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        x_train, y_train: Training inputs and targets.
        x_val, y_val: Validation inputs and targets, passed as
            ``validation_data``.
        epochs: Number of training epochs (keyword-only, default 20).
        batch_size: Mini-batch size (keyword-only, default 32).
        callbacks: Optional list of callbacks (for example an early-stopping
            callback). It is forwarded to ``fit`` only when given.

    Returns:
        Whatever ``model.fit`` returns (the training history for Keras models).
    """
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_data': (x_val, y_val),
    }
    # Leave `callbacks` out entirely when unset so the default call is
    # identical to the original one.
    if callbacks is not None:
        fit_options['callbacks'] = callbacks
    return model.fit(x_train, y_train, **fit_options)

def save_model(model, file_path):
    """Persist ``model`` by calling its ``save`` method with ``file_path``."""
    persist = model.save
    persist(file_path)

def plot_training_history(history):
    """Display the accuracy chart and then the loss chart of a training run.

    Args:
        history: Object whose ``history`` attribute maps ``'accuracy'``,
            ``'val_accuracy'``, ``'loss'`` and ``'val_loss'`` to per-epoch
            value sequences.
    """
    # (training key, validation key, chart title, y-axis label)
    charts = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    )
    for train_key, val_key, title, y_label in charts:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

def evaluate_model(model, x_test, y_test):
    """Print classification metrics for ``model`` on the test data.

    Args:
        model: Object whose ``predict`` returns per-class scores, one row per
            sample.
        x_test: Inputs forwarded to ``model.predict``.
        y_test: True class labels; its ``.values`` attribute is used.
    """
    scores = model.predict(x_test)
    predicted = np.argmax(scores, axis=1)
    actual = y_test.values

    print("Accuracy Score:", accuracy_score(actual, predicted))
    weighted_metrics = (
        ("Precision Score:", precision_score),
        ("Recall Score:", recall_score),
        ("F1 Score:", f1_score),
    )
    for caption, metric in weighted_metrics:
        print(caption, metric(actual, predicted, average='weighted'))

    print("\nClassification Report:")
    print(classification_report(actual, predicted))

    print("\nConfusion Matrix:")
    print(confusion_matrix(actual, predicted))


# Run the program: load the three CSV splits and the per-feature maxima,
# then build the CNN-ready tensors.
data_train = r"/content/drive/MyDrive/Data/New_data_26/data_new_combined_26.csv"
data_val = r"/content/drive/MyDrive/Data/New_data_26/split9.csv"
data_test = r"/content/drive/MyDrive/Data/New_data_26/split10.csv"
train, val, test = load_data(data_train, data_val, data_test)

max_values_df = pd.read_csv("/content/drive/MyDrive/Data/New_data_26/max_data_new.csv")
max_values_dict = max_values_df.set_index('Feature')['Max Value'].to_dict()

x_train_deep, y_train_deep, x_val_deep, y_val_deep, x_test_deep, y_test = prepare_data(train, val, test,
                                                                                       max_values_dict)

SIZE = 27
# The training steps below are disabled. Cells further down that use
# `model_CNN_PE` or `history` need them re-enabled (or those objects created
# some other way) before they can run.
# model_CNN_PE = build_CNN_model(SIZE)

# history = train_model(model_CNN_PE, x_train_deep, y_train_deep, x_val_deep, y_val_deep)

# history_dict = history.history

# # Save `history` to a JSON file
# with open('history.json', 'w') as f:
#     json.dump(history_dict, f)

# save_model(model_CNN_PE, '/content/model_12_07_26_lables.h5')

# plot_training_history(history)

# evaluate_model(model_CNN_PE, x_test_deep, y_test)

# A stray, indented `y_train = np.where(y_train == '[0]', '0', y_train)` used
# to follow here. It was an IndentationError at module level and `y_train`
# does not exist in this scope; the same clean-up already runs inside
# prepare_data, so the line was removed.


In [None]:
from keras.models import load_model

# Continue training a previously saved model on another set of data splits.
model = load_model("/content/DL_Category.h5")


data_train = r"/content/drive/MyDrive/Data/New_data_last_last/data_raw.csv"
data_val = r"/content/drive/MyDrive/Data/New_data_last_last/aasplit_9.csv"
# NOTE(review): this path is under "New_data_last" while the two above are
# under "New_data_last_last" - confirm this is intentional.
data_test = r"/content/drive/MyDrive/Data/New_data_last/aasplit_8.csv"

train, val, test = load_data(data_train, data_val, data_test)

# prepare_data takes (train, val, test, max_values_dict). Passing
# `exportFunction` as an extra positional argument raised TypeError.
# `exportFunction` is assumed to be a list of column names to exclude (it is
# appended to the drop lists elsewhere in this file) - TODO confirm. Those
# columns are removed here before the frames are handed over.
train, val, test = (frame.drop(columns=exportFunction) for frame in (train, val, test))

x_train_deep, y_train_deep, x_val_deep, y_val_deep, x_test_deep, y_test = prepare_data(train, val, test,
                                                                                       max_values_dict)

model.fit(x_train_deep, y_train_deep, epochs=50, batch_size=32, validation_data=(x_val_deep, y_val_deep))

model.save('test.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
from keras.models import load_model
# Evaluate the saved model on the training tensors.
model = load_model("/content/model_12_07_26_lables.h5")
y_pred = model.predict(x_train_deep)

y_pred_classes = np.argmax(y_pred, axis=1)
# y_train_deep is one-hot encoded (it is built with to_categorical), while
# y_pred_classes holds class indices. The sklearn metrics reject that mix with
# a ValueError, so the targets are collapsed to class indices as well.
y_true = np.argmax(y_train_deep, axis=1)
print("Accuracy Score:", accuracy_score(y_true, y_pred_classes))
print("Precision Score:", precision_score(y_true, y_pred_classes, average='weighted'))
print("Recall Score:", recall_score(y_true, y_pred_classes, average='weighted'))
print("F1 Score:", f1_score(y_true, y_pred_classes, average='weighted'))

print("\nClassification Report:")
print(classification_report(y_true, y_pred_classes))

print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred_classes))



ValueError: ignored

In [None]:
#y_pred_classes
# Display the most recently computed prediction array as the cell output.
y_pred

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 27, 27, 1)]       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 27, 27, 32)        160       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 13, 13, 32)        4128      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 6, 6, 32)         0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 6, 6, 64)          8256

In [None]:
# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# paths used throughout this notebook can be resolved.
from google.colab import drive
drive.mount('/content/drive')

MessageError: ignored

In [None]:
def plot_training_history(history):
    """Plot, save and display the accuracy and loss curves of a training run.

    The accuracy chart is written to ``Accurancyv410.png`` and the loss chart
    to ``Lossv410.png`` in the current working directory.

    Args:
        history: Object whose ``history`` attribute maps ``'accuracy'``,
            ``'val_accuracy'``, ``'loss'`` and ``'val_loss'`` to per-epoch
            value sequences.
    """
    # Accuracy chart. A dedicated figure keeps the saved image limited to
    # these two curves regardless of what was drawn earlier.
    accuracy_fig = plt.figure()
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    # Save BEFORE show(): once show() returns, the current figure may already
    # be closed, so a later savefig() would write an empty image.
    plt.savefig("Accurancyv410.png")
    plt.show()
    plt.close(accuracy_fig)

    # Loss chart, again on its own figure.
    loss_fig = plt.figure()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.savefig("Lossv410.png")
    plt.show()
    plt.close(loss_fig)
plot_training_history(history)

In [None]:
def evaluate_model(model, x_test, y_test):
    """Print classification metrics and the predicted classes for the test data.

    Args:
        model: Object whose ``predict`` returns per-class scores, one row per
            sample.
        x_test: Inputs forwarded to ``model.predict``.
        y_test: True class labels; its ``.values`` attribute is used.
    """
    scores = model.predict(x_test)
    predicted = np.argmax(scores, axis=1)
    actual = y_test.values

    print("Accuracy Score:", accuracy_score(actual, predicted))
    weighted_metrics = (
        ("Precision Score:", precision_score),
        ("Recall Score:", recall_score),
        ("F1 Score:", f1_score),
    )
    for caption, metric in weighted_metrics:
        print(caption, metric(actual, predicted, average='weighted'))

    print("\nClassification Report:")
    print(classification_report(actual, predicted))

    print("\nConfusion Matrix:")
    print(confusion_matrix(actual, predicted))
    # Finally dump the raw predicted class indices.
    print(predicted)
evaluate_model(model_CNN_PE, x_test_deep, y_test)

In [None]:
# Score `model_CNN_PE` on the test tensors and print every metric. The results
# stay in the notebook globals y_pred / y_pred_classes / y_true.
y_pred = model_CNN_PE.predict(x_test_deep)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = y_test.values

print("Accuracy Score:", accuracy_score(y_true, y_pred_classes))
for _caption, _scorer in (
    ("Precision Score:", precision_score),
    ("Recall Score:", recall_score),
    ("F1 Score:", f1_score),
):
    print(_caption, _scorer(y_true, y_pred_classes, average='weighted'))

for _heading, _table in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(_heading)
    print(_table(y_true, y_pred_classes))
print(y_pred_classes)

In [None]:
# Count how often each distinct value occurs in the fourth column
# (positional index 3) of the validation frame.
val.iloc[:, 3].value_counts()

In [None]:
# Display the training DataFrame as the cell output.
train

In [None]:
import pandas as pd
import numpy as np
from keras.models import load_model
PADDING = 46  # Replace with the appropriate PADDING value
SIZE = 27  # Replace with the appropriate SIZE value

def load_test_dataset(filepath):
    """Read a test CSV and split it into feature columns and labels.

    The metadata columns and every column listed in the module-level
    ``exportFunction`` are removed from the features.

    Args:
        filepath: Location of the CSV, handed to ``pd.read_csv``.

    Returns:
        ``(features, labels)`` where ``labels`` is the ``category_encoding``
        column.
    """
    frame = pd.read_csv(filepath)
    excluded = ['file_name', 'label', 'category_name', 'category_encoding'] + exportFunction
    features = frame.drop(columns=excluded)
    labels = frame['category_encoding']
    return features, labels

def standardize_data(x_test_new, max_values_dict):
    """Scale every column of ``x_test_new`` in place by its recorded maximum.

    Args:
        x_test_new: Feature DataFrame; it is modified in place.
        max_values_dict: Mapping of column name to divisor. Columns missing
            from the mapping are divided by 1.

    Returns:
        The same ``x_test_new`` object, after scaling.
    """
    for column in x_test_new.columns:
        divisor = max_values_dict.get(column, 1)
        # A recorded maximum of 0 zeroes the column instead of dividing by 0.
        x_test_new[column] = x_test_new[column] / divisor if divisor != 0 else 0
    return x_test_new

def preprocess_data(x_test_new, y_test_new):
    """Zero-pad the features and reshape them to ``(n, SIZE, SIZE, 1)``.

    Args:
        x_test_new: 2-D feature table, one row per sample.
        y_test_new: Labels; returned unchanged.

    Returns:
        ``(x_test_new_deep, y_test_new)``.
    """
    # NOTE(review): this reuses the name of the earlier
    # preprocess_data(data, max_values_dict) defined in this file. Once this
    # definition has run, calls made by prepare_data resolve to this function
    # instead - confirm that is intended.
    rows = x_test_new[:]
    zero_block = np.zeros((rows.shape[0], PADDING))
    widened = np.concatenate((rows, zero_block), 1)
    return widened.reshape(widened.shape[0], SIZE, SIZE, 1), y_test_new

def predict_accuracy(model_path, x_test_new_deep, y_test_new):
    """Load a saved model, predict classes and print/return the accuracy.

    Args:
        model_path: Location passed to ``load_model``.
        x_test_new_deep: Inputs forwarded to the model's ``predict``.
        y_test_new: True class labels compared element-wise with the
            predicted class indices.

    Returns:
        Fraction of predictions equal to the corresponding label.
    """
    network = load_model(model_path)
    predicted_labels = np.argmax(network.predict(x_test_new_deep), axis=-1)
    matches = np.sum(predicted_labels == y_test_new)
    accuracy = matches / len(predicted_labels)
    print("kết quả nhận dạng: ", accuracy)
    return accuracy

def preprocess_and_predict(filepath, max_values_dict, model):
    """Run the full test pipeline: load, scale, reshape, predict.

    Args:
        filepath: CSV location forwarded to ``load_test_dataset``.
        max_values_dict: Per-feature divisors forwarded to
            ``standardize_data``.
        model: Forwarded as the first argument of ``predict_accuracy``, which
            treats it as a path and loads the model from it.

    Returns:
        The accuracy computed by ``predict_accuracy``.
    """
    features, labels = load_test_dataset(filepath)
    scaled = standardize_data(features, max_values_dict)
    tensors, labels = preprocess_data(scaled, labels)
    return predict_accuracy(model, tensors, labels)

# Use preprocess_and_predict on the split-10 CSV.
model_path = '/content/trained_model_CNN_classification_ver410.h5'  # Replace with the appropriate CNN model
filepath = r"/content/drive/MyDrive/Data/New_data_last/aasplit_10.csv"

# Per-feature maxima: the 'Feature' column becomes the key and 'Max Value'
# the divisor.
max_values_df = pd.read_csv("/content/drive/MyDrive/Data/New_data_last/max_data.csv")
max_values_dict = max_values_df.set_index('Feature')['Max Value'].to_dict()

accuracy = preprocess_and_predict(filepath, max_values_dict, model_path)


In [None]:
# Step-by-step version of the test pipeline, run against `model_CNN_PE`.
test_new = pd.read_csv(r"/content/drive/MyDrive/Data/New_data_last/aasplit_10.csv")
x_test_new = test_new.drop(
    columns=['file_name', 'label', 'category_name', 'category_encoding'] + exportFunction
)
y_test_new = test_new['category_encoding']

print(x_test_new.shape)
# Scale each feature by its recorded maximum (division by the max value,
# not mean/std standardization).
for feature in x_test_new.columns:
    max_value = max_values_dict.get(feature, 1)  # fall back to 1 when the dict has no entry for this feature
    x_test_new[feature] = 0 if max_value == 0 else x_test_new[feature] / max_value
#print(x_val)

# Append PADDING zero columns, then fold each row into a SIZE x SIZE x 1 image.
x_test_new_deep = np.concatenate(
    (x_test_new[:], np.zeros((x_test_new[:].shape[0], PADDING))), axis=1
)
x_test_new_deep = x_test_new_deep.reshape(x_test_new_deep.shape[0], SIZE, SIZE, 1)

print(x_test_new_deep)
print(y_test_new.shape)
# Predict class indices and report the fraction that match the labels.
y_pred = np.argmax(model_CNN_PE.predict(x_test_new_deep), axis=-1)
print("kết quả nhận dạng: ", np.sum(y_pred == y_test_new) / len(y_pred))