In [76]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [None]:
# ----------- Feed Forward Neural Network ---------- #
# Predict each machine's vendor (first column) from its encoded model name and
# the eight numeric attributes, using scikit-learn's MLPClassifier.
title = ['vendor name', 'Model Name', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP']

# The file is read as headerless so its first record stays an ordinary data
# row. Letting pandas parse that record as a header and re-inserting the
# column labels afterwards would turn the numeric columns into object dtype
# and pick up pandas' duplicate-label suffix (e.g. '256' -> '256.1') on any
# value that repeats within the record.
data = pd.read_csv('machine.data', header=None, names=title)

# Vendor overview
vendors = data['vendor name'].value_counts()
print("Number of vendor: ", len(vendors))

# Data cleaning: drop exact duplicate rows and report how many were removed
duplicate_rows = data.duplicated()
print("Number of duplicate rows:", duplicate_rows.sum())
rows_before = len(data)
data = data.drop_duplicates()
print("Number of duplicate rows removed:", rows_before - len(data))

# Data transformation: encode the model name as an integer code.
# NOTE(review): this code is an arbitrary identifier and is left unscaled next
# to [0, 1] features - confirm it is really wanted as a model input.
label_encoder = LabelEncoder()
data['Model Name'] = label_encoder.fit_transform(data['Model Name'])

# Features are every column except the vendor; the vendor is the target
feature_columns = title[1:]
X = data[feature_columns].to_numpy(dtype=float)
y = data['vendor name'].to_numpy()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=43)

# Data normalizing: fit the scaler on the training rows only, then apply the
# same transform to the test rows, so no test-set statistics leak into training
scaler = MinMaxScaler()
columns_to_normalize = ["MYCT", "MMIN", "MMAX", "CACH", "CHMIN", "CHMAX", "PRP", "ERP"]
numeric_idx = [feature_columns.index(col) for col in columns_to_normalize]
X_train[:, numeric_idx] = scaler.fit_transform(X_train[:, numeric_idx])
X_test[:, numeric_idx] = scaler.transform(X_test[:, numeric_idx])

# Build the Feed Forward Neural Network (FFNN) with MLPClassifier
ffnn_model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=1000, activation='relu', solver='adam', random_state=42)

# Train the FFNN
ffnn_model.fit(X_train, y_train)

# Evaluate the model
y_pred = ffnn_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Feed Forward Neural Network Accuracy: {accuracy}')

# Print the classification report. zero_division=0 keeps the reported values
# (0 for vendors that are never predicted) but silences the per-class warnings.
print("Feed Forward Neural Network Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Plot the vendor distribution once, on its own axes
vendors = data['vendor name'].value_counts()
fig, ax = plt.subplots()
vendors.plot(kind='bar', xlabel='Vendor', ylabel='Count', title='Vendor Counts', ax=ax)
plt.show()

In [None]:
# ----------- Recurrent Neural Network ---------- #
# Classify the vendor with an LSTM that reads each feature vector as a
# sequence of single-value timesteps.

# Load the data. The file is read as headerless so its first record stays a
# normal, numerically typed data row instead of being parsed as column labels.
title = ['vendor name', 'Model Name', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP']
data = pd.read_csv('machine.data', header=None, names=title)

# Data cleaning: drop exact duplicate rows and report how many were removed
duplicate_rows = data.duplicated()
print("Number of duplicate rows:", duplicate_rows.sum())
rows_before = len(data)
data = data.drop_duplicates()
print("Number of duplicate rows removed:", rows_before - len(data))

# Data transformation: encode the model name as an integer code.
# This encoder describes model names, NOT the vendor classes being predicted.
label_encoder = LabelEncoder()
data['Model Name'] = label_encoder.fit_transform(data['Model Name'])

# Features are every column except the vendor; the vendor is the target.
# float32 is required: TensorFlow cannot build tensors from object arrays.
feature_columns = title[1:]
X = data[feature_columns].to_numpy(dtype='float32')
y = data['vendor name'].to_numpy()

# One-hot encode the target variable. The default (sparse) output is densified
# explicitly, which avoids the `sparse=` keyword that newer scikit-learn
# releases renamed to `sparse_output=`.
onehot_encoder = OneHotEncoder()
y_onehot = onehot_encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Split the data into training and testing sets
X_train, X_test, y_train_onehot, y_test_onehot = train_test_split(X, y_onehot, test_size=0.25, random_state=43)

# Data normalizing: fit the scaler on the training rows only, then apply the
# same transform to the test rows, so no test-set statistics leak into training
scaler = MinMaxScaler()
columns_to_normalize = ["MYCT", "MMIN", "MMAX", "CACH", "CHMIN", "CHMAX", "PRP", "ERP"]
numeric_idx = [feature_columns.index(col) for col in columns_to_normalize]
X_train[:, numeric_idx] = scaler.fit_transform(X_train[:, numeric_idx])
X_test[:, numeric_idx] = scaler.transform(X_test[:, numeric_idx])

# The LSTM expects 3-D input (samples, timesteps, features-per-step). Add a
# trailing axis under separate names so X_train / X_test stay 2-D for any
# later cell that feeds them to scikit-learn.
X_train_seq = X_train[..., np.newaxis]
X_test_seq = X_test[..., np.newaxis]

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_seq, y_train_onehot)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_seq, y_test_onehot)).batch(32)

# Build the RNN model. The softmax layer needs one unit per vendor class,
# i.e. the width of the one-hot target matrix.
num_classes = y_onehot.shape[1]
model = Sequential([
    LSTM(50, input_shape=(X_train_seq.shape[1], 1)),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_dataset, epochs=10)

# Evaluate the model on the test set; evaluate() returns [loss, accuracy]
accuracy = model.evaluate(test_dataset)[1]
print("Accuracy:", accuracy)

In [None]:
# ---- Overfitting of the Feed Forward Neural Network ---- #
# Apply regularization and early stopping to the FFNN.
# Relies on X_train, X_test, y_train and y_test produced by the earlier cells.

# L2 penalty strength. MLPClassifier's default alpha is 1e-4, so passing that
# value would regularize exactly as much as the baseline model; a larger value
# is needed for this cell to actually add regularization.
# NOTE(review): 1e-2 is a starting point - tune it on validation data.
FFNN_ALPHA = 1e-2

ffnn_model = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    max_iter=1000,
    activation='relu',
    solver='adam',
    random_state=42,
    alpha=FFNN_ALPHA,  # L2 regularization strength
    early_stopping=True,
    validation_fraction=0.1,  # Fraction of the training data held out for validation
    n_iter_no_change=10  # Epochs without improvement before stopping early
)

# Train the FFNN model
ffnn_model.fit(X_train, y_train)

# Evaluate the model
y_pred = ffnn_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Feed Forward Neural Network Accuracy: {accuracy}')

# Print the classification report. zero_division=0 keeps the reported values
# (0 for vendors that are never predicted) but silences the per-class warnings.
print("Feed Forward Neural Network Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Plot the vendor distribution on its own axes
vendors = data.iloc[:, 0].value_counts()
fig, ax = plt.subplots()
vendors.plot(kind='bar', xlabel='Vendor', ylabel='Count', title='Vendor Counts', ax=ax)
plt.show()

In [None]:
# ---- Overfitting of the Recurrent Neural Network ---- #
# Build the RNN model with dropout and early stopping.
# Relies on X_train, X_test, y_train_onehot and y_test_onehot from the RNN cell.

# The LSTM expects float 3-D input (samples, timesteps, features-per-step), so
# the 2-D feature matrices are cast and given a trailing axis here.
X_train_seq = np.asarray(X_train, dtype='float32')[..., np.newaxis]
X_test_seq = np.asarray(X_test, dtype='float32')[..., np.newaxis]

# Hold out part of the TRAINING data for early stopping. Monitoring the test
# set and then reporting accuracy on that same set would bias the result.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_seq, y_train_onehot, test_size=0.2, random_state=43
)

fit_dataset = tf.data.Dataset.from_tensor_slices((X_fit, y_fit)).batch(32)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)
eval_dataset = tf.data.Dataset.from_tensor_slices((X_test_seq, y_test_onehot)).batch(32)

# One softmax unit per vendor class, i.e. the width of the one-hot targets
# (label_encoder was fit on model names, so its class count does not apply).
num_classes = y_train_onehot.shape[1]

model = Sequential([
    LSTM(50, input_shape=(X_train_seq.shape[1], 1)),
    Dropout(0.2),  # Dropout layer to prevent overfitting
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define early stopping: stop after 5 epochs without val_loss improvement and
# roll back to the best weights seen
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping on the held-out validation split
history = model.fit(fit_dataset, epochs=50, validation_data=val_dataset, callbacks=[early_stopping])

# Evaluate the model on the untouched test set; evaluate() returns [loss, accuracy]
accuracy = model.evaluate(eval_dataset)[1]
print("Accuracy:", accuracy)