# Import các thư viện cần thiết

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from prettytable import PrettyTable
import joblib

# Import data

In [2]:
# from google.colab import drive
# drive.mount('/content/gdrive')
# %cd '/content/gdrive/MyDrive/Project'

In [3]:
!pip install ucimlrepo
from ucimlrepo import fetch_ucirepo

# fetch dataset
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697)

# data (as pandas dataframes)
X = predict_students_dropout_and_academic_success.data.features
y = predict_students_dropout_and_academic_success.data.targets

# metadata
print(predict_students_dropout_and_academic_success.metadata)

# variable information
print(predict_students_dropout_and_academic_success.variables)


Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
{'uci_id': 697, 'name': "Predict Students' Dropout and Academic Success", 'repository_url': 'https://archive.ics.uci.edu/dataset/697/predict+students+dropout+and+academic+success', 'data_url': 'https://archive.ics.uci.edu/static/public/697/data.csv', 'abstract': "A dataset created from a higher education institution (acquired from several disjoint databases) related to students enrolled in different undergraduate degrees, such as agronomy, design, education, nursing, journalism, management, social service, and technologies.\nThe dataset includes information known at the time of student enrollment (academic path, demographics, and social-economic factors) and the students' academic performance at the end of the first and second semesters. \nThe data is used to build classification models to predict students' dropout and academic suc

In [4]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print("Shape of y_train (after to_categorical):", y_train.shape)
print("Shape of y_test (after to_categorical):", y_test.shape)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=1)


Shape of y_train (after to_categorical): (3096, 3)
Shape of y_test (after to_categorical): (1328, 3)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


## Apply Neurol Network

In [5]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(3, activation='softmax'))


model.summary()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

checkpoint_filepath = 'best_model.h5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

history = model.fit(X_train, y_train, batch_size=256, epochs=120, validation_data=(X_val, y_val), callbacks=[model_checkpoint_callback])

best_model = keras.models.load_model(checkpoint_filepath)

y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

accuracy_NN = accuracy_score(y_test_classes, y_pred_classes)
precision_NN = precision_score(y_test_classes, y_pred_classes, average='weighted')
recall_NN = recall_score(y_test_classes, y_pred_classes, average='weighted')
f1_NN = f1_score(y_test_classes, y_pred_classes, average='weighted')

score = best_model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print('Accuracy:', accuracy_NN)
print('Precision:', precision_NN)
print('Recall:', recall_NN)
print('F1 Score:', f1_NN)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                2368      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 3)                 195       
                                                                 
Total params: 6723 (26.26 KB)
Trainable params: 6723 (26.26 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/120
Epoch 2/120

  saving_api.save_model(


Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 

## Apply Random Forest

In [6]:
rf_model = RandomForestClassifier(n_estimators=1000, random_state=1)

rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

accuracy_RF = accuracy_score(y_test, y_pred)
precision_RF = precision_score(y_test, y_pred, average='weighted')
recall_RF = recall_score(y_test, y_pred, average='weighted')
f1_RF = f1_score(y_test, y_pred, average='weighted')

print('Accuracy:', accuracy_RF)
print('Precision:', precision_RF)
print('Recall:', recall_RF)
print('F1 Score:', f1_RF)

Accuracy: 0.7131024096385542
Precision: 0.8311646962792022
Recall: 0.7131024096385542
F1 Score: 0.7377040558362125


## Table

In [8]:
table = PrettyTable()
table.field_names = ["Metric", "Neural Network", "Random Forest"]

table.add_row(["Accuracy", accuracy_NN, accuracy_RF])
table.add_row(["Precision", precision_NN, precision_RF])
table.add_row(["Recall", recall_NN, recall_RF])
table.add_row(["F1 Score", f1_NN, f1_RF])
print(table)

+-----------+--------------------+--------------------+
|   Metric  |   Neural Network   |   Random Forest    |
+-----------+--------------------+--------------------+
|  Accuracy | 0.7884036144578314 | 0.7131024096385542 |
| Precision | 0.7760348678726283 | 0.8311646962792022 |
|   Recall  | 0.7884036144578314 | 0.7131024096385542 |
|  F1 Score | 0.7757857488880701 | 0.7377040558362125 |
+-----------+--------------------+--------------------+
