In [None]:
# Mount Google Drive at /content/gdrive so the notebook can read and write
# files stored under "My Drive". force_remount=True is passed so the mount is
# redone even when re-running this cell in the same session.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
# Base folders on the mounted drive.
# NOTE(review): the cells visible below use absolute paths rather than
# root_dir/base_dir — confirm whether these two variables are still needed.
root_dir = "/content/gdrive/My Drive/"
base_dir = root_dir + 'FirstM/'

**Accuracy Per Course: Proposed eLSTM Per Semester**



In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## Proposed eLSTMg Sem1-2020 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh

# Set alpha value (weight of the MSE term in the combined loss)
alpha = 0.5

# Define the proposed MSECosine loss function with alpha
def MSECosine_loss(alpha):
    """Build a Keras-compatible loss blending MSE with log-cosh.

    Despite the "Cosine" in the name, the second term is Keras' ``LogCosh``
    loss, not a cosine similarity.

    Args:
        alpha: Weight applied to the MSE term; the log-cosh term is
            weighted by ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``alpha * MSE + (1 - alpha) * LogCosh``.
    """
    def loss(y_true, y_pred):
        # Fresh loss objects are created per call, as Keras loss classes are callables.
        mse_term = MeanSquaredError()(y_true, y_pred)
        logcosh_term = LogCosh()(y_true, y_pred)
        weighted_mse = alpha * mse_term
        weighted_logcosh = (1 - alpha) * logcosh_term
        return weighted_mse + weighted_logcosh

    return loss

# Load the synthetic samples from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/synthetic_samples_Model4.csv'
synthetic_df = pd.read_csv(synthetic_file_path)


X_synthetic = synthetic_df.drop(['GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# Convert categorical attributes to numerical representations using one-hot encoding
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the synthetic data into training and testing sets
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Ensure the data is numeric and contains no missing values
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)

# Reshape data for eLSTM input
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the eLSTM model
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss=MSECosine_loss(alpha), optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)

# Create a DataFrame with predictions and merge with original attributes
predicted_df = pd.DataFrame({
    'Index': X_test_synthetic.index,
    'Predicted': y_test_pred.flatten()
})

# Merge with synthetic_df to include the original attributes
data_with_predictions_df = pd.merge(synthetic_df.loc[X_test_synthetic.index], predicted_df, left_index=True, right_on='Index')

# Save predictions with all attributes in original data
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2020_data_using_predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course on the held-out test split
course_accuracies = {}  # course name -> (accuracy, rmse)

for course in course_list:
    # One-hot columns are named 'COURSE_<name>' with spaces replaced by '_'
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    # Row labels of the test samples that belong to this course
    test_indices = X_test_synthetic[X_test_synthetic[course_column] == 1].index
    if len(test_indices) == 0:
        # The column can exist while the random split left no test rows for
        # this course; the metrics below are undefined on an empty subset
        # (sklearn's mean_squared_error rejects empty input), so skip it.
        print(f"No test samples for {course}; skipping.")
        continue

    X_test_course = X_test_synthetic.loc[test_indices]
    y_test_course = y_test_synthetic.loc[test_indices]
    X_test_course_reshaped = X_test_course.values.reshape((X_test_course.shape[0], seq_length, num_features))
    test_loss_course = model.evaluate(X_test_course_reshaped, y_test_course, verbose=0)
    y_test_pred_course = model.predict(X_test_course_reshaped)
    # Round the regression output to the nearest integer class label
    y_test_pred_classes_course = np.round(y_test_pred_course).flatten().astype(int)
    accuracy_course = np.mean(y_test_pred_classes_course == y_test_course.astype(int))
    rmse_course = np.sqrt(mean_squared_error(y_test_course, y_test_pred_course))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Calculate overall accuracy and RMSE
overall_accuracy = np.mean(y_test_pred_classes == y_test_classes)
overall_rmse = np.sqrt(mean_squared_error(y_test_classes, y_test_pred.flatten()))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV
course_accuracy_df = pd.DataFrame(course_accuracies.items(), columns=['Course', 'Metrics'])
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_elstm_Model_using_FSG4.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/elstm_Model_using_FSG4.h5')


In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## Proposed eLSTMg Sem2-2020 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh

alpha = 0.5

# Define the combined loss function with alpha
def MSECosine_loss(alpha):
    """Build a Keras-compatible loss blending MSE with log-cosh.

    Despite the "Cosine" in the name, the second term is Keras' ``LogCosh``
    loss, not a cosine similarity.

    Args:
        alpha: Weight applied to the MSE term; the log-cosh term is
            weighted by ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``alpha * MSE + (1 - alpha) * LogCosh``.
    """
    def loss(y_true, y_pred):
        # Fresh loss objects are created per call, as Keras loss classes are callables.
        mse_term = MeanSquaredError()(y_true, y_pred)
        logcosh_term = LogCosh()(y_true, y_pred)
        weighted_mse = alpha * mse_term
        weighted_logcosh = (1 - alpha) * logcosh_term
        return weighted_mse + weighted_logcosh

    return loss

# Load the synthetic samples from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2020_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Drop the columns 'Index' and 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# Convert categorical attributes to numerical representations using one-hot encoding
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the synthetic data into training and testing sets
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Ensure the data is numeric and contains no missing values
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)

# Reshape data for LSTM input
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the LSTM model
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss=MSECosine_loss(alpha), optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred.flatten()
})

# Attach this cell's predictions to the original attributes of its own test
# rows. The frame is built here so the export does not depend on a
# `data_with_predictions_df` left in the kernel by a previously run cell.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
if 'Index' not in data_with_predictions_df.columns:
    # Record the row label of each test sample in the loaded file
    data_with_predictions_df['Index'] = X_test_synthetic.index.to_numpy()
# Predictions are in the same row order as X_test_synthetic; any 'Predicted'
# column carried in from the input file is replaced by this model's output.
data_with_predictions_df['Predicted'] = predicted_df['Predicted'].to_numpy()

# Save predictions with all attributes in original data
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem1-2021_data_using_predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course on the held-out test split
course_accuracies = {}  # course name -> (accuracy, rmse)

for course in course_list:
    # One-hot columns are named 'COURSE_<name>' with spaces replaced by '_'
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    # Row labels of the test samples that belong to this course
    test_indices = X_test_synthetic[X_test_synthetic[course_column] == 1].index
    if len(test_indices) == 0:
        # The column can exist while the random split left no test rows for
        # this course; the metrics below are undefined on an empty subset
        # (sklearn's mean_squared_error rejects empty input), so skip it.
        print(f"No test samples for {course}; skipping.")
        continue

    X_test_course = X_test_synthetic.loc[test_indices]
    y_test_course = y_test_synthetic.loc[test_indices]
    X_test_course_reshaped = X_test_course.values.reshape((X_test_course.shape[0], seq_length, num_features))
    test_loss_course = model.evaluate(X_test_course_reshaped, y_test_course, verbose=0)
    y_test_pred_course = model.predict(X_test_course_reshaped)
    # Round the regression output to the nearest integer class label
    y_test_pred_classes_course = np.round(y_test_pred_course).flatten().astype(int)
    accuracy_course = np.mean(y_test_pred_classes_course == y_test_course.astype(int))
    rmse_course = np.sqrt(mean_squared_error(y_test_course, y_test_pred_course))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Calculate overall accuracy and RMSE
overall_accuracy = np.mean(y_test_pred_classes == y_test_classes)
overall_rmse = np.sqrt(mean_squared_error(y_test_classes, y_test_pred.flatten()))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV
course_accuracy_df = pd.DataFrame(course_accuracies.items(), columns=['Course', 'Metrics'])
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_elstm_Model_using_FSG4_Sem2-2020.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/elstm_Model_using_FSG4_Sem2-2020.h5')




In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## Proposed eLSTMg Sem1-2021 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh

alpha = 0.5

# Define the combined loss function with alpha
def MSECosine_loss(alpha):
    """Build a Keras-compatible loss blending MSE with log-cosh.

    Despite the "Cosine" in the name, the second term is Keras' ``LogCosh``
    loss, not a cosine similarity.

    Args:
        alpha: Weight applied to the MSE term; the log-cosh term is
            weighted by ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``alpha * MSE + (1 - alpha) * LogCosh``.
    """
    def loss(y_true, y_pred):
        # Fresh loss objects are created per call, as Keras loss classes are callables.
        mse_term = MeanSquaredError()(y_true, y_pred)
        logcosh_term = LogCosh()(y_true, y_pred)
        weighted_mse = alpha * mse_term
        weighted_logcosh = (1 - alpha) * logcosh_term
        return weighted_mse + weighted_logcosh

    return loss

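# Load the input samples from the CSV file
# NOTE(review): this path is the same Sem2-2020 file the previous cell loads,
# while this cell saves its model and accuracies under "Sem1-2021" — confirm
# which input file is intended.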
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2020_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Drop the columns 'Index' and 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# Convert categorical attributes to numerical representations using one-hot encoding
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the synthetic data into training and testing sets
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Ensure the data is numeric and contains no missing values
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)

# Reshape data for LSTM input
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the LSTM model
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss=MSECosine_loss(alpha), optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred.flatten()
})

# Attach this cell's predictions to the original attributes of its own test
# rows. The frame is built here so the export does not depend on a
# `data_with_predictions_df` left in the kernel by a previously run cell.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
if 'Index' not in data_with_predictions_df.columns:
    # Record the row label of each test sample in the loaded file
    data_with_predictions_df['Index'] = X_test_synthetic.index.to_numpy()
# Predictions are in the same row order as X_test_synthetic; any 'Predicted'
# column carried in from the input file is replaced by this model's output.
data_with_predictions_df['Predicted'] = predicted_df['Predicted'].to_numpy()

# Save predictions with all attributes in original data
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2021_data_using_predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course on the held-out test split
course_accuracies = {}  # course name -> (accuracy, rmse)

for course in course_list:
    # One-hot columns are named 'COURSE_<name>' with spaces replaced by '_'
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    # Row labels of the test samples that belong to this course
    test_indices = X_test_synthetic[X_test_synthetic[course_column] == 1].index
    if len(test_indices) == 0:
        # The column can exist while the random split left no test rows for
        # this course; the metrics below are undefined on an empty subset
        # (sklearn's mean_squared_error rejects empty input), so skip it.
        print(f"No test samples for {course}; skipping.")
        continue

    X_test_course = X_test_synthetic.loc[test_indices]
    y_test_course = y_test_synthetic.loc[test_indices]
    X_test_course_reshaped = X_test_course.values.reshape((X_test_course.shape[0], seq_length, num_features))
    test_loss_course = model.evaluate(X_test_course_reshaped, y_test_course, verbose=0)
    y_test_pred_course = model.predict(X_test_course_reshaped)
    # Round the regression output to the nearest integer class label
    y_test_pred_classes_course = np.round(y_test_pred_course).flatten().astype(int)
    accuracy_course = np.mean(y_test_pred_classes_course == y_test_course.astype(int))
    rmse_course = np.sqrt(mean_squared_error(y_test_course, y_test_pred_course))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Calculate overall accuracy and RMSE
overall_accuracy = np.mean(y_test_pred_classes == y_test_classes)
overall_rmse = np.sqrt(mean_squared_error(y_test_classes, y_test_pred.flatten()))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV
course_accuracy_df = pd.DataFrame(course_accuracies.items(), columns=['Course', 'Metrics'])
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_elstm_Model_using_FSG4_Sem1-2021.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/elstm_Model_using_FSG4_Sem1-2021.h5')


In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## Proposed eLSTMg Sem2-2021 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh

alpha = 0.5

# Define the combined loss function with alpha
def MSECosine_loss(alpha):
    """Build a Keras-compatible loss blending MSE with log-cosh.

    Despite the "Cosine" in the name, the second term is Keras' ``LogCosh``
    loss, not a cosine similarity.

    Args:
        alpha: Weight applied to the MSE term; the log-cosh term is
            weighted by ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``alpha * MSE + (1 - alpha) * LogCosh``.
    """
    def loss(y_true, y_pred):
        # Fresh loss objects are created per call, as Keras loss classes are callables.
        mse_term = MeanSquaredError()(y_true, y_pred)
        logcosh_term = LogCosh()(y_true, y_pred)
        weighted_mse = alpha * mse_term
        weighted_logcosh = (1 - alpha) * logcosh_term
        return weighted_mse + weighted_logcosh

    return loss

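# Load the input samples from the CSV file
# NOTE(review): this cell loads the Sem1-2021 file but saves its model and
# accuracies under "Sem2-2021"; the previous cell writes
# Sem2-2021_data_using_predicted_value.csv — confirm which input is intended.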
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem1-2021_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Drop the columns 'Index' and 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# Convert categorical attributes to numerical representations using one-hot encoding
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the synthetic data into training and testing sets
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Ensure the data is numeric and contains no missing values
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)

# Reshape data for LSTM input
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the LSTM model
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss=MSECosine_loss(alpha), optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred.flatten()
})

# Attach this cell's predictions to the original attributes of its own test
# rows. The frame is built here so the export does not depend on a
# `data_with_predictions_df` left in the kernel by a previously run cell.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
if 'Index' not in data_with_predictions_df.columns:
    # Record the row label of each test sample in the loaded file
    data_with_predictions_df['Index'] = X_test_synthetic.index.to_numpy()
# Predictions are in the same row order as X_test_synthetic; any 'Predicted'
# column carried in from the input file is replaced by this model's output.
data_with_predictions_df['Predicted'] = predicted_df['Predicted'].to_numpy()

# Save predictions with all attributes in original data
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (Propsoed MSEcosine): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course on the held-out test split
course_accuracies = {}  # course name -> (accuracy, rmse)

for course in course_list:
    # One-hot columns are named 'COURSE_<name>' with spaces replaced by '_'
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    # Row labels of the test samples that belong to this course
    test_indices = X_test_synthetic[X_test_synthetic[course_column] == 1].index
    if len(test_indices) == 0:
        # The column can exist while the random split left no test rows for
        # this course; the metrics below are undefined on an empty subset
        # (sklearn's mean_squared_error rejects empty input), so skip it.
        print(f"No test samples for {course}; skipping.")
        continue

    X_test_course = X_test_synthetic.loc[test_indices]
    y_test_course = y_test_synthetic.loc[test_indices]
    X_test_course_reshaped = X_test_course.values.reshape((X_test_course.shape[0], seq_length, num_features))
    test_loss_course = model.evaluate(X_test_course_reshaped, y_test_course, verbose=0)
    y_test_pred_course = model.predict(X_test_course_reshaped)
    # Round the regression output to the nearest integer class label
    y_test_pred_classes_course = np.round(y_test_pred_course).flatten().astype(int)
    accuracy_course = np.mean(y_test_pred_classes_course == y_test_course.astype(int))
    rmse_course = np.sqrt(mean_squared_error(y_test_course, y_test_pred_course))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Calculate overall accuracy and RMSE
overall_accuracy = np.mean(y_test_pred_classes == y_test_classes)
overall_rmse = np.sqrt(mean_squared_error(y_test_classes, y_test_pred.flatten()))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV
course_accuracy_df = pd.DataFrame(course_accuracies.items(), columns=['Course', 'Metrics'])
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_elstm_Model_using_FSG4_Sem2-2021.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/elstm_Model_using_FSG4_Sem2-2021.h5')


**Accuracy PerCourse LSTM Per Sem**

In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## LSTMg Sem1-2020 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh


# Load the synthetic samples from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/synthetic_samples_Model4.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Separate the features from the 'GRADE_ENCODED' target.
# errors='ignore' lets the drop succeed when the file has no 'Index' column;
# another cell in this notebook reads this same file and drops only
# 'GRADE_ENCODED', which suggests 'Index' is absent here — TODO confirm.
X_synthetic = synthetic_df.drop(columns=['Index', 'GRADE_ENCODED'], errors='ignore')
y_synthetic = synthetic_df['GRADE_ENCODED']

# Convert categorical attributes to numerical representations using one-hot encoding
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the synthetic data into training and testing sets
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Ensure the data is numeric and contains no missing values
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)

# Reshape data for model input
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the model
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss='MSE', optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (MSE): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred.flatten()
})

# Attach this cell's predictions to the original attributes of its own test
# rows. The frame is built here so the export does not depend on a
# `data_with_predictions_df` left in the kernel by a previously run cell.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
if 'Index' not in data_with_predictions_df.columns:
    # Record the row label of each test sample in the loaded file
    data_with_predictions_df['Index'] = X_test_synthetic.index.to_numpy()
# Predictions are in the same row order as X_test_synthetic; any 'Predicted'
# column carried in from the input file is replaced by this model's output.
data_with_predictions_df['Predicted'] = predicted_df['Predicted'].to_numpy()

# Save predictions with all attributes in original data
# NOTE(review): this path has no '.csv' extension, unlike the other export
# paths in this notebook; adding it would make this file collide with the
# eLSTM export of the same name — confirm the intended file name.
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2020_data_using_predicted_value'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

# Convert regression predictions to class labels
y_test_pred_classes = np.round(y_test_pred).flatten().astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred.flatten())
print(f"Test Loss (MSE): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course on the held-out test split
course_accuracies = {}  # course name -> (accuracy, rmse)

for course in course_list:
    # One-hot columns are named 'COURSE_<name>' with spaces replaced by '_'
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    # Row labels of the test samples that belong to this course
    test_indices = X_test_synthetic[X_test_synthetic[course_column] == 1].index
    if len(test_indices) == 0:
        # The column can exist while the random split left no test rows for
        # this course; the metrics below are undefined on an empty subset
        # (sklearn's mean_squared_error rejects empty input), so skip it.
        print(f"No test samples for {course}; skipping.")
        continue

    X_test_course = X_test_synthetic.loc[test_indices]
    y_test_course = y_test_synthetic.loc[test_indices]
    X_test_course_reshaped = X_test_course.values.reshape((X_test_course.shape[0], seq_length, num_features))
    test_loss_course = model.evaluate(X_test_course_reshaped, y_test_course, verbose=0)
    y_test_pred_course = model.predict(X_test_course_reshaped)
    # Round the regression output to the nearest integer class label
    y_test_pred_classes_course = np.round(y_test_pred_course).flatten().astype(int)
    accuracy_course = np.mean(y_test_pred_classes_course == y_test_course.astype(int))
    rmse_course = np.sqrt(mean_squared_error(y_test_course, y_test_pred_course))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Calculate overall accuracy and RMSE
overall_accuracy = np.mean(y_test_pred_classes == y_test_classes)
overall_rmse = np.sqrt(mean_squared_error(y_test_classes, y_test_pred.flatten()))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV
course_accuracy_df = pd.DataFrame(course_accuracies.items(), columns=['Course', 'Metrics'])
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_lstmg_Model_using_FSG4_Sem1-2020.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/lstmg_Model_using_FSG4_Sem1-2020.h5')


In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## LSTMg Sem2-2020 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh


# Load the Sem2-2020 input data from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2020_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Features are every column except 'Index' and the target 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# One-hot encode the course name; spaces in the resulting column names become
# underscores so they match the 'COURSE_<name>' lookup in the per-course loop below
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the data into training and testing sets (80/20, fixed seed)
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Cast every split to float (get_dummies may yield bool columns). The cast fails on
# non-numeric values; it does not remove or fill missing values.
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)
y_test_synthetic = y_test_synthetic.astype(float)

# Reshape data for model input: each row becomes a sequence of length 1
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the model (single regression output trained with MSE)
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss='MSE', optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model; predictions are row-aligned with X_test_synthetic / y_test_synthetic
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)
y_test_pred_flat = y_test_pred.flatten()

# Convert regression predictions to class labels by rounding to the nearest integer
y_test_pred_classes = np.round(y_test_pred_flat).astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred_flat)
print(f"Test Loss (MSE): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred_flat
})

# Save predictions with all attributes in original data.
# The original code wrote `data_with_predictions_df` without defining it in this cell,
# so it either raised NameError or re-saved a stale frame from an earlier cell.
# NOTE(review): rebuilt here as the original test rows plus a 'Predicted' column;
# confirm this is the schema the next semester's cell is meant to read.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
data_with_predictions_df['Predicted'] = y_test_pred_flat
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem1-2021_data_using_predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course by slicing the test-set predictions computed
# above, so per-course figures come from the same predictions as the overall metrics.
course_accuracies = {}

for course in course_list:
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    course_mask = (X_test_synthetic[course_column] == 1).to_numpy()
    if not course_mask.any():
        # One-hot encoding happens before the split, so the column can exist while
        # no row of this course is in the test set; mean_squared_error rejects
        # empty input, so skip explicitly.
        print(f"No test samples for {course}; skipping.")
        continue

    y_test_course = y_test_synthetic[course_mask]
    y_test_pred_course = y_test_pred_flat[course_mask]
    y_test_pred_classes_course = y_test_pred_classes[course_mask]

    # Cast to built-in float so the tuple is written to CSV as plain numbers
    # rather than as NumPy scalar reprs.
    accuracy_course = float(np.mean(y_test_pred_classes_course == y_test_course.astype(int).to_numpy()))
    rmse_course = float(np.sqrt(mean_squared_error(y_test_course, y_test_pred_course)))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Overall accuracy and RMSE. RMSE uses the untruncated labels, as the per-course RMSE
# and R-squared do (identical to the int-cast labels when grades are whole numbers).
overall_accuracy = accuracy
overall_rmse = np.sqrt(mean_squared_error(y_test_synthetic, y_test_pred_flat))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV. 'Metrics' keeps the original (accuracy, RMSE) tuple
# column; 'Accuracy' and 'RMSE' expose the same values as separate numeric columns.
course_accuracy_df = pd.DataFrame(list(course_accuracies.items()), columns=['Course', 'Metrics'])
course_accuracy_df['Accuracy'] = [metrics[0] for metrics in course_accuracy_df['Metrics']]
course_accuracy_df['RMSE'] = [metrics[1] for metrics in course_accuracy_df['Metrics']]
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_lstmg_Model_using_FSG4_Sem2-2020.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/lstmg_Model_using_FSG4_Sem2-2020.h5')


In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## LSTMg Sem1-2021 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh


# Load the Sem1-2021 input data from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem1-2021_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Features are every column except 'Index' and the target 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# One-hot encode the course name; spaces in the resulting column names become
# underscores so they match the 'COURSE_<name>' lookup in the per-course loop below
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the data into training and testing sets (80/20, fixed seed)
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Cast every split to float (get_dummies may yield bool columns). The cast fails on
# non-numeric values; it does not remove or fill missing values.
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)
y_test_synthetic = y_test_synthetic.astype(float)

# Reshape data for model input: each row becomes a sequence of length 1
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the model (single regression output trained with MSE)
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss='MSE', optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model; predictions are row-aligned with X_test_synthetic / y_test_synthetic
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)
y_test_pred_flat = y_test_pred.flatten()

# Convert regression predictions to class labels by rounding to the nearest integer
y_test_pred_classes = np.round(y_test_pred_flat).astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred_flat)
print(f"Test Loss (MSE): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred_flat
})

# Save predictions with all attributes in original data.
# The original code wrote `data_with_predictions_df` without defining it in this cell,
# so it either raised NameError or re-saved a stale frame from an earlier cell.
# NOTE(review): rebuilt here as the original test rows plus a 'Predicted' column;
# confirm this is the schema the next semester's cell is meant to read.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
data_with_predictions_df['Predicted'] = y_test_pred_flat
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2021_data_using_predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course by slicing the test-set predictions computed
# above, so per-course figures come from the same predictions as the overall metrics.
course_accuracies = {}

for course in course_list:
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    course_mask = (X_test_synthetic[course_column] == 1).to_numpy()
    if not course_mask.any():
        # One-hot encoding happens before the split, so the column can exist while
        # no row of this course is in the test set; mean_squared_error rejects
        # empty input, so skip explicitly.
        print(f"No test samples for {course}; skipping.")
        continue

    y_test_course = y_test_synthetic[course_mask]
    y_test_pred_course = y_test_pred_flat[course_mask]
    y_test_pred_classes_course = y_test_pred_classes[course_mask]

    # Cast to built-in float so the tuple is written to CSV as plain numbers
    # rather than as NumPy scalar reprs.
    accuracy_course = float(np.mean(y_test_pred_classes_course == y_test_course.astype(int).to_numpy()))
    rmse_course = float(np.sqrt(mean_squared_error(y_test_course, y_test_pred_course)))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Overall accuracy and RMSE. RMSE uses the untruncated labels, as the per-course RMSE
# and R-squared do (identical to the int-cast labels when grades are whole numbers).
overall_accuracy = accuracy
overall_rmse = np.sqrt(mean_squared_error(y_test_synthetic, y_test_pred_flat))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV. 'Metrics' keeps the original (accuracy, RMSE) tuple
# column; 'Accuracy' and 'RMSE' expose the same values as separate numeric columns.
course_accuracy_df = pd.DataFrame(list(course_accuracies.items()), columns=['Course', 'Metrics'])
course_accuracy_df['Accuracy'] = [metrics[0] for metrics in course_accuracy_df['Metrics']]
course_accuracy_df['RMSE'] = [metrics[1] for metrics in course_accuracy_df['Metrics']]
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_lstmg_Model_using_FSG4_Sem1-2021.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/lstmg_Model_using_FSG4_Sem1-2021.h5')


In [None]:
###########################################################################################
# This code is implemented by Dr.Anahita Ghazvini and Prof.Dr.Nurfadhlina Mohd Sharef
#Email: anahitaghazvini@upm.edu.my;nurfadhlina@upm.edu.my
###########################################################################################

########################## LSTMg Sem2-2021 using best model FSG4.###################

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.losses import MeanSquaredError, LogCosh


# Load the Sem2-2021 input data from the CSV file
synthetic_file_path = '/content/gdrive/My Drive/Colab Notebooks/Sem2-2021_data_using_predicted_value.csv'
synthetic_df = pd.read_csv(synthetic_file_path)

# Features are every column except 'Index' and the target 'GRADE_ENCODED'
X_synthetic = synthetic_df.drop(['Index', 'GRADE_ENCODED'], axis=1)
y_synthetic = synthetic_df['GRADE_ENCODED']

# One-hot encode the course name; spaces in the resulting column names become
# underscores so they match the 'COURSE_<name>' lookup in the per-course loop below
categorical_cols_synthetic = ['COURSE']
X_synthetic = pd.get_dummies(X_synthetic, columns=categorical_cols_synthetic)
X_synthetic.columns = X_synthetic.columns.str.replace(' ', '_')

# Split the data into training and testing sets (80/20, fixed seed)
X_train_synthetic, X_test_synthetic, y_train_synthetic, y_test_synthetic = train_test_split(
    X_synthetic, y_synthetic, test_size=0.2, random_state=42
)

# Cast every split to float (get_dummies may yield bool columns). The cast fails on
# non-numeric values; it does not remove or fill missing values.
X_train_synthetic = X_train_synthetic.astype(float)
y_train_synthetic = y_train_synthetic.astype(float)
X_test_synthetic = X_test_synthetic.astype(float)
y_test_synthetic = y_test_synthetic.astype(float)

# Reshape data for model input: each row becomes a sequence of length 1
num_features = X_train_synthetic.shape[1]
seq_length = 1
X_train_synthetic_reshaped = X_train_synthetic.values.reshape((X_train_synthetic.shape[0], seq_length, num_features))
X_test_synthetic_reshaped = X_test_synthetic.values.reshape((X_test_synthetic.shape[0], seq_length, num_features))

# Build and compile the model (single regression output trained with MSE)
model = Sequential()
model.add(Input(shape=(seq_length, num_features)))
model.add(LSTM(64))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1, activation='linear'))
model.compile(loss='MSE', optimizer='adam')

# Train the model
history = model.fit(X_train_synthetic_reshaped, y_train_synthetic, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model; predictions are row-aligned with X_test_synthetic / y_test_synthetic
test_loss = model.evaluate(X_test_synthetic_reshaped, y_test_synthetic)
y_test_pred = model.predict(X_test_synthetic_reshaped)
y_test_pred_flat = y_test_pred.flatten()

# Convert regression predictions to class labels by rounding to the nearest integer
y_test_pred_classes = np.round(y_test_pred_flat).astype(int)
y_test_classes = y_test_synthetic.astype(int)

# Calculate metrics
accuracy = np.mean(y_test_pred_classes == y_test_classes)
r_squared = r2_score(y_test_synthetic, y_test_pred_flat)
print(f"Test Loss (MSE): {test_loss}, Overall Accuracy: {accuracy}, R-squared: {r_squared}")

# Prepare DataFrame for predictions
predicted_df = pd.DataFrame({
    'Predicted': y_test_pred_flat
})

# Save predictions with all attributes in original data.
# The original code wrote `data_with_predictions_df` without defining it in this cell,
# so it either raised NameError or re-saved a stale frame from an earlier cell.
# NOTE(review): rebuilt here as the original test rows plus a 'Predicted' column;
# confirm this is the intended output schema.
data_with_predictions_df = synthetic_df.loc[X_test_synthetic.index].copy()
data_with_predictions_df['Predicted'] = y_test_pred_flat
predicted_csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/predicted_value.csv'
data_with_predictions_df.to_csv(predicted_csv_output_path, index=False)

course_list = ['AGEN KOMPUTERAN', 'ANALISIS DAN REKA BENTUK PERMAINAN KOMPUTER', 'ANALISIS DAN REKA BENTUK RANGKAIAN', 'ANALITIK BISNES', 'ANTARA RANGKAIAN', 'APLIKASI BERGERAK', 'FORENSIK KOMPUTER', 'KENYATAAN MAYA', 'KESELAMATAN PANGKALAN DATA', 'KESELAMATAN SISTEM KOMPUTER', 'KOMPUTERAN CERDAS', 'KRIPTOGRAFI', 'KUALITI PERISIAN', 'ORGANISASI KOMPUTER DAN BAHASA HIMPUNAN', 'PEMBANGUNAN APLIKASI PANGKALAN DATA', 'PEMBANGUNAN APLIKASI WEB',
               'PEMBANGUNAN PERISIAN SELAMAT', 'PEMBANGUNAN PERMAINAN KOMPUTER', 'PENDIGITAN AUDIO TAMPAK', 'PENGATURCARAAN C++', 'PENGATURCARAAN KOMPUTER I', 'PENGATURCARAAN KOMPUTER II',
               'PENGATURCARAAN PYTHON', 'PENGATURCARAAN SELARI DAN TERAGIH', 'PENGATURCARAAN SHELL', 'PENGKOMPUTAN SELARI DAN TERAGIH', 'PENGUJIAN PERISIAN',
               'PENGURUSAN RANGKAIAN', 'PENYELENGGARAAN DAN EVOLUSI PERISIAN', 'PERDAGANGAN ELEKTRONIK', 'PROJEK KEJURUTERAAN PERISIAN BERPASUKAN', 'SISTEM BERASASKAN PENGETAHUAN', 'SISTEM PENGOPERASIAN',
               'STATISTIK BAGI SAINS KOMPUTER', 'STRUKTUR DATA DAN ALGORITMA', 'STRUKTUR DISKRET', 'PEMBANGUNAN APLIKASI BERGERAK']

# Calculate accuracy and RMSE per course by slicing the test-set predictions computed
# above, so per-course figures come from the same predictions as the overall metrics.
course_accuracies = {}

for course in course_list:
    course_column = f'COURSE_{course.replace(" ", "_")}'
    if course_column not in X_test_synthetic.columns:
        print(f"Column for {course} does not exist in the test dataset.")
        continue

    course_mask = (X_test_synthetic[course_column] == 1).to_numpy()
    if not course_mask.any():
        # One-hot encoding happens before the split, so the column can exist while
        # no row of this course is in the test set; mean_squared_error rejects
        # empty input, so skip explicitly.
        print(f"No test samples for {course}; skipping.")
        continue

    y_test_course = y_test_synthetic[course_mask]
    y_test_pred_course = y_test_pred_flat[course_mask]
    y_test_pred_classes_course = y_test_pred_classes[course_mask]

    # Cast to built-in float so the tuple is written to CSV as plain numbers
    # rather than as NumPy scalar reprs.
    accuracy_course = float(np.mean(y_test_pred_classes_course == y_test_course.astype(int).to_numpy()))
    rmse_course = float(np.sqrt(mean_squared_error(y_test_course, y_test_pred_course)))
    course_accuracies[course] = (accuracy_course, rmse_course)
    print(f"Accuracy for {course}: {accuracy_course}, RMSE: {rmse_course}")

# Overall accuracy and RMSE. RMSE uses the untruncated labels, as the per-course RMSE
# and R-squared do (identical to the int-cast labels when grades are whole numbers).
overall_accuracy = accuracy
overall_rmse = np.sqrt(mean_squared_error(y_test_synthetic, y_test_pred_flat))
print(f"Overall Accuracy: {overall_accuracy}, RMSE: {overall_rmse}, R-squared: {r_squared}")

# Save course accuracies to CSV. 'Metrics' keeps the original (accuracy, RMSE) tuple
# column; 'Accuracy' and 'RMSE' expose the same values as separate numeric columns.
course_accuracy_df = pd.DataFrame(list(course_accuracies.items()), columns=['Course', 'Metrics'])
course_accuracy_df['Accuracy'] = [metrics[0] for metrics in course_accuracy_df['Metrics']]
course_accuracy_df['RMSE'] = [metrics[1] for metrics in course_accuracy_df['Metrics']]
csv_output_path = '/content/gdrive/My Drive/Colab Notebooks/course_accuracies_lstmg_Model_using_FSG4_Sem2-2021.csv'
course_accuracy_df.to_csv(csv_output_path, index=False)

# Save the model
model.save('/content/gdrive/My Drive/Colab Notebooks/lstmg_Model_using_FSG4_Sem2-2021.h5')
