In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Load the dataset
df = pd.read_csv('student_data_semester_6.csv')

# Split the data into features and targets.
# Student_ID is dropped together with the two target columns, so every
# remaining column of the CSV is used as a model feature.
X = df.drop(columns=['Student_ID', 'Grade', 'Dropout_Risk_%'])
y_dropout = df['Dropout_Risk_%']

# Convert Grade to numeric (O = 10, E = 9, A = 8, B = 7, C = 6, D = 5, F = 0)
# NOTE(review): 'E' is ranked above 'A' here; presumably that matches the
# institution's grading scale -- confirm against the data source.
grade_mapping = {'O': 10, 'E': 9, 'A': 8, 'B': 7, 'C': 6, 'D': 5, 'F': 0}
y_grade = df['Grade'].map(grade_mapping)

# Series.map() yields NaN for any label that is not a key of grade_mapping
# (including missing grades). Fail loudly here instead of letting NaN targets
# flow into the train/test split and the model's loss.
unmapped_grades = df.loc[y_grade.isna(), 'Grade'].unique()
if len(unmapped_grades) > 0:
    raise ValueError(
        f"Grade column contains values with no numeric mapping: {list(unmapped_grades)}"
    )

# Split data into train and test sets (80% train, 20% test).
# Passing X and both targets in one call keeps their rows aligned;
# random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train_grade, y_test_grade, y_train_dropout, y_test_dropout = train_test_split(
    X, y_grade, y_dropout, test_size=0.2, random_state=42
)

# Standardize the data (important for deep learning models).
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [5]:
 import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Build the neural network model
grade_model = Sequential()

# Input layer
grade_model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))

# Hidden layers
grade_model.add(Dense(128, activation='relu'))
grade_model.add(Dropout(0.2))
grade_model.add(Dense(64, activation='relu'))
grade_model.add(Dropout(0.2))

# Output layer
grade_model.add(Dense(1, activation='linear'))  # For regression to predict grade numerically

# Compile the model
grade_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# Train the model
grade_model.fit(X_train_scaled, y_train_grade, epochs=150, batch_size=32, validation_data=(X_test_scaled, y_test_grade))

# Evaluate the model
y_pred_grade = grade_model.predict(X_test_scaled)
y_pred_grade = y_pred_grade.round()  # Convert continuous output to nearest grade (0-10)

# Calculate accuracy
accuracy = accuracy_score(y_test_grade, y_pred_grade)

print(f"Deep Learning Grade Prediction Accuracy: {accuracy * 100:.2f}%")


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [8]:
# Metrics used at the end of this cell (kept here so the cell's dependencies
# are visible at the top rather than halfway through).
from sklearn.metrics import mean_squared_error, r2_score

# Seed the random number generators before building the network so weight
# initialisation, dropout masks and batch shuffling are repeatable from run to
# run (some GPU kernels may still introduce small nondeterminism).
tf.keras.utils.set_random_seed(42)

# Build the neural network model for dropout risk prediction
dropout_model = Sequential()

# Input layer: one input per scaled feature column
dropout_model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))

# Hidden layers, each followed by 20% dropout for regularisation
dropout_model.add(Dense(128, activation='relu'))
dropout_model.add(Dropout(0.2))
dropout_model.add(Dense(64, activation='relu'))
dropout_model.add(Dropout(0.2))

# Output layer
dropout_model.add(Dense(1, activation='linear'))  # For regression to predict dropout risk

# Compile the model: MSE is the training loss, MAE is reported alongside it
dropout_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model.
# The test split is passed as validation_data purely for per-epoch monitoring;
# no callback selects or stops the model based on it.
dropout_model.fit(X_train_scaled, y_train_dropout, epochs=100, batch_size=32, validation_data=(X_test_scaled, y_test_dropout))

# Evaluate the model
y_pred_dropout = dropout_model.predict(X_test_scaled)

# Calculate RMSE (Root Mean Squared Error): square root of the MSE, so it is
# expressed in the same units as the Dropout_Risk_% target
dropout_rmse = mean_squared_error(y_test_dropout, y_pred_dropout) ** 0.5

# Calculate R-squared score
dropout_r2 = r2_score(y_test_dropout, y_pred_dropout)

print(f"Deep Learning Dropout Risk RMSE: {dropout_rmse:.2f}")
print(f"Deep Learning Dropout Risk R-squared: {dropout_r2:.2f}")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [9]:
import pandas as pd

# Line up actual vs. predicted values for both targets, one test row per line.
# The two y_test_* Series carry the original row labels from the split, so the
# resulting frame is indexed by those labels; the model outputs are collapsed
# to 1-D arrays so each becomes a single column.
predicted_grades = y_pred_grade.ravel()
predicted_dropout_risk = y_pred_dropout.ravel()

comparison_columns = {
    'Actual Grades': y_test_grade,
    'Predicted Grades': predicted_grades,
    'Actual Dropout Risk': y_test_dropout,
    'Predicted Dropout Risk': predicted_dropout_risk,
}
comparison_df = pd.DataFrame(comparison_columns)

# Show the first 10 rows for a quick side-by-side check
first_rows = comparison_df.head(10)
print(first_rows)


      Actual Grades  Predicted Grades  Actual Dropout Risk  \
1860              5               5.0                81.07   
353               9               8.0                 6.96   
1333              5               5.0                81.07   
905               8               8.0                14.99   
1289              8               8.0                11.45   
1273              6               6.0                49.80   
938               6               5.0                46.96   
1731              9               9.0                 3.78   
65                5               5.0                78.23   
1323              6               6.0                47.67   

      Predicted Dropout Risk  
1860               87.472054  
353                 5.411403  
1333               62.219673  
905                18.808758  
1289               10.746274  
1273               62.297657  
938                54.288181  
1731                3.524431  
65                 69.522995  
1323   

In [10]:
import pickle

# Save the grade prediction model
grade_model.save('grade_prediction_model6.h5')

# Save the dropout risk prediction model
dropout_model.save('dropout_risk_prediction_model6.h5')

print("Models saved successfully.")

# Both networks were trained on StandardScaler-transformed features, so the
# fitted scaler has to be persisted next to them: without it, new inputs cannot
# be scaled the same way before calling predict() in a later session.
# Only unpickle this file from a trusted location.
with open('feature_scaler6.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Feature scaler saved successfully.")


Models saved successfully.
