In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Load the raw student records for the semester.
df = pd.read_csv('student_data_semester_2.csv')

# Features are every column except the identifier and the two prediction targets.
X = df.drop(columns=['Student_ID', 'Grade', 'Dropout_Risk_%'])
y_dropout = df['Dropout_Risk_%']

# Convert the letter grade to a numeric target
# (O = 10, E = 9, A = 8, B = 7, C = 6, D = 5, F = 0).
# NOTE(review): the previous comment listed a different scale (A = 9 ... E = 5);
# it now matches the mapping the code actually applies -- confirm this is the
# intended grading scheme.
grade_mapping = {'O': 10, 'E': 9, 'A': 8, 'B': 7, 'C': 6, 'D': 5, 'F': 0}
y_grade = df['Grade'].map(grade_mapping)

# Series.map yields NaN for any label that is not a key of grade_mapping.
# A NaN target would poison the regression loss, so stop here with a clear
# message instead of training on it.
unmapped_grades = df.loc[y_grade.isna(), 'Grade'].unique().tolist()
if unmapped_grades:
    raise ValueError(
        f"Grade column contains values missing from grade_mapping: {unmapped_grades}"
    )

# Split data into train and test sets (80% train, 20% test). Passing all three
# objects to a single call keeps features and both targets row-aligned.
X_train, X_test, y_train_grade, y_test_grade, y_train_dropout, y_test_dropout = train_test_split(
    X, y_grade, y_dropout, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test-set statistics are used.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [10]:
 import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed TensorFlow's global random generator so weight initialisation is
# repeatable between runs (other sources of nondeterminism may remain).
tf.random.set_seed(42)

# Feed-forward regressor that predicts the numeric grade from the scaled features.
# The input width is declared with an explicit Input object rather than the
# deprecated `input_dim` argument on the first Dense layer.
grade_model = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    # Hidden layers, each followed by 20% dropout
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # Single linear unit: the grade is predicted as a continuous value
    Dense(1, activation='linear'),
])

# 'accuracy' is a classification metric and is not meaningful for a continuous
# output trained with MSE, so mean absolute error is tracked instead.
grade_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model. The test split is passed as validation data purely to
# monitor the loss per epoch; nothing in this cell selects a model from it.
grade_model.fit(
    X_train_scaled,
    y_train_grade,
    epochs=150,
    batch_size=32,
    validation_data=(X_test_scaled, y_test_grade),
)

# predict() returns a column vector; flatten it, snap each value to the nearest
# integer grade and clamp to the 0-10 range covered by grade_mapping so an
# over- or undershooting prediction cannot become a grade outside that range.
y_pred_grade = grade_model.predict(X_test_scaled).ravel()
y_pred_grade = y_pred_grade.round().clip(0, 10).astype(int)

# Exact-match accuracy between the rounded predictions and the true grades
accuracy = accuracy_score(y_test_grade, y_pred_grade)

print(f"Deep Learning Grade Prediction Accuracy: {accuracy * 100:.2f}%")


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [12]:
from sklearn.metrics import mean_squared_error, r2_score

# Seed TensorFlow's global random generator so weight initialisation is
# repeatable between runs (other sources of nondeterminism may remain).
tf.random.set_seed(42)

# Feed-forward regressor that predicts the dropout-risk target from the scaled
# features. The input width is declared with an explicit Input object rather
# than the deprecated `input_dim` argument on the first Dense layer.
dropout_model = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    # Hidden layers, each followed by 20% dropout
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # Single linear unit for the continuous dropout-risk value
    Dense(1, activation='linear'),
])

# MSE is the training loss; mean absolute error is reported alongside it.
dropout_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model. The test split is passed as validation data purely to
# monitor the loss per epoch; nothing in this cell selects a model from it.
dropout_model.fit(
    X_train_scaled,
    y_train_dropout,
    epochs=50,
    batch_size=32,
    validation_data=(X_test_scaled, y_test_dropout),
)

# predict() returns a column vector; flatten it so the predictions have the
# same 1-D shape as the target series they are scored against.
y_pred_dropout = dropout_model.predict(X_test_scaled).ravel()

# RMSE is the square root of the mean squared error, in the target's own units
dropout_rmse = mean_squared_error(y_test_dropout, y_pred_dropout) ** 0.5

# R-squared score of the predictions against the held-out targets
dropout_r2 = r2_score(y_test_dropout, y_pred_dropout)

print(f"Deep Learning Dropout Risk RMSE: {dropout_rmse:.2f}")
print(f"Deep Learning Dropout Risk R-squared: {dropout_r2:.2f}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Deep Learning Dropout Risk RMSE: 5.57
Deep Learning Dropout Risk R-squared: 0.94


In [13]:
# Run both trained networks on the scaled test features.
# Grade outputs are rounded to the nearest whole number; dropout-risk outputs
# are kept as the raw continuous predictions.
y_pred_grade = grade_model.predict(X_test_scaled).round()
y_pred_dropout = dropout_model.predict(X_test_scaled)

# Show the first five predictions of each model as a quick sanity check.
for label, preds in (
    ("Grade Predictions (first 5):", y_pred_grade),
    ("Dropout Risk Predictions (first 5):", y_pred_dropout),
):
    print(label, preds[:5])


Grade Predictions (first 5): [[7.]
 [7.]
 [8.]
 [9.]
 [7.]]
Dropout Risk Predictions (first 5): [[20.703045 ]
 [32.53448  ]
 [ 9.838882 ]
 [ 6.1435823]
 [28.277565 ]]


In [14]:
import pandas as pd

# Side-by-side table of true and predicted values for both targets.
# The actual values are Series carrying the test-split row labels; the
# predictions are flattened to 1-D arrays so each becomes a single column.
comparison_columns = {
    'Actual Grades': y_test_grade,
    'Predicted Grades': y_pred_grade.ravel(),
    'Actual Dropout Risk': y_test_dropout,
    'Predicted Dropout Risk': y_pred_dropout.ravel(),
}
comparison_df = pd.DataFrame(comparison_columns)

# Print the first 10 rows for a quick visual comparison
print(comparison_df.head(10))


      Actual Grades  Predicted Grades  Actual Dropout Risk  \
1860              7               7.0                25.65   
353               7               7.0                28.20   
1333              7               8.0                22.47   
905               9               9.0                 8.16   
1289              7               7.0                25.02   
1273              6               6.0                41.55   
938               6               5.0                49.50   
1731              8               8.0                11.55   
65                7               7.0                25.02   
1323              9               8.0                 4.66   

      Predicted Dropout Risk  
1860               20.703045  
353                32.534481  
1333                9.838882  
905                 6.143582  
1289               28.277565  
1273               34.266254  
938                58.502316  
1731                8.590672  
65                 22.335487  
1323   

In [15]:
import pickle

# Persist both trained networks.
# NOTE(review): newer Keras releases recommend the native `.keras` format; the
# `.h5` file names are kept so anything that already loads these files keeps working.
grade_model.save('grade_prediction_model2.h5')
dropout_model.save('dropout_risk_prediction_model2.h5')

# Both networks were trained on StandardScaler output, so the fitted scaler is
# saved alongside them; without it new raw feature rows cannot be transformed
# the same way before prediction. Only unpickle this file from a trusted source.
with open('feature_scaler2.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Models saved successfully.")


Models saved successfully.
