<a href="https://colab.research.google.com/github/lynnkathomi/ENROLLMENT_GRADUATION_MODEL/blob/main/EROLL_GRADUATE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Size of the synthetic student table
num_samples = 1000

# Seed NumPy's global RNG so every draw below is reproducible
np.random.seed(42)

# Integer-coded columns and their (low, high) bounds for np.random.randint;
# `high` is exclusive, so (0, 2) yields 0/1 and (18, 30) yields 18..29.
# The insertion order fixes the order in which random numbers are consumed,
# so it must not be rearranged.
int_column_bounds = {
    'extracurricular_participation': (0, 2),
    'socioeconomic_status': (0, 3),
    'age': (18, 30),
    'gender': (0, 2),
    'program_interest': (0, 3),
    'enrollment_status': (0, 2),
    'graduation_status': (0, 2),
}

# The continuous score is drawn first (mean 75, standard deviation 10),
# followed by each integer column in turn.
# Note: both status labels are sampled independently of the feature columns.
data = {'academic_score': np.random.normal(75, 10, num_samples)}
for column, (low, high) in int_column_bounds.items():
    data[column] = np.random.randint(low, high, num_samples)

df = pd.DataFrame(data)


In [None]:
# Predictor columns used by the enrollment model
enroll_feature_columns = [
    'academic_score',
    'extracurricular_participation',
    'socioeconomic_status',
    'age',
    'gender',
    'program_interest',
]

# Feature matrix and target label for enrollment prediction
X_enroll = df[enroll_feature_columns]
y_enroll = df['enrollment_status']


In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the split repeatable
enroll_split = train_test_split(X_enroll, y_enroll, test_size=0.3, random_state=42)
X_enroll_train, X_enroll_test, y_enroll_train, y_enroll_test = enroll_split

# Fit a Random Forest classifier on the training portion
enroll_model = RandomForestClassifier(random_state=42)
enroll_model.fit(X_enroll_train, y_enroll_train)

# Score the held-out rows and report accuracy plus per-class metrics
y_enroll_pred = enroll_model.predict(X_enroll_test)
enroll_accuracy = accuracy_score(y_enroll_test, y_enroll_pred)
enroll_report = classification_report(y_enroll_test, y_enroll_pred)
print("Enrollment Prediction Accuracy:", enroll_accuracy)
print("Enrollment Prediction Report:\n", enroll_report)


Enrollment Prediction Accuracy: 0.47333333333333333
Enrollment Prediction Report:
               precision    recall  f1-score   support

           0       0.48      0.50      0.49       151
           1       0.47      0.44      0.46       149

    accuracy                           0.47       300
   macro avg       0.47      0.47      0.47       300
weighted avg       0.47      0.47      0.47       300



In [None]:
# Keep only the rows whose enrollment_status is 1 (enrolled students)
is_enrolled = df['enrollment_status'] == 1
df_graduation = df[is_enrolled]

# Predictor columns for the graduation model (program_interest is not included)
grad_feature_columns = [
    'academic_score',
    'extracurricular_participation',
    'socioeconomic_status',
    'age',
    'gender',
]

# Feature matrix and target label for graduation prediction
X_grad = df_graduation[grad_feature_columns]
y_grad = df_graduation['graduation_status']


In [None]:
# Hold out 30% of the enrolled-student rows for evaluation; fixed random_state for repeatability
grad_split = train_test_split(X_grad, y_grad, test_size=0.3, random_state=42)
X_grad_train, X_grad_test, y_grad_train, y_grad_test = grad_split

# Fit a Random Forest classifier for graduation prediction
grad_model = RandomForestClassifier(random_state=42)
grad_model.fit(X_grad_train, y_grad_train)

# Score the held-out rows and report accuracy plus per-class metrics
y_grad_pred = grad_model.predict(X_grad_test)
grad_accuracy = accuracy_score(y_grad_test, y_grad_pred)
grad_report = classification_report(y_grad_test, y_grad_pred)
print("Graduation Prediction Accuracy:", grad_accuracy)
print("Graduation Prediction Report:\n", grad_report)


Graduation Prediction Accuracy: 0.4863013698630137
Graduation Prediction Report:
               precision    recall  f1-score   support

           0       0.45      0.46      0.45        68
           1       0.52      0.51      0.52        78

    accuracy                           0.49       146
   macro avg       0.48      0.48      0.48       146
weighted avg       0.49      0.49      0.49       146



In [None]:
# New student data for enrollment prediction.
# Each integer-coded feature must use a code that exists in the training data:
# extracurricular_participation was generated with randint(0, 2) -> 0 or 1, and
# program_interest with randint(0, 3) -> 0, 1 or 2. The sample previously used 3
# for both, which is not a valid code; it now uses the highest valid code of each.
new_student = pd.DataFrame({
    'academic_score': [85],
    'extracurricular_participation': [1],  # binary flag: 0 or 1
    'socioeconomic_status': [2],
    'age': [20],
    'gender': [1],
    'program_interest': [2]  # valid codes: 0, 1, 2
})

# Enrollment prediction (the model expects all six enrollment feature columns)
enrollment_prediction = enroll_model.predict(new_student)
if enrollment_prediction[0] == 1:
    print("Prediction: Likely to Enroll")

    # Graduation prediction only if enrolled; the graduation model was trained
    # without program_interest, so that column is dropped by selecting the rest.
    grad_prediction = grad_model.predict(new_student[['academic_score', 'extracurricular_participation', 'socioeconomic_status', 'age', 'gender']])
    print("Prediction:", "Likely to Graduate" if grad_prediction[0] == 1 else "May Need Support")
else:
    print("Prediction: Unlikely to Enroll")


Prediction: Likely to Enroll
Prediction: Likely to Graduate


In [None]:
# Single-row frame describing the student to score; column names match the
# features the enrollment model was fitted on.
new_student = pd.DataFrame({
    'academic_score': [85],
    'extracurricular_participation': [1],
    'socioeconomic_status': [1],
    'age': [20],
    'gender': [1],
    'program_interest': [1],
})

# Stage 1: will the student enroll?
enrollment_prediction = enroll_model.predict(new_student)
if enrollment_prediction[0] != 1:
    print("Prediction: Unlikely to Enroll")
else:
    print("Prediction: Likely to Enroll")

    # Stage 2 (enrolled students only): will the student graduate?
    # The graduation model uses every column except program_interest.
    grad_feature_columns = [
        'academic_score',
        'extracurricular_participation',
        'socioeconomic_status',
        'age',
        'gender',
    ]
    grad_prediction = grad_model.predict(new_student[grad_feature_columns])
    if grad_prediction[0] == 1:
        grad_message = "Likely to Graduate"
    else:
        grad_message = "May Need Support"
    print("Prediction:", grad_message)


Prediction: Unlikely to Enroll
