<a href="https://colab.research.google.com/github/mrigankraj/enhanced-scholarship-management-system/blob/main/Application_Acceptance_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#Creation of dataset scholarship_data.csv
#This dataset will be used to train the random forest ml model later

import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Generate synthetic data
GPA = np.round(np.random.uniform(2.0, 4.0, n_samples), 2)
TestScores = np.random.randint(200, 800, n_samples)
Extracurricular = np.random.choice(['Yes', 'No'], n_samples)
FinancialNeed = np.random.choice(['Low', 'Medium', 'High'], n_samples)
EssayScore = np.round(np.random.uniform(4.0, 10.0, n_samples), 1)

# Random acceptance logic (for simplicity)
Accepted = (GPA > 3.0) & (TestScores > 500) & (EssayScore > 6.0)
Accepted = Accepted.astype(int)

# Create a DataFrame
data = pd.DataFrame({
    'GPA': GPA,
    'TestScores': TestScores,
    'Extracurricular': Extracurricular,
    'FinancialNeed': FinancialNeed,
    'EssayScore': EssayScore,
    'Accepted': Accepted
})

# Save the dataset to a CSV file
data.to_csv('scholarship_data.csv', index=False)

print("Dataset created and saved as 'scholarship_data.csv'")


Dataset created and saved as 'scholarship_data.csv'


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = pd.read_csv('scholarship_data.csv')

# Feature selection
features = ['GPA', 'TestScores', 'Extracurricular', 'FinancialNeed', 'EssayScore']
X = data[features]
y = data['Accepted']  # Target variable

# Convert categorical variables to numerical
X = pd.get_dummies(X, columns=['Extracurricular', 'FinancialNeed'], drop_first=True)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

# Feature Importances (Optional)
importances = rf_model.feature_importances_
feature_names = X.columns
feature_importances = pd.Series(importances, index=feature_names).sort_values(ascending=False)

print("Feature Importances:")
print(feature_importances)


Accuracy: 1.00
Confusion Matrix:
[[161   0]
 [  0  39]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       161
           1       1.00      1.00      1.00        39

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

Feature Importances:
TestScores              0.379245
GPA                     0.374022
EssayScore              0.234085
FinancialNeed_Medium    0.004951
Extracurricular_Yes     0.004361
FinancialNeed_Low       0.003337
dtype: float64
