In [2]:
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
from itertools import product
import io

df1 = pd.read_csv("./data/student_dataset.csv")
df2 = pd.read_csv("./data/scholarship_dataset.csv")

df3=df1.merge(df2, how='cross')


In [4]:
def generate_output(row):
    if (row['Education_Level'] == row['Education_Preference']) and (row['Domain_Area'] == row['Eligible_Domain']):
        return 1
    else:
        return 0

In [5]:
df3['Target'] = df3.apply(generate_output, axis=1)

df3.to_csv("combined_dataset.csv", index=False)

In [5]:
print(df3['Target'].value_counts())

Target
0    241549
1      8451
Name: count, dtype: int64


In [6]:
df3 = pd.read_csv("combined_dataset.csv")

# Split the dataset into features (X) and target variable (y)
X = df3.drop('Target', axis=1)
y = df3['Target']

# One-hot encode categorical columns
categorical_columns = ["Name", "Education_Level", "Country_Preference", "Domain_Area", "Scholarship_Name",
                       "Scholarship_Type", "Education_Preference", "Country_of_Scholarship",
                       "Eligible_Domain"]

X = pd.get_dummies(X, columns=categorical_columns)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Build a simple neural network model
model = Sequential([
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with a specified number of epochs
epochs = 5  # Adjust the number of epochs as needed
model.fit(X_train_scaled, y_train, epochs=epochs, validation_data=(X_test_scaled, y_test))

Epoch 1/5
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 5ms/step - accuracy: 0.9656 - loss: 0.1700 - val_accuracy: 0.9756 - val_loss: 0.0632
Epoch 2/5
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - accuracy: 0.9872 - loss: 0.0352 - val_accuracy: 0.9987 - val_loss: 0.0041
Epoch 3/5
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 6ms/step - accuracy: 0.9996 - loss: 0.0014 - val_accuracy: 0.9997 - val_loss: 9.9157e-04
Epoch 4/5
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 5ms/step - accuracy: 0.9999 - loss: 2.6357e-04 - val_accuracy: 0.9999 - val_loss: 3.2850e-04
Epoch 5/5
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 6ms/step - accuracy: 0.9999 - loss: 1.5144e-04 - val_accuracy: 0.9998 - val_loss: 3.8808e-04


<keras.src.callbacks.history.History at 0x25e8e05c4d0>

In [8]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict probabilities for binary classification
y_pred_prob = model.predict(X_test_scaled)

# Convert probabilities to binary labels
y_pred = (y_pred_prob > 0.5).astype(int)

# Ensure y_test and y_pred have the same data type and shape
y_test = np.array(y_test)
y_pred = np.array(y_pred)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Generate classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Generate confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step
Accuracy: 0.99982
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     48310
           1       1.00      1.00      1.00      1690

    accuracy                           1.00     50000
   macro avg       1.00      1.00      1.00     50000
weighted avg       1.00      1.00      1.00     50000

Confusion Matrix:
[[48307     3]
 [    6  1684]]


In [13]:
import keras
keras.saving.save_model(model,"recommender-system.keras")