In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [9]:
# Load the training data from the CSV file 'dataset_updated.csv'.
# NOTE(review): this is an absolute, machine-specific path; it only resolves
# where this exact directory layout exists.
dataset_path = 'C:/Users/Kishan raj/Documents/GitHub/EduQuest/learning/dataset_updated.csv'
df = pd.read_csv(dataset_path)

In [10]:
from sklearn.preprocessing import LabelEncoder

# Text-valued columns that must be turned into integer codes.
categorical_cols = [
    'Proficiency level',
    'Preferred subjects',
    'Preferred study times',
    'Goals',
    'Curriculum structure',
    'Available content',
    'External factors',
]

# One dedicated LabelEncoder per column, keyed by the raw column name.
label_encoders_X = {col: LabelEncoder() for col in categorical_cols}

# Fit every encoder on its own column and store the integer codes next to
# the original text in a new '<column>_en' column.
for col, encoder in label_encoders_X.items():
    df[f'{col}_en'] = encoder.fit_transform(df[col])

# Preview the first rows, now including the encoded columns.
print("\nEncoded Dataset:")
print(df.head())

# label_encoders_X now holds the fitted encoder for each categorical column.


Encoded Dataset:
  Proficiency level                    Preferred subjects  \
0          beginner     Network and System Administration   
1          beginner  Software Engineering and Development   
2          beginner        Devops and Systems Integration   
3          beginner                   Database Management   
4          beginner     Network and System Administration   

  Preferred study times       Goals  quiz scores  Completion rates  \
0               Morning  Short-term            1                 1   
1               Morning  Short-term            1                 2   
2               Morning  Short-term            1                 3   
3               Morning  Short-term            1                 4   
4               Morning  Short-term            1                 5   

   Time spent on different types of content Curriculum structure  \
0                                        52                 Exam   
1                                        52            Kno

In [11]:
# Feature columns, in the exact order the model will receive them:
# the seven encoded categoricals first, then the three numeric columns.
selected_columns = [
    'Proficiency level_en',
    'Preferred subjects_en',
    'Preferred study times_en',
    'Goals_en',
    'Curriculum structure_en',
    'External factors_en',
    'Available content_en',
    'Time spent on different types of content',
    'Completion rates',
    'quiz scores',
]

# Features (X) and the raw, still text-valued target (y).
X = df[selected_columns]
y = df['Learning style']

In [12]:
from sklearn.preprocessing import LabelEncoder

# Encode the target labels as integers.
# The encoder is fitted on the raw 'Learning style' column rather than on `y`
# itself so the cell is idempotent: re-running it must not re-fit the encoder
# on already-encoded integers, which would replace the class names stored in
# `classes_` with 0/1/2 and break the class-name lookup at prediction time.
label_encoder_Y = LabelEncoder()
y = label_encoder_Y.fit_transform(df['Learning style'])

In [13]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Size the network from the data instead of hard-coding the dimensions:
# one input per feature column, one output unit per learning-style class.
n_features = X_train.shape[1]
n_classes = len(label_encoder_Y.classes_)

# Define the neural network model.
model = Sequential([
    # Explicit Input layer; passing `input_shape=` to the first Dense layer
    # makes Keras emit a warning when the layer is constructed.
    tf.keras.Input(shape=(n_features,)),
    # Two hidden layers, each followed by dropout.
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Softmax output: one probability per class.
    Dense(n_classes, activation='softmax'),
])

# Compile the model.
# The targets are integer class ids (LabelEncoder output), so the sparse
# variant of categorical cross-entropy is used.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# Print Keras' summary of the model built in the previous cell.
model.summary()

In [16]:
# Fit the network on the training split for 20 epochs in batches of 32;
# 20% of the training data is held back for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
)

Epoch 1/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6464 - loss: 3.9021 - val_accuracy: 0.8462 - val_loss: 0.6615
Epoch 2/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6993 - loss: 1.1596 - val_accuracy: 0.8485 - val_loss: 0.7808
Epoch 3/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7497 - loss: 0.8427 - val_accuracy: 0.8485 - val_loss: 0.6240
Epoch 4/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7909 - loss: 0.7302 - val_accuracy: 0.8485 - val_loss: 0.5781
Epoch 5/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7900 - loss: 0.7054 - val_accuracy: 0.8485 - val_loss: 0.5398
Epoch 6/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8071 - loss: 0.6580 - val_accuracy: 0.8485 - val_loss: 0.5436
Epoch 7/20
[1m109/109[0m 

In [17]:
import numpy as np

# Score the trained model on the held-out test split and report its accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8369 - loss: 0.4069 
Test Accuracy: 0.8356290459632874


In [18]:
# Save the model
# Writes the trained network to 'prediction.h5', relative to the current
# working directory.
# NOTE(review): the '.h5' suffix presumably selects Keras' legacy HDF5
# format; confirm that whatever loads this file still expects it.
model.save("prediction.h5")



In [19]:
import pickle

# Persist the whole dict of fitted feature encoders (one per categorical
# column) in a single pickle file.
with open('label_encoder_X.pkl', 'wb') as encoders_file:
    pickle.dump(label_encoders_X, encoders_file)

In [20]:
import pickle

# Persist the fitted target encoder in its own pickle file.
with open('label_encoder_Y.pkl', 'wb') as target_encoder_file:
    pickle.dump(label_encoder_Y, target_encoder_file)

In [21]:
from joblib import dump

# Write each fitted feature encoder (the whole object, not only its classes)
# to its own joblib file, named after the column it encodes.
for col, encoder in label_encoders_X.items():
    encoder_file = f'label_encoder_{col}_classes.joblib'
    dump(encoder, encoder_file)

# Same for the target encoder; as the cell's last expression, the list of
# written file names returned by dump() is shown as the cell output.
dump(label_encoder_Y, 'label_encoder_Y_classes.joblib')

['label_encoder_Y_classes.joblib']

In [22]:
from joblib import load

# Read the encoders back from their joblib files: one file per categorical
# feature column, plus one for the target.
encoded_feature_names = [
    'Proficiency level',
    'Preferred subjects',
    'Preferred study times',
    'Goals',
    'Curriculum structure',
    'Available content',
    'External factors',
]
label_encoder_X_classes = {
    col: load(f'label_encoder_{col}_classes.joblib')
    for col in encoded_feature_names
}
label_encoder_Y_classes = load('label_encoder_Y_classes.joblib')

# Show the categories each reloaded encoder knows about.
for col, encoder in label_encoder_X_classes.items():
    print(f'Classes for {col}: {encoder.classes_}')

print(f'Classes for Y: {label_encoder_Y_classes.classes_}')


Classes for Proficiency level: ['advanced' 'beginner' 'intermediate']
Classes for Preferred subjects: ['Artificial Intelligence and Machine Learning' 'Cloud Computing'
 'CyberSecurity' 'Data Science and Analytics' 'Database Management'
 'Devops and Systems Integration' 'IT Project Management'
 'Network and System Administration'
 'Software Engineering and Development' 'Web Development']
Classes for Preferred study times: ['Afternoon' 'Evening' 'Midnight' 'Morning' 'Night']
Classes for Goals: ['Long-term' 'Short-term']
Classes for Curriculum structure: ['Exam' 'Knowledge']
Classes for Available content: ['assignment' 'audio' 'pdf' 'quiz' 'text' 'video']
Classes for External factors: ['Time Constraints' 'Upcoming Exams']
Classes for Y: ['Auditory' 'Kinesthetic' 'Visual']


In [23]:
# List the categories learned by the in-memory encoders: every feature
# encoder first (column name, then its classes), then the target encoder.
print("Classes learned by the label encoder for X:")
for col, encoder in label_encoders_X.items():
    print(f"{col}:", encoder.classes_, sep="\n")

print("Classes learned by the label encoder for Y:")
print(label_encoder_Y.classes_)


Classes learned by the label encoder for X:
Proficiency level:
['advanced' 'beginner' 'intermediate']
Preferred subjects:
['Artificial Intelligence and Machine Learning' 'Cloud Computing'
 'CyberSecurity' 'Data Science and Analytics' 'Database Management'
 'Devops and Systems Integration' 'IT Project Management'
 'Network and System Administration'
 'Software Engineering and Development' 'Web Development']
Preferred study times:
['Afternoon' 'Evening' 'Midnight' 'Morning' 'Night']
Goals:
['Long-term' 'Short-term']
Curriculum structure:
['Exam' 'Knowledge']
Available content:
['assignment' 'audio' 'pdf' 'quiz' 'text' 'video']
External factors:
['Time Constraints' 'Upcoming Exams']
Classes learned by the label encoder for Y:
['Auditory' 'Kinesthetic' 'Visual']


In [26]:
import numpy as np

# Function to preprocess input data for testing
# Raw feature names in the order the model was trained on. This mirrors the
# training feature columns, minus the '_en' suffix carried by the encoded
# columns. Note that 'External factors' comes BEFORE 'Available content'.
_TRAINING_FEATURE_ORDER = (
    'Proficiency level',
    'Preferred subjects',
    'Preferred study times',
    'Goals',
    'Curriculum structure',
    'External factors',
    'Available content',
    'Time spent on different types of content',
    'Completion rates',
    'quiz scores',
)


def preprocess_input(input_data, label_encoders_X, feature_order=_TRAINING_FEATURE_ORDER):
    """Turn one raw sample into the (1, n_features) array the model expects.

    The feature vector is assembled in ``feature_order`` and NOT in the
    iteration order of ``input_data``. The network only sees positions, so a
    dict whose keys are listed in a different order than the training columns
    would otherwise silently feed values into the wrong inputs.

    Args:
        input_data: Mapping of raw feature name to raw value. Names are
            matched against ``feature_order`` case-insensitively, so
            'Quiz scores' and 'quiz scores' refer to the same feature.
        label_encoders_X: Mapping of feature name to a fitted encoder with a
            ``transform`` method. Features without an entry are passed
            through unchanged.
        feature_order: Feature names in the order the model was trained on.
            Defaults to the training column order of this notebook.

    Returns:
        A NumPy array of shape ``(1, len(feature_order))``.

    Raises:
        ValueError: If ``input_data`` lacks a feature from ``feature_order``,
            contains a name that is not in it, or (raised by the encoder)
            holds a category the encoder was not fitted on.
    """
    # Case-folded lookup of the supplied values.
    supplied = {str(name).casefold(): value for name, value in input_data.items()}
    expected_keys = [str(name).casefold() for name in feature_order]

    missing = [name for name, key in zip(feature_order, expected_keys) if key not in supplied]
    unexpected = [name for name in input_data if str(name).casefold() not in expected_keys]
    if missing or unexpected:
        # Fail loudly: a typo in a feature name must not shift or drop inputs.
        raise ValueError(
            f"input_data does not match the expected features "
            f"(missing: {missing}, unexpected: {unexpected})"
        )

    encoded_input = []
    for name, key in zip(feature_order, expected_keys):
        value = supplied[key]
        if name in label_encoders_X:
            # Categorical feature: map the text to the integer code from training.
            encoded_input.append(label_encoders_X[name].transform([value])[0])
        else:
            # Numeric feature: use the value as is.
            encoded_input.append(value)
    return np.array(encoded_input).reshape(1, -1)

# Define input data for testing.
# The keys are listed in the same order as the training columns in
# `selected_columns` ('External factors' before 'Available content') so the
# feature vector lines up position-by-position with what the network was
# trained on. The last key uses the training column's spelling, 'quiz scores'.
input_data = {
    'Proficiency level': 'intermediate',
    'Preferred subjects': 'Software Engineering and Development',
    'Preferred study times': 'Morning',
    'Goals': 'Short-term',
    'Curriculum structure': 'Exam',
    'External factors': 'Time Constraints',
    'Available content': 'video',
    'Time spent on different types of content': 10,
    'Completion rates': 7,
    'quiz scores': 80
}

# Preprocess input data for testing
preprocessed_input = preprocess_input(input_data, label_encoders_X)

# Guard against a sample that does not have one value per training column.
assert preprocessed_input.shape == (1, len(selected_columns)), (
    f"expected {len(selected_columns)} features, got {preprocessed_input.shape[1]}"
)

# Make predictions using the trained model (one row of class probabilities)
predictions = model.predict(preprocessed_input)

# Get the predicted class: the most probable index, mapped back to its name
predicted_class_index = np.argmax(predictions[0])
predicted_class = label_encoder_Y.classes_[predicted_class_index]

print("Predicted Learning Style:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
Predicted Learning Style: Visual
