In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [87]:
# Read the training data from its CSV file into a DataFrame.
# NOTE(review): this is an absolute, machine-specific location; the notebook
# only runs where this exact path exists.
DATASET_CSV = 'F:/KA Projects/Jaye/model/dataset.csv'
df = pd.read_csv(DATASET_CSV)

In [88]:
from sklearn.preprocessing import LabelEncoder

# Text-valued feature columns that get an integer encoding
categorical_cols = [
    'Proficiency level',
    'Preferred subjects',
    'Preferred study times',
    'Goals',
    'Curriculum structure',
    'Available content',
    'External factors',
]

# One LabelEncoder per categorical column, keyed by the original column name,
# so the same text -> integer mapping can be reused later on new inputs
label_encoders_X = {}

# Fit each encoder on its column and store the integer codes in a new
# '<column>_en' column; the original text column is kept as is.
# NOTE(review): the classes printed further down for 'Available content'
# include nan and near-duplicate spellings ('Interactive Exercises' /
# 'Interactive exercises', 'Tutorial' / 'Tutorials') — the raw data presumably
# needs cleaning before encoding; confirm with the data owner.
for column_name in categorical_cols:
    column_encoder = LabelEncoder()
    label_encoders_X[column_name] = column_encoder
    df[f'{column_name}_en'] = column_encoder.fit_transform(df[column_name])

# Preview the first rows, now including the encoded columns
print("\nEncoded Dataset:")
print(df.head())


Encoded Dataset:
  Proficiency level Preferred subjects Preferred study times       Goals  \
0               Low           Software               Morning  Short-term   
1               Low           Software               Morning  Short-term   
2               Low           Software               Morning  Short-term   
3               Low           Software               Morning  Short-term   
4               Low           Software               Morning  Short-term   

   Quiz scores  Completion rates  Time spent on different types of content  \
0            1                 1                                         1   
1            1                 2                                         2   
2            1                 3                                         3   
3            1                 4                                         4   
4            1                 5                                         5   

  Curriculum structure Available content  External facto

In [90]:
# Model inputs: the seven encoded categorical columns followed by the three
# numeric columns. The order of this list is the column order of X, and
# therefore the feature order the model is trained on.
selected_columns = [
    'Proficiency level_en',
    'Preferred subjects_en',
    'Preferred study times_en',
    'Goals_en',
    'Curriculum structure_en',
    'External factors_en',
    'Available content_en',
    'Time spent on different types of content',
    'Completion rates',
    'Quiz scores',
]
X = df[selected_columns]

# Prediction target, taken straight from the 'Learning style' column
y = df['Learning style']

In [91]:
from sklearn.preprocessing import LabelEncoder

# Encode the target labels as integers.
# The encoder is fitted on the original 'Learning style' column rather than on
# `y` itself: `y` is overwritten with the integer codes below, so fitting on
# `y` would make this cell non-idempotent (a second run would fit on the codes
# and the encoder would no longer hold the original label names that are used
# later to decode predictions).
label_encoder_Y = LabelEncoder()
y = label_encoder_Y.fit_transform(df['Learning style'])

In [92]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [93]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling are repeatable from run to run
tf.keras.utils.set_random_seed(42)

# Size the network from the data instead of hard-coding the counts, so the
# model stays in sync if features or classes are added or removed
n_features = X_train.shape[1]
n_classes = len(label_encoder_Y.classes_)

# Feed-forward classifier: two hidden ReLU layers (128 and 64 units), each
# followed by 50% dropout, and a softmax output with one unit per class.
model = Sequential([
    # Explicit Input object instead of `input_shape=` on the first Dense
    # layer, which Keras warns against for Sequential models
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

# Targets are integer class indices (from LabelEncoder), hence the sparse
# variant of categorical cross-entropy
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [94]:
# Print the layer-by-layer summary of the compiled model
model.summary()

In [95]:
# Fit for 20 epochs in mini-batches of 32 samples, with 20% of the training
# data passed to Keras as a validation split. The object returned by fit()
# is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6857 - loss: 3.1082 - val_accuracy: 0.9212 - val_loss: 0.3416
Epoch 2/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7993 - loss: 0.9069 - val_accuracy: 0.9212 - val_loss: 0.5048
Epoch 3/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8622 - loss: 0.5825 - val_accuracy: 0.9212 - val_loss: 0.4346
Epoch 4/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8773 - loss: 0.5319 - val_accuracy: 0.9212 - val_loss: 0.4065
Epoch 5/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8588 - loss: 0.5556 - val_accuracy: 0.9212 - val_loss: 0.3740
Epoch 6/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8784 - loss: 0.4640 - val_accuracy: 0.9212 - val_loss: 0.3524
Epoch 7/20
[1m100/100[0m 

In [96]:
import numpy as np

# Score the trained model on the held-out test split; the result is unpacked
# into the loss and the single 'accuracy' metric the model was compiled with
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print("Test Accuracy:", test_accuracy)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9732 - loss: 0.0961
Test Accuracy: 0.9700000286102295


In [97]:
# Save the trained model to 'prediction.h5' (relative path, so it lands in
# the current working directory).
# NOTE(review): the .h5 extension presumably selects the HDF5 format — confirm
# against the installed Keras version whether the native .keras format is
# preferred before changing the file name, since consumers may expect this one.
model.save("prediction.h5")



In [101]:
import pickle

# Persist the whole dict of fitted feature encoders
# (column name -> LabelEncoder) as a single pickle file
with open('label_encoder_X.pkl', mode='wb') as encoders_file:
    pickle.dump(label_encoders_X, encoders_file)

In [99]:
import pickle

# Persist the fitted target encoder so predicted class indices can be
# mapped back to label names outside this notebook
with open('label_encoder_Y.pkl', mode='wb') as target_encoder_file:
    pickle.dump(label_encoder_Y, target_encoder_file)

In [109]:
from joblib import dump

# Write each fitted feature encoder to its own joblib file. Despite the
# '_classes' suffix in the file names, the full LabelEncoder object is stored,
# not just its classes_ array.
for feature_name in label_encoders_X:
    dump(label_encoders_X[feature_name], f'label_encoder_{feature_name}_classes.joblib')

# Same for the target encoder; as the last expression of the cell, the list of
# written file names returned by dump() is shown as the cell output
dump(label_encoder_Y, 'label_encoder_Y_classes.joblib')

['label_encoder_Y_classes.joblib']

In [110]:
from joblib import load

# Reload the fitted encoders from their joblib files. Each file holds a full
# LabelEncoder; the feature encoders are collected in a dict keyed by column name.
saved_feature_names = [
    'Proficiency level',
    'Preferred subjects',
    'Preferred study times',
    'Goals',
    'Curriculum structure',
    'Available content',
    'External factors',
]
label_encoder_X_classes = {
    name: load(f'label_encoder_{name}_classes.joblib')
    for name in saved_feature_names
}
label_encoder_Y_classes = load('label_encoder_Y_classes.joblib')

# Show the categories known to every reloaded encoder
for col, encoder in label_encoder_X_classes.items():
    print(f'Classes for {col}: {encoder.classes_}')

print(f'Classes for Y: {label_encoder_Y_classes.classes_}')


Classes for Proficiency level: ['High' 'Low' 'Medium']
Classes for Preferred subjects: ['Database' 'Network' 'OS' 'Software']
Classes for Preferred study times: ['Afternoon' 'Evening' 'Midnight' 'Morning' 'Night']
Classes for Goals: ['High-term' 'Long-term' 'Low-term' 'Short-term']
Classes for Curriculum structure: ['Exam' 'Knowledge']
Classes for Available content: ['Interactive Exercises' 'Interactive exercises' 'Lectures' 'Quiz'
 'Reading Materials' 'Tutorial' 'Tutorials' nan]
Classes for External factors: ['Time Constraints' 'Upcoming Exams']
Classes for Y: ['Auditory' 'Kinesthetic' 'Visual']


In [103]:
# # Check data types of preprocessed input data
# print("Data types of preprocessed input:")
# print(preprocessed_input.dtype)

# List the categories held by each in-memory feature encoder, one column at
# a time, followed by the categories of the target encoder
print("Classes learned by the label encoder for X:")
for feature_name, fitted_encoder in label_encoders_X.items():
    print(f"{feature_name}:")
    print(fitted_encoder.classes_)

print("Classes learned by the label encoder for Y:")
print(label_encoder_Y.classes_)


Classes learned by the label encoder for X:
Proficiency level:
['High' 'Low' 'Medium']
Preferred subjects:
['Database' 'Network' 'OS' 'Software']
Preferred study times:
['Afternoon' 'Evening' 'Midnight' 'Morning' 'Night']
Goals:
['High-term' 'Long-term' 'Low-term' 'Short-term']
Curriculum structure:
['Exam' 'Knowledge']
Available content:
['Interactive Exercises' 'Interactive exercises' 'Lectures' 'Quiz'
 'Reading Materials' 'Tutorial' 'Tutorials' nan]
External factors:
['Time Constraints' 'Upcoming Exams']
Classes learned by the label encoder for Y:
['Auditory' 'Kinesthetic' 'Visual']


In [104]:
import numpy as np

# Function to preprocess input data for testing
def preprocess_input(input_data, label_encoders_X, feature_order=None):
    """Encode one raw input record into a single-row array for the model.

    Parameters
    ----------
    input_data : dict
        Maps feature name to its raw value (text for categorical features,
        a number otherwise).
    label_encoders_X : dict
        Maps categorical feature name to a fitted encoder exposing
        ``transform(list_of_values)``. Features without an entry here are
        passed through unchanged.
    feature_order : sequence of str, optional
        Feature names in the exact order the model expects its input columns.
        When given, the output follows this order regardless of how
        ``input_data`` was built, and keys of ``input_data`` not listed here
        are ignored. When omitted, the insertion order of ``input_data`` is
        used, so the caller is responsible for matching the training order.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_features)``.

    Raises
    ------
    KeyError
        If ``feature_order`` names a feature that is absent from ``input_data``.
    """
    if feature_order is None:
        feature_order = list(input_data)
    else:
        # Fail early with the full list of missing names rather than on the
        # first lookup inside the loop
        missing = [name for name in feature_order if name not in input_data]
        if missing:
            raise KeyError(f"input_data is missing required features: {missing}")

    encoded_input = []
    for col in feature_order:
        value = input_data[col]
        if col in label_encoders_X:
            # transform() works on sequences, so wrap the single value and
            # unwrap the single result
            encoded_input.append(label_encoders_X[col].transform([value])[0])
        else:
            # Non-categorical features are used as is
            encoded_input.append(value)
    return np.array(encoded_input).reshape(1, -1)

# Raw example record to classify (text for categorical features, numbers otherwise)
input_data = {
    'Proficiency level': 'Medium',
    'Preferred subjects': 'Software',
    'Preferred study times': 'Morning',
    'Goals': 'Short-term',
    'Curriculum structure': 'Exam',
    'Available content': 'Lectures',
    'External factors': 'Time Constraints',
    'Time spent on different types of content': 10,
    'Completion rates': 7,
    'Quiz scores': 80
}

# The model was trained on the columns in the order of `selected_columns`,
# where 'External factors' comes BEFORE 'Available content'. The encoded row
# follows the key order of the dict it is built from, so the record is rebuilt
# in the trained order first; otherwise those two features reach the model swapped.
trained_feature_order = [
    # Encoded columns carry an '_en' suffix; strip it to get the raw feature name
    name[:-len('_en')] if name.endswith('_en') else name
    for name in selected_columns
]
ordered_input = {name: input_data[name] for name in trained_feature_order}

# Encode the categorical values and shape the record as a single-row array
preprocessed_input = preprocess_input(ordered_input, label_encoders_X)

# Class probabilities for the single row
predictions = model.predict(preprocessed_input)

# Index of the most probable class, mapped back to its label name
predicted_class_index = np.argmax(predictions)
predicted_class = label_encoder_Y.classes_[predicted_class_index]

print("Predicted Learning Style:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
Predicted Learning Style: Visual
