In [14]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [42]:
import warnings
warnings.filterwarnings('ignore')

In [103]:
data = pd.read_excel(r"C:\Users\sivak\Excel_files\Assessment_Data.xlsx")
data

Unnamed: 0,firstName,lastName,updatedOn,assessmentTitle,assessmentID,totalNumberQuestions,questionsDone,overallScore,GRADE
0,Little Jimmy,Jr.,2023-11-27 16:40:13.329,Interpreting the weather,,0,0,0.000000,1
1,Learnt,Africa,2023-08-07 08:46:48.538,Basic Addition Part One,,0,0,0.000000,1
2,Collins,Kesuibai,2023-07-06 16:51:03.625,Morning Greetings,,0,0,0.000000,1
3,Learnt,Africa,2022-04-22 20:19:30.877,,e2qxi19gMDjRHzmJ0EuJ,2,2,1.000000,1
4,Mark,Moriama,2022-04-22 21:48:30.971,,WQIiM5iXz8tqUwyi6n1K,2,2,0.500000,1
...,...,...,...,...,...,...,...,...,...
90,Collins,Kesuibai,2022-08-24 18:35:11.584,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,11,1.800000,1
91,Learnt,Africa,2023-12-27 10:57:42.419,Counting Numbers 20 - 30,N6EFBOe2aCeglrUR2qUy,7,7,1.000000,1
92,Learnt,Africa,2024-05-05 14:44:12.761,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,7,1.000000,1
93,Mark,Moriama,2022-06-20 10:07:53.693,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,7,0.571429,1


In [105]:
data['studentID'] = data['firstName'] + ' ' + data['lastName']

In [113]:
# Create a pivot table
pivot_table = data.pivot_table(index='studentID', columns='assessmentTitle', values='overallScore', fill_value=0)
pivot_table

assessmentTitle,Afternoon Greetings,Alphabets Lesson 1 (A-M),Alphabets Lesson 2 (Upper and Lower Case),Basic Addition Part One,Counting Numbers 1-10,Counting Numbers 10 - 20,Counting Numbers 20 - 30,Courtesy Words,Environmental Studies,Evening Greetings,Interpreting the weather,Morning Greetings,Our solar system,The Boy and the North Wind
studentID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Anne Other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8
Bontle Shezi,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Collins Kesuibai,0.8,0.833333,1.8,1.0,1.8,0.2,0.8,0.0,1.6,1.0,0.0,0.4,0.0,0.4
Huy Tran Duong Tuan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0
Irators Sounds,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8,0.0,0.0
Joash Olum Boys Demo,0.0,1.5,2.6,2.4,0.0,0.8,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0
Joash Olum Girls,0.6,1.166667,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0
Keren Tangus,0.0,0.666667,1.0,0.0,0.0,3.6,1.1,0.0,0.0,2.6,0.0,0.8,2.0,1.0
Kesuibai Collins,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0
Kubrate Space,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [115]:
# Perform matrix factorization using TruncatedSVD
svd = TruncatedSVD(n_components=10, random_state=42)  # Reduce dimensions to 10 latent factors
latent_matrix = svd.fit_transform(pivot_table)
student_similarity = cosine_similarity(latent_matrix)

In [117]:
students = pivot_table.index

In [124]:
def get_assessment_recommendations(student_id, top_n=5):
    student_idx = np.where(students == student_id)[0][0]
    sim_scores = student_similarity[student_idx]  # Get the similarity scores for this student with all other students
    similar_students_idx = np.argsort(sim_scores)[::-1][1:top_n + 1]  # Find the indices of the top similar students (excluding the student itself)
    similar_students = pivot_table.iloc[similar_students_idx]  # Get the data for similar students
    
    # Find the assessments the student has already completed
    completed_assessments = pivot_table.columns[pivot_table.iloc[student_idx] > 0]
    recommendations = similar_students.mean().drop(completed_assessments).sort_values(ascending=False)  # Compute the average score for each assessment
    
    return recommendations.head(top_n).index.tolist()

In [128]:
# Generate recommendations for all students
for student_id in students:
    assessment_titles = get_assessment_recommendations(student_id, top_n=10)
    overall_score = pivot_table.loc[student_id].sum()  # Calculate the overall score for the student
    
    # Print the recommendations for this student
    print(f"Recommendations for {student_id} (Overall Score: {overall_score}):")
    for title in assessment_titles:
        print(f"- {title}")
    print("\n")  


Recommendations for Anne Other (Overall Score: 0.8):
- Alphabets Lesson 2 (Upper and Lower Case)
- Counting Numbers 10 - 20
- Alphabets Lesson 1 (A-M)
- Counting Numbers 1-10
- Basic Addition Part One
- Evening Greetings
- Counting Numbers 20 - 30
- Environmental Studies
- Morning Greetings
- Afternoon Greetings


Recommendations for Bontle Shezi (Overall Score: 0.4):
- Alphabets Lesson 2 (Upper and Lower Case)
- Counting Numbers 10 - 20
- Alphabets Lesson 1 (A-M)
- Counting Numbers 1-10
- Evening Greetings
- Counting Numbers 20 - 30
- Environmental Studies
- Afternoon Greetings
- Morning Greetings
- Our solar system


Recommendations for Collins Kesuibai (Overall Score: 10.633333333333335):
- Our solar system
- Courtesy Words
- Interpreting the weather


Recommendations for Huy Tran Duong Tuan (Overall Score: 0.6):
- Alphabets Lesson 2 (Upper and Lower Case)
- Counting Numbers 10 - 20
- Alphabets Lesson 1 (A-M)
- Basic Addition Part One
- Evening Greetings
- Counting Numbers 1-10
- Co

### Explanation:

- **TruncatedSVD:** It uses a technique to simplify the data. Think of it like shrinking a big picture to highlight the important parts. This helps to find hidden patterns in students' performances.
- **cosine_similarity:** It works on how similar each student is to every other student based on their assessment scores.

#### Build a recommendation system using ML models along with neural network system

In [7]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [8]:
import warnings
warnings.filterwarnings('ignore')

In [9]:
# Load dataset
df = pd.read_excel(r"C:\Users\sivak\Excel_files\Assessment_Data.xlsx")
df

Unnamed: 0,firstName,lastName,updatedOn,assessmentTitle,assessmentID,totalNumberQuestions,questionsDone,overallScore,GRADE
0,Little Jimmy,Jr.,2023-11-27 16:40:13.329,Interpreting the weather,,0,0,0.000000,1
1,Learnt,Africa,2023-08-07 08:46:48.538,Basic Addition Part One,,0,0,0.000000,1
2,Collins,Kesuibai,2023-07-06 16:51:03.625,Morning Greetings,,0,0,0.000000,1
3,Learnt,Africa,2022-04-22 20:19:30.877,,e2qxi19gMDjRHzmJ0EuJ,2,2,1.000000,1
4,Mark,Moriama,2022-04-22 21:48:30.971,,WQIiM5iXz8tqUwyi6n1K,2,2,0.500000,1
...,...,...,...,...,...,...,...,...,...
90,Collins,Kesuibai,2022-08-24 18:35:11.584,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,11,1.800000,1
91,Learnt,Africa,2023-12-27 10:57:42.419,Counting Numbers 20 - 30,N6EFBOe2aCeglrUR2qUy,7,7,1.000000,1
92,Learnt,Africa,2024-05-05 14:44:12.761,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,7,1.000000,1
93,Mark,Moriama,2022-06-20 10:07:53.693,Counting Numbers 1-10,N6EFBOe2aCeglrUR2qUy,7,7,0.571429,1


In [10]:
df['studentID'] = df['firstName'] + ' ' + df['lastName']

In [11]:
scaler = StandardScaler()
df['scaled_scores'] = scaler.fit_transform(df[['overallScore']])

In [14]:
# Splitting data into training and testing
X = df.drop(columns=['assessmentTitle', 'studentID', 'firstName', 'lastName', 'updatedOn', 'assessmentID'])
y = df['GRADE']

# Adjust the target labels to start from 0
y = y - 1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Encoding the target variable
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [21]:
# Random Forest model to predict subjects
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

In [23]:
# Prediction and evaluation
y_pred = model_rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       0.00      0.00      0.00         2
           2       0.75      1.00      0.86         3

    accuracy                           0.89        19
   macro avg       0.56      0.67      0.61        19
weighted avg       0.81      0.89      0.85        19



In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [27]:
# Building a neural network model
model_dl = Sequential()
model_dl.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model_dl.add(Dense(32, activation='relu'))
model_dl.add(Dense(len(y_train.unique()), activation='softmax'))

In [29]:
# Compiling the model
model_dl.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_dl.fit(X_train, y_train, epochs=20, batch_size=12)  # Training the model

Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.1466 - loss: 1.9888    
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4797 - loss: 1.0175 
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8305 - loss: 0.5364 
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8557 - loss: 0.4412 
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8864 - loss: 0.3894 
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8425 - loss: 0.4150 
Epoch 7/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8258 - loss: 0.4724 
Epoch 8/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8107 - loss: 0.4466 
Epoch 9/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x1ebe1021a30>

In [30]:
# Evaluating the model
loss, accuracy = model_dl.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step - accuracy: 0.8947 - loss: 0.4434
Test Accuracy: 0.8947368264198303


In [31]:
df['overallScore'].value_counts()

overallScore
0.000000    29
1.000000    18
0.800000    12
0.200000     6
0.400000     6
0.600000     5
1.800000     3
0.750000     3
2.600000     2
0.833333     2
2.000000     1
0.500000     1
3.600000     1
1.600000     1
2.400000     1
1.166667     1
0.666667     1
1.500000     1
0.571429     1
Name: count, dtype: int64

In [32]:
min_score = 1.00  # Minimum score threshold
recommendations_dict = {}  # Create an empty dictionary

# each student to generate recommendations based on overallscores
for student_id, group in df.groupby('studentID'):
    overall_score = group['overallScore'].values[0]  # Get the overall score for the student
    
    # Generate recommendations only for students with overall scores below the minimum score
    if overall_score < min_score:
        attempted_assessments = group['assessmentTitle'].unique()  # assessments that the student has attempted

        all_assessments = df['assessmentTitle'].unique()  # Get all assessments in the dataset
        unattempted_assessments = set(all_assessments) - set(attempted_assessments)  # Identify assessments that the student has not attempted

        # Store recommendations in dictionary
        if student_id in recommendations_dict:
            recommendations_dict[student_id].extend(unattempted_assessments)
        else:
            recommendations_dict[student_id] = list(unattempted_assessments)  # Create a new list for this student

### Output the recommendations directly for students with low scores

In [38]:
for student_id, assessments in recommendations_dict.items():
    unique_assessments = list(set(map(str, assessments)))  # Convert all assessments to strings and remove duplicates
    overall_score = df[df['studentID'] == student_id]['overallScore'].values[0]  # Get the overall score
    print(f"Recommendations for {student_id} (Overall Score: {overall_score:.2f}):")
    for title in unique_assessments:  # Use unique assessments for output
        print(f"- {title}")
    print("\n")

Recommendations for Anne Other (Overall Score: 0.80):
- Interpreting the weather
- Alphabets Lesson 1 (A-M)
- Counting Numbers 10 - 20
- nan
- Environmental Studies
- Counting Numbers 20 - 30
- Alphabets Lesson 2 (Upper and Lower Case)
- Basic Addition Part One
- Our solar system
- Afternoon Greetings
- Morning Greetings
- Counting Numbers 1-10
- Courtesy Words
- Evening Greetings


Recommendations for Bontle Shezi (Overall Score: 0.40):
- Interpreting the weather
- Alphabets Lesson 1 (A-M)
- Counting Numbers 10 - 20
- nan
- Environmental Studies
- Counting Numbers 20 - 30
- Alphabets Lesson 2 (Upper and Lower Case)
- Our solar system
- The Boy and the North Wind
- Afternoon Greetings
- Morning Greetings
- Counting Numbers 1-10
- Courtesy Words
- Evening Greetings


Recommendations for Collins Kesuibai (Overall Score: 0.00):
- nan
- Courtesy Words


Recommendations for Huy Tran Duong Tuan (Overall Score: 0.60):
- Interpreting the weather
- Alphabets Lesson 1 (A-M)
- Counting Numbers 10

In [40]:
# Neural network prediction on test set
predictions_dl = model_dl.predict(X_test)
predicted_classes_dl = predictions_dl.argmax(axis=1)

# Output the neural network's predicted classes
print(f"Neural Network Predicted classes: {predicted_classes_dl}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
Neural Network Predicted classes: [0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 2]
