In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [2]:
# Read the raw dataset into a DataFrame.
# DATASET_PATH is a relative path, so it is resolved against the current
# working directory; point it at another CSV to train on different data.
DATASET_PATH = 'Path_updated.csv'
df = pd.read_csv(DATASET_PATH)

In [3]:
# Split the frame into model inputs and the two prediction targets.
target_columns = ['Complexity', 'Learning Content']

# Each target is kept as its own Series; every remaining column is a feature.
y_complexity, y_learning_content = (df[name] for name in target_columns)
X = df.drop(columns=target_columns)

In [4]:
# Preprocess the features and build the train/test splits for both targets.
#
# Produces (for later cells): categorical_cols, numerical_cols, label_encoders,
# scaler, X_train_*/X_test_* and y_train_*/y_test_* for both targets.

# Work on a copy: X keeps the raw features, so re-running this cell always
# starts from the same input instead of from already-encoded data (which would
# leave categorical_cols and label_encoders empty on the second run).
X_prepared = X.copy()

# Choose both column groups from the raw dtypes, *before* encoding. Label
# encoding turns object columns into integer columns, so selecting the
# numerical columns afterwards would also standardize the category codes.
categorical_cols = X_prepared.select_dtypes(include=['object']).columns
numerical_cols = X_prepared.select_dtypes(include=['int64', 'float64']).columns

# Encode categorical features. The encoders are fitted on every row on purpose:
# this only fixes the category -> integer vocabulary, and it keeps categories
# that happen to land only in the test split from raising at transform time.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X_prepared[col] = le.fit_transform(X_prepared[col])
    label_encoders[col] = le

# Split once for both targets. Passing both target Series to a single call
# applies the same row partition to all of them, which is the partition the two
# separate calls with identical test_size/random_state used to produce.
(
    X_train, X_test,
    y_train_complexity, y_test_complexity,
    y_train_content, y_test_content,
) = train_test_split(
    X_prepared, y_complexity, y_learning_content, test_size=0.3, random_state=42
)

# Standardize numerical features. The scaler is fitted on the training rows
# only, so the mean/variance it learns never see the held-out rows; the test
# rows are transformed with the training statistics.
scaler = StandardScaler()
X_train = X_train.copy()  # explicit copies: safe to assign columns below
X_test = X_test.copy()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Both models use the same feature matrices; the per-target names expected by
# the training cells refer to the same (read-only from here on) frames.
X_train_complexity = X_train_content = X_train
X_test_complexity = X_test_content = X_test

In [5]:
# Fit a random forest on the training split to predict 'Complexity'.
# fit() returns the estimator itself, so construction and fitting are chained.
model_complexity = RandomForestClassifier(random_state=42).fit(
    X_train_complexity, y_train_complexity
)

# Score the held-out split: overall accuracy, then a per-class breakdown.
y_pred_complexity = model_complexity.predict(X_test_complexity)
complexity_accuracy = accuracy_score(y_test_complexity, y_pred_complexity)
print("Accuracy for 'Complexity':", complexity_accuracy)
complexity_report = classification_report(y_test_complexity, y_pred_complexity)
print("Classification Report for 'Complexity':\n", complexity_report)

# Persist the classifier together with the fitted preprocessing objects so the
# inference cell can reload them. The scaler dump stays last and as a bare
# expression, so the cell still displays that call's return value.
joblib.dump(label_encoders, 'label_encoders.pkl')
joblib.dump(model_complexity, 'model_complexity.pkl')
joblib.dump(scaler, 'scaler.pkl')


Accuracy for 'Complexity': 0.9994110718492344
Classification Report for 'Complexity':
               precision    recall  f1-score   support

    advanced       1.00      1.00      1.00       253
    beginner       1.00      1.00      1.00      1064
intermediate       1.00      1.00      1.00       381

    accuracy                           1.00      1698
   macro avg       1.00      1.00      1.00      1698
weighted avg       1.00      1.00      1.00      1698



['scaler.pkl']

In [6]:
# Fit a random forest on the training split to predict 'Learning Content'.
# fit() returns the estimator itself, so construction and fitting are chained.
model_content = RandomForestClassifier(random_state=42).fit(
    X_train_content, y_train_content
)

# Score the held-out split: overall accuracy, then a per-class breakdown.
y_pred_content = model_content.predict(X_test_content)
content_accuracy = accuracy_score(y_test_content, y_pred_content)
print("Accuracy for 'Learning Content':", content_accuracy)
content_report = classification_report(y_test_content, y_pred_content)
print("Classification Report for 'Learning Content':\n", content_report)

# Persist the classifier; kept as the final bare expression so the cell still
# displays the return value of the dump call.
joblib.dump(model_content, 'model_content.pkl')


Accuracy for 'Learning Content': 0.8975265017667845
Classification Report for 'Learning Content':
               precision    recall  f1-score   support

  assignment       0.92      0.97      0.94       383
       audio       0.37      0.14      0.20        51
         pdf       0.30      0.23      0.26        44
        quiz       0.95      0.96      0.95       609
        text       0.19      0.09      0.12        45
       video       0.91      0.96      0.94       566

    accuracy                           0.90      1698
   macro avg       0.61      0.56      0.57      1698
weighted avg       0.87      0.90      0.88      1698



['model_content.pkl']

In [8]:
# Inference example: reload the persisted artifacts and predict both targets
# for one new record.

# Load models and encoders.
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# artifact files that were produced by this notebook or another trusted source.
model_complexity = joblib.load('model_complexity.pkl')
model_content = joblib.load('model_content.pkl')
label_encoders = joblib.load('label_encoders.pkl')
scaler = joblib.load('scaler.pkl')

# Example of new data (replace with actual new data)
new_data = pd.DataFrame({
    'Subject': ['Artificial Intelligence and Machine Learning'],
    'Course Score': [85],
    'Learning Score': [78],
    'quiz Score': [90]
})

# Recover the column groups from the loaded artifacts instead of relying on
# variables left in the kernel by the training cells, so this cell also works
# after a kernel restart. feature_names_in_ is set by scikit-learn (>= 1.0)
# when an estimator is fitted on a DataFrame with string column names.
encoded_cols = list(label_encoders)
scaled_cols = list(scaler.feature_names_in_)
# Both models are trained on the same feature matrix, so one order serves both.
feature_order = list(model_complexity.feature_names_in_)

# Fail early, naming the columns, if the new record lacks a training feature.
missing_cols = [col for col in feature_order if col not in new_data.columns]
if missing_cols:
    raise ValueError(f"new_data is missing feature columns: {missing_cols}")

# Put the columns in training order; .copy() makes the assignments below safe.
new_data = new_data[feature_order].copy()

# Encode categorical variables with the encoders fitted during training.
for col in encoded_cols:
    try:
        new_data[col] = label_encoders[col].transform(new_data[col])
    except ValueError as exc:
        # Typically a category that was not present in the training data;
        # re-raise with the column name so the offending field is obvious.
        raise ValueError(f"Cannot encode column {col!r}: {exc}") from exc

# Standardize with the statistics learned during training (encoding runs first
# because the persisted scaler may also cover encoded columns).
new_data[scaled_cols] = scaler.transform(new_data[scaled_cols])

# Predict using the models
complexity_prediction = model_complexity.predict(new_data)
content_prediction = model_content.predict(new_data)

# The targets were never label-encoded, so the predictions are already the
# class labels and need no decoding.
print("Predicted Complexity:", complexity_prediction[0])
print("Predicted Learning Content:", content_prediction[0])


Predicted Complexity: advanced
Predicted Learning Content: assignment
