In [17]:
import joblib  # For saving and loading models and scalers
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Optional: for scaling numerical data
from sklearn.preprocessing import StandardScaler


In [18]:
# Read the content-recommendation dataset from the CSV in the working directory
df = pd.read_csv(filepath_or_buffer='Content_recommend.csv')

# Preview the first five rows as a quick sanity check of the loaded columns
df.head(5)

Unnamed: 0,Subject,Course Score,Learning Score,Quiz Score,Recommendation
0,Artificial Intelligence and Machine Learning,100,9,95,Artificial Intelligence and Machine Learning
1,Artificial Intelligence and Machine Learning,99,18,51,Artificial Intelligence and Machine Learning
2,Artificial Intelligence and Machine Learning,98,3,81,Artificial Intelligence and Machine Learning
3,Artificial Intelligence and Machine Learning,97,10,67,Artificial Intelligence and Machine Learning
4,Artificial Intelligence and Machine Learning,96,88,73,Data Science and Analytics


In [19]:
# Preprocessing: encode the categorical columns, standardize the numeric scores,
# and persist the fitted transformers so inference can reuse exactly the same mapping.

# The three numeric feature columns that get standardized.
score_columns = ['Course Score', 'Learning Score', 'Quiz Score']

# Fail fast if this cell is re-run on an already-processed frame. Without this guard a
# second run would refit the encoders on integer codes and the scaler on standardized
# values, then overwrite the saved .pkl files with transformers that no longer
# understand raw inputs.
if pd.api.types.is_numeric_dtype(df['Subject']):
    raise RuntimeError(
        "'Subject' is already numeric, so df appears to be preprocessed already. "
        "Re-run the data loading cell before running this cell again."
    )

# Encode categorical columns (one encoder per column so each can be inverted independently)
label_encoder_subject = LabelEncoder()
label_encoder_recommendation = LabelEncoder()

df['Subject'] = label_encoder_subject.fit_transform(df['Subject'])
df['Recommendation'] = label_encoder_recommendation.fit_transform(df['Recommendation'])

# Optional: Scale numerical features
# NOTE(review): the scaler is fitted on every row of df, i.e. before any train/test
# split, so held-out rows influence the scaling statistics. Fit on the training rows
# only if an unbiased evaluation of a scale-sensitive model is needed.
scaler = StandardScaler()
df[score_columns] = scaler.fit_transform(df[score_columns])

# Save the encoders and scaler
joblib.dump(label_encoder_subject, 'label_encoder_subject.pkl')
joblib.dump(label_encoder_recommendation, 'label_encoder_recommendation.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Display the processed data
print(df.head())


   Subject  Course Score  Learning Score  Quiz Score  Recommendation
0        0      1.695674       -1.389133    1.539503               0
1        0      1.661075       -1.079805    0.033907               0
2        0      1.626475       -1.595351    1.060450               0
3        0      1.591876       -1.354763    0.581396               0
4        0      1.557277        1.326076    0.786705               3


In [20]:
# Feature matrix: every column except the encoded target
X = df.drop(columns='Recommendation')
# Target vector: the encoded recommendation label
y = df.loc[:, 'Recommendation']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training data: {X_train.shape}, Testing data: {X_test.shape}")

Training data: (4044, 4), Testing data: (1012, 4)


In [21]:
# Build the random forest with a fixed seed and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain together)
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

In [22]:
# Overall share of held-out rows predicted correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 (classes appear as their encoded integer labels)
print(classification_report(y_true=y_test, y_pred=y_pred))

# Optional: raw counts of true class (rows) versus predicted class (columns)
print(confusion_matrix(y_true=y_test, y_pred=y_pred))


Accuracy: 0.99
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        82
           1       1.00      0.96      0.98        95
           2       1.00      1.00      1.00        85
           3       1.00      0.98      0.99        84
           4       1.00      1.00      1.00        84
           5       1.00      1.00      1.00        78
           6       0.96      1.00      0.98       297
           7       1.00      0.96      0.98        76
           8       1.00      0.96      0.98        91
           9       1.00      1.00      1.00        40

    accuracy                           0.99      1012
   macro avg       1.00      0.99      0.99      1012
weighted avg       0.99      0.99      0.99      1012

[[ 82   0   0   0   0   0   0   0   0   0]
 [  0  91   0   0   0   0   4   0   0   0]
 [  0   0  85   0   0   0   0   0   0   0]
 [  0   0   0  82   0   0   2   0   0   0]
 [  0   0   0   0  84   0   0   0   0   0]
 [  0   0  

In [23]:
# Persist the trained classifier to disk so it can be reloaded without retraining
joblib.dump(value=model, filename='classification_model.pkl')

['classification_model.pkl']

In [24]:
# Restore the persisted artifacts from disk: the classifier first, then the two
# label encoders and the scaler that were fitted during preprocessing
model = joblib.load(filename='classification_model.pkl')
label_encoder_subject = joblib.load(filename='label_encoder_subject.pkl')
label_encoder_recommendation = joblib.load(filename='label_encoder_recommendation.pkl')
scaler = joblib.load(filename='scaler.pkl')

# Example of predicting with new data
def predict_recommendation(subject, course_score, learning_score, quiz_score):
    """Predict the recommendation label for a single learner.

    Uses the module-level ``label_encoder_subject``, ``scaler``, ``model`` and
    ``label_encoder_recommendation`` objects, which must already be loaded.

    Args:
        subject: Subject name; must be one of the values the subject encoder was
            fitted on.
        course_score: Raw (unscaled) course score.
        learning_score: Raw (unscaled) learning score.
        quiz_score: Raw (unscaled) quiz score.

    Returns:
        The decoded recommendation label predicted by the model.

    Raises:
        ValueError: If ``subject`` is not among the encoder's known subjects.
    """
    score_columns = ['Course Score', 'Learning Score', 'Quiz Score']

    # Validate up front so an unseen subject produces an actionable message that
    # lists the accepted values, instead of the encoder's generic error.
    known_subjects = list(label_encoder_subject.classes_)
    if subject not in known_subjects:
        raise ValueError(
            f"Unknown subject {subject!r}; expected one of: {known_subjects}"
        )

    # Transform the categorical input
    subject_encoded = label_encoder_subject.transform([subject])[0]

    # Build a one-row frame with the same column names and order used for training
    new_data = pd.DataFrame(
        [[subject_encoded, course_score, learning_score, quiz_score]],
        columns=['Subject'] + score_columns,
    )

    # Scale only the numeric scores; the encoded subject is passed through unscaled
    new_data[score_columns] = scaler.transform(new_data[score_columns])

    # Predict the encoded class, then map it back to its text label
    prediction_encoded = model.predict(new_data)
    recommendation = label_encoder_recommendation.inverse_transform(prediction_encoded)

    return recommendation[0]

# Try the predictor on one hand-picked learner profile
subject = "Artificial Intelligence and Machine Learning"
course_score, learning_score, quiz_score = 75, 80, 85

print(f"Predicted Recommendation: {predict_recommendation(subject, course_score, learning_score, quiz_score)}")


Predicted Recommendation: IT Project Management
