In [3]:
!pip install pytest pytest-cov

Collecting pytest-cov
  Downloading pytest_cov-6.1.1-py3-none-any.whl.metadata (28 kB)
Collecting coverage>=7.5 (from coverage[toml]>=7.5->pytest-cov)
  Downloading coverage-7.8.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.5 kB)
Downloading pytest_cov-6.1.1-py3-none-any.whl (23 kB)
Downloading coverage-7.8.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.0/244.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: coverage, pytest-cov
Successfully installed coverage-7.8.0 pytest-cov-6.1.1


In [4]:
%%writefile course_recommender.py
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score

numeric_features = [
    'gpa', 'attendance', 'course_load_per_term', 'study_hours_per_week',
    'average_grade', 'fail_rate', 'student_difficulty_rating',
    'weekly_study_hours_required', 'study_hours_logged'
]

categorical_features = [
    'enrollment_status', 'major_x', 'highest_degree',
    'course_difficulty_experience', 'career_goal',
    'prerequisite_level', 'delivery_mode', 'completion_status'
]

def load_and_merge_data():
    students_df = pd.read_csv('students_final.csv')
    courses_df = pd.read_csv('courses_final.csv')
    history_df = pd.read_csv('student_course_history_final.csv')
    merged_df = history_df.merge(students_df, on='student_id').merge(courses_df, on='course_id')
    return merged_df

def build_pipeline(model_type='random_forest'):
    preprocessor = ColumnTransformer(transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

    if model_type == 'random_forest':
        model = RandomForestClassifier(n_estimators=150, max_depth=15, min_samples_split=2, random_state=42)
    elif model_type == 'logistic_regression':
        model = LogisticRegression(max_iter=500)
    elif model_type == 'svm':
        model = SVC(kernel='rbf', probability=True)
    else:
        raise ValueError("Invalid model type")

    pipe = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model)
    ])
    return pipe

def train_and_evaluate_model(pipe, X_train, y_train, X_test, y_test):
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    return {'accuracy': acc, 'f1_score': f1, 'pipeline': pipe}

def generate_recommendations(merged_df):
    top_courses = merged_df.groupby('course_name').agg({
        'success': 'mean', 'popularity_score': 'mean'
    }).reset_index()
    top_courses = top_courses.sort_values(by=['success', 'popularity_score'], ascending=[False, False])
    return top_courses.head(10)


Writing course_recommender.py


In [5]:
%%writefile test_course_recommender.py
import pytest
import pandas as pd
from course_recommender import load_and_merge_data, build_pipeline, train_and_evaluate_model, generate_recommendations
from sklearn.model_selection import train_test_split

@pytest.fixture(scope="module")
def sample_data():
    return load_and_merge_data()

def test_data_loading(sample_data):
    assert not sample_data.empty
    assert 'success' in sample_data.columns

def test_pipeline_training(sample_data):
    X = sample_data[[
        'gpa', 'attendance', 'course_load_per_term', 'study_hours_per_week',
        'average_grade', 'fail_rate', 'student_difficulty_rating',
        'weekly_study_hours_required', 'study_hours_logged',
        'enrollment_status', 'major_x', 'highest_degree',
        'course_difficulty_experience', 'career_goal', 'prerequisite_level',
        'delivery_mode', 'completion_status']]
    y = sample_data['success']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    pipe = build_pipeline('random_forest')
    result = train_and_evaluate_model(pipe, X_train, y_train, X_test, y_test)
    assert 'accuracy' in result
    assert 'f1_score' in result
    assert result['accuracy'] >= 0

def test_generate_recommendations(sample_data):
    top_courses = generate_recommendations(sample_data)
    assert not top_courses.empty
    assert 'course_name' in top_courses.columns
    assert len(top_courses) <= 10


Writing test_course_recommender.py


In [6]:
!pytest --maxfail=1 --disable-warnings --cov=course_recommender test_course_recommender.py


platform linux -- Python 3.11.12, pytest-8.3.5, pluggy-1.5.0
rootdir: /content
plugins: cov-6.1.1, langsmith-0.3.24, typeguard-4.4.2, anyio-4.9.0
collected 3 items                                                              [0m

test_course_recommender.py [32m.[0m[32m.[0m[32m.[0m[32m                                           [100%][0m

_______________ coverage: platform linux, python 3.11.12-final-0 _______________

Name                    Stmts   Miss  Cover
-------------------------------------------
course_recommender.py      37      5    86%
-------------------------------------------
TOTAL                      37      5    86%
