In [0]:
%pip install pandas scikit-learn joblib


[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import pandas as pd
import random
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# -----------------------------
# 1. Generate synthetic data
# -----------------------------
random.seed(42)
data = []

for _ in range(500):
    study_hours = round(random.uniform(0, 6), 1)
    attendance = random.randint(50, 100)
    previous_score = random.randint(30, 100)
    assignments = random.randint(40, 100)

    pass_fail = (
        study_hours > 2 and
        attendance > 70 and
        previous_score > 50 and
        assignments > 60
    )

    data.append([
        study_hours,
        attendance,
        previous_score,
        assignments,
        int(pass_fail)
    ])

df = pd.DataFrame(
    data,
    columns=[
        "study_hours",
        "attendance_pct",
        "previous_score",
        "assignments_completed",
        "pass"
    ]
)

# -----------------------------
# 2. Prepare data
# -----------------------------
X = df.drop("pass", axis=1)
y = df["pass"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -----------------------------
# 3. Train model
# -----------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# -----------------------------
# 4. Evaluate model
# -----------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# -----------------------------
# 5. Log to MLflow (SAFE)
# -----------------------------
mlflow.end_run()  # Ensures no active run exists

with mlflow.start_run(run_name="student_pass_logistic_regression") as run:
    mlflow.log_param("max_iter", 1000)
    mlflow.log_metric("accuracy", accuracy)

    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="student_pass_model"
    )

    model_uri = f"runs:/{run.info.run_id}/student_pass_model"

print("✅ Model logged to MLflow")
print("📌 Model URI:", model_uri)
print("🎯 Accuracy:", round(accuracy, 4))




✅ Model logged to MLflow
📌 Model URI: runs:/b9e4c31bd20d462795b732cea7ce8412/student_pass_model
🎯 Accuracy: 0.83
