In [1]:
# ✅ PART 1: Implement logistic regression using Python

# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

# Step 2: Raw observations - hours studied and the exam outcome per student
data = {
    'STUDY_HOURS': [29, 15, 33, 48, 39],
    'PASS_FAIL': [0, 0, 1, 1, 1],  # 1 marks a pass, 0 marks a fail
}

# Step 3: Put the observations into a table
df = pd.DataFrame(data)

# Step 4: Split the table into model input and target.
# The list selector keeps X two-dimensional (one column), while y is a 1-D Series.
X = df.loc[:, ['STUDY_HOURS']]
y = df.loc[:, 'PASS_FAIL']

# Step 5: Build a logistic regression classifier with default settings and
# fit it on the full table (fit() hands back the fitted estimator itself)
model = LogisticRegression().fit(X, y)

# Echo the table and confirm that training finished
print("📊 Dataset:\n", df)
print("\n✅ Logistic Regression model trained.")


📊 Dataset:
    STUDY_HOURS  PASS_FAIL
0           29          0
1           15          0
2           33          1
3           48          1
4           39          1

✅ Logistic Regression model trained.


In [2]:
# ✅ PART 2: Predict pass/fail for 25 and 42 study hours (Updated to avoid warning)

# Study hours to score. This is the single source of truth for the inputs:
# the report below reads the hours back from this frame, so the printed labels
# cannot drift away from what was actually passed to the model.
# A DataFrame with the training column name is used (instead of a bare list)
# so the input carries the same feature name the model was fitted with.
new_students = pd.DataFrame({'STUDY_HOURS': [25, 42]})
predictions = model.predict(new_students)

# Show prediction results: one line per student, pairing each input with its
# predicted class (1 = Pass, anything else = Fail)
print("\n🔍 Predictions:")
for hours, label in zip(new_students['STUDY_HOURS'], predictions):
    print(f"Student with {hours} hours: {'Pass' if label == 1 else 'Fail'}")



🔍 Predictions:
Student with 25 hours: Fail
Student with 42 hours: Pass


In [3]:
# ✅ PART 3: Confusion matrix and performance metrics

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# Predict on the same rows the model was fitted on.
# NOTE: these are therefore training-set metrics; they describe how well the
# model reproduces the data it has already seen, not how it generalizes.
y_pred = model.predict(X)

# Build the confusion matrix with the label order pinned to [0, 1].
# Without `labels`, the matrix shape is inferred from the values present, so
# data containing a single class would give a 1x1 matrix and the four-way
# unpacking below would raise ValueError.
cm = confusion_matrix(y, y_pred, labels=[0, 1])

# For binary labels ordered [0, 1], the flattened matrix is tn, fp, fn, tp
tn, fp, fn, tp = cm.ravel()

print("\n📋 Confusion Matrix Results:")
print("True Positives (TP):", tp)
print("False Positives (FP):", fp)
print("True Negatives (TN):", tn)
print("False Negatives (FN):", fn)

# Accuracy, error rate (its complement), precision and recall for class 1
accuracy = accuracy_score(y, y_pred)
error_rate = 1 - accuracy
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)

# Show the metrics rounded to two decimals
print("\n📈 Performance Metrics:")
print("Accuracy     :", round(accuracy, 2))
print("Error Rate   :", round(error_rate, 2))
print("Precision    :", round(precision, 2))
print("Recall       :", round(recall, 2))



📋 Confusion Matrix Results:
True Positives (TP): 3
False Positives (FP): 0
True Negatives (TN): 2
False Negatives (FN): 0

📈 Performance Metrics:
Accuracy     : 1.0
Error Rate   : 0.0
Precision    : 1.0
Recall       : 1.0
