1) Predict loan default from income and credit score
Description: predict whether a borrower defaults (1) or not (0) from annual income in ₹ lakhs (annual_income_lakhs) and credit score (credit_score).

In [4]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Loan-default toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
annual_income_lakhs,credit_score,default
2.5,520,1
3.0,580,0
4.5,600,0
1.8,480,1
5.0,700,0
2.0,500,1
6.0,720,0
3.5,610,0
2.2,490,1
4.0,650,0
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["annual_income_lakhs", "credit_score"]
target_col = "default"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# The features are used in their raw units (no scaling step), so the
# coefficient sizes reflect those units as well as feature influence.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (default).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_default": probs,
    "predicted_default": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,annual_income_lakhs,credit_score,default
0,2.5,520,1
1,3.0,580,0
2,4.5,600,0
3,1.8,480,1
4,5.0,700,0
5,2.0,500,1
6,6.0,720,0
7,3.5,610,0
8,2.2,490,1
9,4.0,650,0



✅ Model Trained Successfully

Intercept (β0): 105.95304909066616
Coefficients (β):
  annual_income_lakhs: -0.0017
  credit_score: -0.1926

✅ Predictions:


Unnamed: 0,annual_income_lakhs,credit_score,default,probability_default,predicted_default
0,2.5,520,1,0.9969196,1
1,3.0,580,0,0.003081548,0
2,4.5,600,0,6.542952e-05,0
3,1.8,480,1,0.9999986,1
4,5.0,700,0,2.815041e-13,0
5,2.0,500,1,0.9999345,1
6,6.0,720,0,5.964174e-15,0
7,3.5,610,0,9.548805e-06,0
8,2.2,490,1,0.9999905,1
9,4.0,650,0,4.297368e-09,0



✅ Model Evaluation:
Confusion Matrix:
 [[6 0]
 [0 4]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000


2) Predict disease presence from age and BMI
Description: predict disease (1 = disease present, 0 = absent) from age (years) and BMI.

In [2]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Disease toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
age,bmi,disease
25,22.0,0
30,24.5,0
45,28.0,1
50,31.5,1
35,26.0,0
60,33.0,1
40,27.5,0
55,30.0,1
48,29.0,1
29,23.5,0
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["age", "bmi"]
target_col = "disease"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# The features are used in their raw units (no scaling step), so the
# coefficient sizes reflect those units as well as feature influence.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (disease).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_disease": probs,
    "predicted_disease": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,age,bmi,disease
0,25,22.0,0
1,30,24.5,0
2,45,28.0,1
3,50,31.5,1
4,35,26.0,0
5,60,33.0,1
6,40,27.5,0
7,55,30.0,1
8,48,29.0,1
9,29,23.5,0



✅ Model Trained Successfully

Intercept (β0): -34.71838715184588
Coefficients (β):
  age: 0.7563
  bmi: 0.0948

✅ Predictions:


Unnamed: 0,age,bmi,disease,probability_disease,predicted_disease
0,25,22.0,0,1e-06,0
1,30,24.5,0,6.1e-05,0
2,45,28.0,1,0.877461,1
3,50,31.5,1,0.997721,1
4,35,26.0,0,0.003068,0
5,60,33.0,1,0.999999,1
6,40,27.5,0,0.134688,0
7,55,30.0,1,0.99994,1
8,48,29.0,1,0.987032,1
9,29,23.5,0,2.6e-05,0



✅ Model Evaluation:
Confusion Matrix:
 [[5 0]
 [0 5]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000


3) Predict churn from months_active and last_month_spend
Description: predict customer churn (1 = yes, 0 = no) from tenure in months (months_active) and last month's spend in ₹ (last_month_spend).

In [3]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Churn toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
months_active,last_month_spend,churn
2,120,1
5,300,0
12,250,0
1,50,1
8,180,0
3,90,1
20,400,0
6,150,0
4,80,1
15,320,0
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["months_active", "last_month_spend"]
target_col = "churn"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# The features are used in their raw units (no scaling step), so the
# coefficient sizes reflect those units as well as feature influence.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (churn).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_churn": probs,
    "predicted_churn": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,months_active,last_month_spend,churn
0,2,120,1
1,5,300,0
2,12,250,0
3,1,50,1
4,8,180,0
5,3,90,1
6,20,400,0
7,6,150,0
8,4,80,1
9,15,320,0



✅ Model Trained Successfully

Intercept (β0): 40.77285199793662
Coefficients (β):
  months_active: -0.0401
  last_month_spend: -0.3008

✅ Predictions:


Unnamed: 0,months_active,last_month_spend,churn,probability_churn,predicted_churn
0,2,120,1,0.989976,1
1,5,300,0,2.662694e-22,0
2,12,250,0,6.85357e-16,0
3,1,50,1,1.0,1
4,8,180,0,1.124824e-06,0
5,3,90,1,0.9999987,1
6,20,400,0,1.256261e-35,0
7,6,150,0,0.01002405,0
8,4,80,1,0.9999999,1
9,15,320,0,4.346746e-25,0



✅ Model Evaluation:
Confusion Matrix:
 [[6 0]
 [0 4]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000


4) Predict pass/fail from hours studied and prior_grade
Description: predict student pass (1) vs fail (0) from hours studied (hours) and prior exam grade in % (prior_grade).

In [5]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Pass/fail toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
hours,prior_grade,pass
1.0,45,0
2.5,55,0
3.0,60,1
4.0,65,1
0.5,40,0
5.0,75,1
3.5,62,1
2.0,50,0
4.5,70,1
1.5,48,0
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["hours", "prior_grade"]
target_col = "pass"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# The features are used in their raw units (no scaling step), so the
# coefficient sizes reflect those units as well as feature influence.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (pass).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_pass": probs,
    "predicted_pass": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,hours,prior_grade,pass
0,1.0,45,0
1,2.5,55,0
2,3.0,60,1
3,4.0,65,1
4,0.5,40,0
5,5.0,75,1
6,3.5,62,1
7,2.0,50,0
8,4.5,70,1
9,1.5,48,0



✅ Model Trained Successfully

Intercept (β0): -44.811655623608
Coefficients (β):
  hours: 0.0866
  prior_grade: 0.7771

✅ Predictions:


Unnamed: 0,hours,prior_grade,pass,probability_pass,predicted_pass
0,1.0,45,0,5.8e-05,0
1,2.5,55,0,0.135223,0
2,3.0,60,1,0.888268,1
3,4.0,65,1,0.997636,1
4,0.5,40,0,1e-06,0
5,5.0,75,1,0.999999,1
6,3.5,62,1,0.975172,1
7,2.0,50,0,0.003066,0
8,4.5,70,1,0.999953,1
9,1.5,48,0,0.000622,0



✅ Model Evaluation:
Confusion Matrix:
 [[5 0]
 [0 5]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000


5) Predict email spam from number of links and has_attachment
Description: predict spam (1) vs not spam (0) using link_count and has_attachment (0/1).

In [6]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Spam toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
link_count,has_attachment,spam
0,0,0
3,1,1
1,0,0
5,1,1
2,0,0
4,1,1
0,0,0
6,1,1
2,0,0
1,1,0
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["link_count", "has_attachment"]
target_col = "spam"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# The features are used in their raw units (no scaling step), so the
# coefficient sizes reflect those units as well as feature influence.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (spam).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_spam": probs,
    "predicted_spam": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,link_count,has_attachment,spam
0,0,0,0
1,3,1,1
2,1,0,0
3,5,1,1
4,2,0,0
5,4,1,1
6,0,0,0
7,6,1,1
8,2,0,0
9,1,1,0



✅ Model Trained Successfully

Intercept (β0): -3.813519344649295
Coefficients (β):
  link_count: 1.2293
  has_attachment: 0.5237

✅ Predictions:


Unnamed: 0,link_count,has_attachment,spam,probability_spam,predicted_spam
0,0,0,0,0.021594,0
1,3,1,1,0.598191,1
2,1,0,0,0.070158,0
3,5,1,1,0.945651,1
4,2,0,0,0.205053,0
5,4,1,1,0.835784,1
6,0,0,0,0.021594,0
7,6,1,1,0.983467,1
8,2,0,0,0.205053,0
9,1,1,0,0.112988,0



✅ Model Evaluation:
Confusion Matrix:
 [[6 0]
 [0 4]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000


6) Predict passenger boarding from weather_clear and day_of_week (binary simplified)
Description: whether a commuter boards (1) vs skips (0) using weather_clear (1/0) and is_weekday (1/0).

In [7]:
import pandas as pd
from io import StringIO
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)

# ----------------------------------------------------------
# Step 1 - Passenger-boarding toy dataset, embedded as CSV text
# ----------------------------------------------------------
csv_data = """
weather_clear,is_weekday,board
1,1,1
0,1,0
1,0,1
0,0,0
1,1,1
1,1,1
0,1,0
1,0,1
0,0,0
1,1,1
"""

# The literal starts with an empty line; the frame shown below still has
# the expected three columns and ten rows.
df = pd.read_csv(StringIO(csv_data))
print("✅ Dataset Loaded:")
display(df)

# ----------------------------------------------------------
# Step 2 - Feature matrix and target vector
# ----------------------------------------------------------
# The feature names are declared once so the coefficient labels printed
# in Step 4 cannot drift away from the columns actually used for fitting.
feature_cols = ["weather_clear", "is_weekday"]
target_col = "board"

X = df[feature_cols].to_numpy()
y = df[target_col].to_numpy()

# ----------------------------------------------------------
# Step 3 - Fit the logistic regression
# ----------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression(solver='lbfgs', max_iter=2000).fit(X, y)

print("\n✅ Model Trained Successfully\n")

# ----------------------------------------------------------
# Step 4 - Report the fitted parameters
# ----------------------------------------------------------
# Both features are 0/1 indicators here, so each coefficient is the
# change in log-odds when that indicator switches from 0 to 1.
print("Intercept (β0):", model.intercept_[0])
print("Coefficients (β):")
for col, coef in zip(feature_cols, model.coef_[0]):
    print(f"  {col}: {coef:.4f}")

# ----------------------------------------------------------
# Step 5 - Probabilities and thresholded labels
# ----------------------------------------------------------
# Column 1 of predict_proba is the probability of label 1 (boards).
probs = model.predict_proba(X)[:, 1]
preds = (probs >= 0.5).astype(int)

# assign() yields a new frame; df itself is left as loaded.
results = df.assign(**{
    "probability_board": probs,
    "predicted_board": preds,
})

print("\n✅ Predictions:")
display(results)

# ----------------------------------------------------------
# Step 6 - Evaluation
# ----------------------------------------------------------
# Every score here is computed on the same rows the model was fitted on,
# so it measures in-sample fit rather than performance on unseen data.
cm = confusion_matrix(y, preds)
acc, prec, rec, f1 = (
    score_fn(y, preds)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y, probs)  # ranking metric: takes probabilities, not labels

print("\n✅ Model Evaluation:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy:  {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall:    {rec:.3f}")
print(f"F1 Score:  {f1:.3f}")
print(f"AUC:       {auc:.3f}")


✅ Dataset Loaded:


Unnamed: 0,weather_clear,is_weekday,board
0,1,1,1
1,0,1,0
2,1,0,1
3,0,0,0
4,1,1,1
5,1,1,1
6,0,1,0
7,1,0,1
8,0,0,0
9,1,1,1



✅ Model Trained Successfully

Intercept (β0): -0.5669400456667222
Coefficients (β):
  weather_clear: 1.5284
  is_weekday: 0.1707

✅ Predictions:


Unnamed: 0,weather_clear,is_weekday,board,probability_board,predicted_board
0,1,1,1,0.756231,1
1,0,1,0,0.402211,0
2,1,0,1,0.723411,1
3,0,0,0,0.361943,0
4,1,1,1,0.756231,1
5,1,1,1,0.756231,1
6,0,1,0,0.402211,0
7,1,0,1,0.723411,1
8,0,0,0,0.361943,0
9,1,1,1,0.756231,1



✅ Model Evaluation:
Confusion Matrix:
 [[4 0]
 [0 6]]
Accuracy:  1.000
Precision: 1.000
Recall:    1.000
F1 Score:  1.000
AUC:       1.000
