In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# ---- Load dataset ----
# One row per dosage group; the columns read below are "Dosage", "#Cured"
# and "TotalPatients".
data = pd.read_csv("medication_data.csv")

# Observed cure rate per group, then its odds p / (1 - p) and log-odds.
# A group whose cure rate is exactly 0 or 1 has no finite log-odds.
data["Probability"] = data["#Cured"] / data["TotalPatients"]
data["Odds"] = data["Probability"] / (1 - data["Probability"])
data["LogOdds"] = np.log(data["Odds"])

print("Dataset with Calculations:")
print(data)

# ---- Prepare features and target ----
# Group-level view (one entry per CSV row). The model below is fitted on the
# per-patient expansion instead; these two arrays are not used by the fit and
# are kept only so that code outside this section can still refer to them.
X = data["Dosage"].values.reshape(-1, 1)
y = data["Probability"].values

# Convert the grouped counts into one binary outcome per patient
# (1 = cured, 0 = not cured), each paired with that patient's dosage.
dosages = data["Dosage"].to_numpy()
cured_counts = data["#Cured"].to_numpy()
total_counts = data["TotalPatients"].to_numpy()

# Fail early with a clear message instead of letting inconsistent counts
# surface later as a sample-count mismatch inside model.fit().
if ((cured_counts < 0) | (cured_counts > total_counts)).any():
    raise ValueError(
        "Each row must satisfy 0 <= #Cured <= TotalPatients"
    )
not_cured_counts = total_counts - cured_counts

# Each dosage is repeated once per patient in its group.
X_expanded = np.repeat(dosages, total_counts).reshape(-1, 1)

# Labels follow the same group order: for every group, all the 1s (cured)
# come first, followed by all the 0s (not cured).
label_pattern = np.tile([1, 0], len(data))
label_counts = np.column_stack((cured_counts, not_cured_counts)).ravel()
y_expanded = np.repeat(label_pattern, label_counts)

# ---- Logistic Regression Model ----
model = LogisticRegression()
model.fit(X_expanded, y_expanded)

# Predict probabilities over a range of dosages.
# The range always covers 10..60 and is widened when the data contains
# dosages outside that span, so every observed point lies under the curves.
dose_lo = min(10, data["Dosage"].min())
dose_hi = max(60, data["Dosage"].max())
X_range = np.linspace(dose_lo, dose_hi, 100).reshape(-1, 1)
probs = model.predict_proba(X_range)[:, 1]  # column 1 = P(outcome == 1)

# ---- Plot ----
fig, (ax_log_odds, ax_prob) = plt.subplots(1, 2, figsize=(10, 5))

# Subplot 1: Log Odds vs Dosage
# The fitted model is linear on the log-odds scale:
# log-odds = slope * dosage + intercept.
slope = model.coef_[0][0]
intercept = model.intercept_[0]
ax_log_odds.scatter(data["Dosage"], data["LogOdds"], color="blue")
ax_log_odds.plot(X_range, slope * X_range + intercept, color="red")
ax_log_odds.set_xlabel("Dosage")
ax_log_odds.set_ylabel("Log Odds")
ax_log_odds.set_title("Log Odds Regression")

# Subplot 2: Probability vs Dosage
# Same model on the probability scale, against the observed cure rates.
ax_prob.scatter(data["Dosage"], data["Probability"], color="blue")
ax_prob.plot(X_range, probs, color="green")
ax_prob.set_xlabel("Dosage")
ax_prob.set_ylabel("Probability Cured")
ax_prob.set_title("Sigmoid Curve")

fig.tight_layout()
plt.show()