In [17]:
import numpy as np
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Build a reproducible synthetic dataset (meant to resemble a diabetes-style table).
# Seeding the global NumPy RNG makes every draw below deterministic.
np.random.seed(42)
n_samples = 200

# Independent variables, described as (sampler, low, high).
# The order of this table matters: each draw advances the shared RNG stream,
# so reordering entries would change the generated values.
feature_specs = {
    "age": (np.random.randint, 20, 80),
    "bmi": (np.random.uniform, 18, 35),
    "blood_pressure": (np.random.uniform, 90, 180),
    "cholesterol": (np.random.uniform, 150, 300),
    "glucose": (np.random.uniform, 70, 200),
}
data = pd.DataFrame({
    name: sampler(low, high, n_samples)
    for name, (sampler, low, high) in feature_specs.items()
})

# Dependent variable: a weighted sum of the features plus Gaussian noise
# (mean 0, standard deviation 10). The terms are accumulated left to right
# in the order listed here.
weights = {
    "age": 0.2,
    "bmi": 0.5,
    "blood_pressure": 0.3,
    "cholesterol": 0.4,
    "glucose": 0.6,
}
signal = sum(weight * data[name] for name, weight in weights.items())
noise = np.random.normal(0, 10, n_samples)
data["target"] = signal + noise

# Separate the predictors from the response column.
y = data["target"]
X = data.drop(columns=["target"])

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only,
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit an ordinary least-squares regression on the standardized training data.
model = LinearRegression().fit(X_train_scaled, y_train)

# Persist the fitted model and scaler, then write the full synthetic
# dataset (features + target) to CSV without the index column.
for artifact, filename in (
    (model, "regression_model.pkl"),
    (scaler, "scaler.pkl"),
):
    joblib.dump(artifact, filename)
data.to_csv("synthetic_regression_data.csv", index=False)

print("Model and scaler saved successfully.")


Model and scaler saved successfully.
