- a) Import bibliotek

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib


- b) Import danych

In [None]:
# Load the patient CSV (path is relative to the working directory) into `df`,
# the DataFrame the following cells read from.
df = pd.read_csv(filepath_or_buffer='pacjenci_demo_system_ekspertowy.csv')

- c) Analiza danych wejściowych

In [None]:
# describe() summary of the five columns that are later used as model features.
# Kept as the cell's final expression so the notebook renders the result.
df.loc[:, ['age', 'bmi', 'glucose', 'systolic_bp', 'diastolic_bp']].describe()

- d) Wizualizacja danych

In [None]:
# Overlay histograms of the systolic_bp and diastolic_bp columns on one figure.
plt.figure(figsize=(8, 4))
for column, legend_label in (('systolic_bp', 'SBP'), ('diastolic_bp', 'DBP')):
    # alpha < 1 keeps both distributions visible where the bars overlap.
    plt.hist(df[column], bins=15, alpha=0.7, label=legend_label)
plt.xlabel('Pressure')
plt.ylabel('Count')
plt.legend()
plt.tight_layout()
plt.show()


- e) Przygotowanie etykiety ryzyka

In [None]:
# Binary target: 1 when systolic_bp >= 140 or diastolic_bp >= 90, otherwise 0.
# NOTE(review): a missing (NaN) reading compares as False, so such a row gets 0
# unless the other column crosses its threshold - confirm that is intended.
df['risk'] = (df['systolic_bp'].ge(140) | df['diastolic_bp'].ge(90)).astype(int)

- f) Przygotowanie danych do ML

In [None]:
# Feature matrix and target vector for the classifier.
# NOTE(review): 'risk' is computed earlier in this notebook purely from
# systolic_bp / diastolic_bp thresholds, and both columns are also features
# here, so the target is a deterministic function of the inputs - confirm
# that this is intended before relying on the reported accuracy.
feature_columns = ['age', 'bmi', 'glucose', 'systolic_bp', 'diastolic_bp']
X = df[feature_columns]
y = df['risk']

# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# same class proportions, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


- g) Trenowanie modelu ML

In [None]:
# Preprocessing and classifier chained in one estimator, so the same
# transformations are applied at fit time and at predict time.
pipeline_steps = [
    # Replace missing feature values with the per-column median.
    ('imputer', SimpleImputer(strategy='median')),
    # Standardize each feature before it reaches the linear model.
    ('scaler', StandardScaler()),
    # Logistic regression; the solver may run for up to 1000 iterations.
    ('clf', LogisticRegression(max_iter=1000)),
]
model = Pipeline(steps=pipeline_steps)

# Fit on the training split; left as the final expression so the notebook
# displays the fitted pipeline.
model.fit(X_train, y_train)


- h) Ocena modelu

In [None]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label; kept as
# the final expression so the notebook prints the value.
accuracy_score(y_true=y_test, y_pred=y_pred)


- i) Zapis modelu

In [None]:
# Serialize the whole pipeline object (imputer, scaler and classifier) to disk
# in the working directory so it can be reloaded later with joblib.load.
joblib.dump(value=model, filename='risk_model.joblib')