In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:
# File name of the CSV that holds the healthcare dataset.
file = "healthcare_dataset.csv"

In [None]:
# Read the dataset from the path stored in `file` (set in the previous cell)
# instead of repeating the literal, so the location is defined in one place.
df = pd.read_csv(file)

In [None]:
# Preview the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Doctor,Hospital,Insurance Provider,Billing Amount,Room Number,Admission Type,Discharge Date,Medication,Test Results
0,Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Matthew Smith,Sons and Miller,Blue Cross,18856.281306,328,Urgent,2024-02-02,Paracetamol,Normal
1,LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Samantha Davies,Kim Inc,Medicare,33643.327287,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Tiffany Mitchell,Cook PLC,Aetna,27955.096079,205,Emergency,2022-10-07,Aspirin,Normal
3,andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Kevin Wells,"Hernandez Rogers and Vang,",Medicare,37909.78241,450,Elective,2020-12-18,Ibuprofen,Abnormal
4,adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Kathleen Hanna,White-White,Aetna,14238.317814,458,Urgent,2022-10-09,Penicillin,Abnormal


In [None]:
# Columns used as model inputs.
features = ['Age', 'Gender', 'Blood Type', 'Medical Condition']

# Take an independent copy of the inputs so that encoding X later
# does not modify df; the target is the 'Test Results' column.
X = df.loc[:, features].copy()
y = df.loc[:, 'Test Results']

In [None]:
# Turn the text-valued inputs and the target into integer codes.
#
# Each non-numeric feature gets its own encoder, kept in `feature_encoders`,
# so every mapping can be inspected or inverted later (reusing one encoder for
# all columns would keep only the last fit). Columns that are already numeric
# (Age, judging by the preview above) are left untouched so their real scale
# is preserved instead of being replaced by rank codes.
#
# Raw values are always read from `df`, not from X / y, which makes this cell
# safe to re-run: it never re-encodes its own output. This relies on X having
# been built from df without dropping or reordering rows.
feature_encoders = {}
for col in df[X.columns].select_dtypes(exclude='number').columns:
    feature_encoders[col] = LabelEncoder()
    X[col] = feature_encoders[col].fit_transform(df[col])

# `le` remains the encoder fitted on the target, so le.classes_ lists the
# original 'Test Results' labels in code order.
le = LabelEncoder()
y = le.fit_transform(df['Test Results'])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes
# the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same transformation to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# 1. Linear model (logistic regression)
# Trained and evaluated on the standardized features.
lr = LogisticRegression().fit(X_train_scaled, y_train)
lr_test_pred = lr.predict(X_test_scaled)
lr_acc = accuracy_score(y_test, lr_test_pred)
print(f"1. Линейная модель. Точность: {lr_acc:.4f}")

1. Линейная модель. Точность: 0.3359


In [None]:
# 2. Support vector machine (SVM) with an RBF kernel
# Trained and evaluated on the standardized features.
svm = SVC(kernel='rbf').fit(X_train_scaled, y_train)
svm_test_pred = svm.predict(X_test_scaled)
svm_acc = accuracy_score(y_test, svm_test_pred)
print(f"2. SVM. Точность: {svm_acc:.4f}")

2. SVM. Точность: 0.3382


In [None]:
# 3. Decision tree
# The tree is fitted on the unscaled features (X_train / X_test), unlike the
# models above. random_state is fixed because the tree permutes features at
# every split, so equally good splits could otherwise be chosen differently
# from run to run; 42 matches the seed used for the train/test split.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)
dt_acc = accuracy_score(y_test, dt.predict(X_test))
print(f"3. Дерево решений. Точность: {dt_acc:.4f}")

3. Дерево решений. Точность: 0.3339


In [None]:
 # 4. Neural network
# Seed the Python, NumPy and TensorFlow random generators before building the
# model so that weight initialisation, dropout and batch shuffling start from
# the same state on every run; 42 matches the seed used for the split.
# NOTE(review): bit-identical results may still depend on hardware/TF version.
tf.keras.utils.set_random_seed(42)

# Size the output layer from the labels rather than hard-coding it. Assumes
# y_train holds integer class codes starting at 0, as sparse categorical
# cross-entropy requires.
n_classes = int(np.max(y_train)) + 1

# Small fully connected classifier: two hidden ReLU layers with dropout after
# the first, and a softmax output giving one probability per class.
model_nn = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(n_classes, activation='softmax')
])

model_nn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_nn.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)

# predict() returns one row of class probabilities per sample; argmax picks
# the most probable class index so it can be compared with y_test.
raw_predictions = model_nn.predict(X_test_scaled, verbose=0)
nn_preds_indices = np.argmax(raw_predictions, axis=1)
nn_acc = accuracy_score(y_test, nn_preds_indices)
print(f"4. Нейросеть. Точность: {nn_acc:.4f}")

4. Нейросеть. Точность: 0.3394
