In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [None]:
# Load the stroke dataset and turn every categorical column into integer codes
file_path = '/content/drive/MyDrive/stroke/healthcare-dataset-stroke-data.csv'
data = pd.read_csv(file_path).drop(columns=['id', 'avg_glucose_level'])

# Text label -> integer code, one mapping per categorical column
gender_mapping = {'Other': 2, 'Male': 0, 'Female': 1}
married_mapping = {'No': 0, 'Yes': 1}
work_mapping = {'Never_worked': 0, 'Private': 1, 'Self-employed': 2, 'Govt_job': 3, 'children': 4}
residence_mapping = {'Rural': 0, 'Urban': 1}
smoking_mapping = {'Unknown': 0, 'formerly smoked': 1, 'never smoked': 2, 'smokes': 3}

column_encoders = {
    'gender': gender_mapping,
    'ever_married': married_mapping,
    'work_type': work_mapping,
    'Residence_type': residence_mapping,
    'smoking_status': smoking_mapping,
}
for column_name, encoder in column_encoders.items():
    # Labels missing from a mapping become NaN and are removed by dropna() below
    data[column_name] = data[column_name].map(encoder)

# Keep only complete rows, then discard the 'Other' gender (code 2)
data = data.dropna()
data = data[data['gender'] != 2]

# Target column and feature matrix
y = data['stroke']
X = data.drop(columns='stroke')

# Split into training and test sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Standardise the features: statistics are fitted on the training split only
# and then re-used for the test split, so no test information leaks into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling (and therefore the reported losses) are repeatable.
tf.keras.utils.set_random_seed(42)

# Network definition. The target is binary (0/1) and later cells threshold the
# prediction at 0.5, so the output unit uses a sigmoid to keep predictions in (0, 1).
# An explicit Input replaces the deprecated `input_shape=` layer argument.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model. The loss stays MSE so that model.evaluate() keeps returning
# a mean squared error, which is what the evaluation cell prints.
# NOTE(review): binary cross-entropy is the more usual loss for a 0/1 target -
# switch it together with the evaluation cell if desired.
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train_scaled, y_train, epochs=50, batch_size=8, verbose=2)

Epoch 1/50
491/491 - 1s - loss: 0.0435 - 1s/epoch - 3ms/step
Epoch 2/50
491/491 - 1s - loss: 0.0378 - 620ms/epoch - 1ms/step
Epoch 3/50
491/491 - 1s - loss: 0.0365 - 639ms/epoch - 1ms/step
Epoch 4/50
491/491 - 1s - loss: 0.0363 - 590ms/epoch - 1ms/step
Epoch 5/50
491/491 - 1s - loss: 0.0358 - 610ms/epoch - 1ms/step
Epoch 6/50
491/491 - 1s - loss: 0.0353 - 606ms/epoch - 1ms/step
Epoch 7/50
491/491 - 1s - loss: 0.0352 - 612ms/epoch - 1ms/step
Epoch 8/50
491/491 - 1s - loss: 0.0351 - 625ms/epoch - 1ms/step
Epoch 9/50
491/491 - 1s - loss: 0.0350 - 609ms/epoch - 1ms/step
Epoch 10/50
491/491 - 1s - loss: 0.0348 - 608ms/epoch - 1ms/step
Epoch 11/50
491/491 - 1s - loss: 0.0348 - 601ms/epoch - 1ms/step
Epoch 12/50
491/491 - 1s - loss: 0.0344 - 613ms/epoch - 1ms/step
Epoch 13/50
491/491 - 1s - loss: 0.0342 - 619ms/epoch - 1ms/step
Epoch 14/50
491/491 - 1s - loss: 0.0341 - 606ms/epoch - 1ms/step
Epoch 15/50
491/491 - 1s - loss: 0.0339 - 763ms/epoch - 2ms/step
Epoch 16/50
491/491 - 1s - loss: 0.03

<keras.src.callbacks.History at 0x7b2fc410a410>

In [None]:
# Score the held-out split; the model is compiled with loss='mse' and no extra
# metrics, so evaluate() returns that single loss value.
mse = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.05432045832276344


In [None]:
# Save the trained model, if needed.
import joblib

# The network only accepts features standardised by `scaler`, so the fitted
# scaler is stored next to the model; without it the saved model cannot be fed
# correctly in a new session.
joblib.dump(scaler, '/content/drive/MyDrive/stroke/stroke_scaler.pkl')

# The file name is a leftover from another project; it is kept unchanged because
# the loading cell reads this exact path.
# NOTE(review): this pickles the Keras model via joblib; Keras' own model.save()
# format is the supported way to persist a model - consider migrating both the
# save and the load cell together.
joblib.dump(model, '/content/drive/MyDrive/stroke/movie_revenue_predictor.pkl')

['/content/drive/MyDrive/stroke/movie_revenue_predictor.pkl']

In [None]:
# Prediction for new data (e.g. a single patient).
# Values use the same integer codes as the preprocessing cell and the same
# column order as the training features.
new_data = pd.DataFrame([[1, 23, 0, 0, 0, 1, 1, 31, 3]], columns=['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type', 'bmi', 'smoking_status'])

# The model was trained on standardised features, so the new row must go through
# the scaler fitted on the training split before it is passed to predict().
new_data_scaled = scaler.transform(new_data)
predicted = 'stroke' if model.predict(new_data_scaled)[0][0] > 0.5 else 'no stroke'
print(f'Predicted: {predicted}')

Predicted: no stroke


In [None]:
# Prediction using the previously saved model
import pandas as pd
import joblib

# joblib.load unpickles the file, which can execute arbitrary code:
# only load model files that come from a trusted source.
model = joblib.load('/content/drive/MyDrive/stroke/movie_revenue_predictor.pkl')

new_data = pd.DataFrame([[1, 23, 0, 0, 0, 1, 1, 31, 3]], columns=['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type', 'bmi', 'smoking_status'])

# The model expects standardised inputs. `scaler` is the StandardScaler fitted in
# the training cell of this notebook.
# NOTE(review): to run this cell in a fresh session, the fitted scaler has to be
# persisted and loaded alongside the model.
new_data_scaled = scaler.transform(new_data)
predicted = 'stroke' if model.predict(new_data_scaled)[0][0] > 0.5 else 'no stroke'
print(f'Predicted: {predicted}')

Predicted: no stroke


In [None]:
# Data export: score every row with the model, decode the integer codes back to
# text labels and write the result to CSV.
# info() prints its report itself and returns None, so it is not wrapped in print().
data.info()
test = data.drop('stroke', axis=1)

# The model was trained on standardised features, so the same fitted scaler is
# applied before predicting; ravel() flattens the (n, 1) output to one value per row.
data['training_score'] = model.predict(scaler.transform(test)).ravel()

# Min-max rescale the scores to [0, 1] and round them to a 0/1 label.
# NOTE(review): this cut-off is relative to the score range of this batch, unlike
# the fixed 0.5 threshold used in the single-prediction cells - confirm it is intended.
raw_score = data['training_score']
data['training_score'] = ((raw_score - raw_score.min()) / (raw_score.max() - raw_score.min())).round().astype(int)


gender_mapping = {0: 'Male', 1: 'Female'}
data['gender'] = data['gender'].map(gender_mapping)

# Ages below one year are exported as 0. A single .loc assignment is required:
# chained indexing (data[mask]["age"] = 0) writes to a temporary copy and does nothing.
data.loc[data["age"] < 1, "age"] = 0

hypertension_mapping = {0: 'No', 1: 'Yes'}
data['hypertension'] = data['hypertension'].map(hypertension_mapping)

heart_mapping = {0: 'No', 1: 'Yes'}
data['heart_disease'] = data['heart_disease'].map(heart_mapping)

married_mapping = {0: 'No', 1: 'Yes'}
data['ever_married'] = data['ever_married'].map(married_mapping)

work_mapping = {0: 'Never_worked', 1: 'Private', 2: 'Self-employed', 3: 'Govt_job', 4: 'children'}
data['work_type'] = data['work_type'].map(work_mapping)

residence_mapping = {0: 'Rural', 1: 'Urban'}
data['Residence_type'] = data['Residence_type'].map(residence_mapping)

smoking_mapping = {0: 'Unknown', 1: 'formerly smoked', 2: 'never smoked', 3: 'smokes'}
data['smoking_status'] = data['smoking_status'].map(smoking_mapping)

stroke_mapping = {0: 'No', 1: 'Yes'}
data['stroke'] = data['stroke'].map(stroke_mapping)

data.to_csv('/content/drive/MyDrive/stroke/test.csv')

<class 'pandas.core.frame.DataFrame'>
Index: 4908 entries, 0 to 5109
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   gender          4908 non-null   int64  
 1   age             4908 non-null   float64
 2   hypertension    4908 non-null   int64  
 3   heart_disease   4908 non-null   int64  
 4   ever_married    4908 non-null   int64  
 5   work_type       4908 non-null   int64  
 6   Residence_type  4908 non-null   int64  
 7   bmi             4908 non-null   float64
 8   smoking_status  4908 non-null   int64  
 9   stroke          4908 non-null   int64  
dtypes: float64(2), int64(8)
memory usage: 421.8 KB
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[data["age"] < 1]["age"] = 0
