In [1]:
# Import modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the CSV file into a Pandas DataFrame
df = pd.read_csv(
    Path('full_data.csv')   
)

# Review the DataFrame
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
2,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
3,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81.0,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [3]:
#Find missing values (NaN or None) in the DataFrame
missing_values = df.isna()

# Display the DataFrame with missing value indicators
print(missing_values)

      gender    age  hypertension  heart_disease  ever_married  work_type  \
0      False  False         False          False         False      False   
1      False  False         False          False         False      False   
2      False  False         False          False         False      False   
3      False  False         False          False         False      False   
4      False  False         False          False         False      False   
...      ...    ...           ...            ...           ...        ...   
4976   False  False         False          False         False      False   
4977   False  False         False          False         False      False   
4978   False  False         False          False         False      False   
4979   False  False         False          False         False      False   
4980   False  False         False          False         False      False   

      Residence_type  avg_glucose_level    bmi  smoking_status  stroke  
0 

In [4]:
# Drop rows containing missing values (axis=0)
data_cleaned = df.dropna(axis=0)

data_cleaned

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
2,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
3,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81.0,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1
...,...,...,...,...,...,...,...,...,...,...,...
4976,Male,41.0,0,0,No,Private,Rural,70.15,29.8,formerly smoked,0
4977,Male,40.0,0,0,Yes,Private,Urban,191.15,31.1,smokes,0
4978,Female,45.0,1,0,Yes,Govt_job,Rural,95.02,31.8,smokes,0
4979,Male,40.0,0,0,Yes,Private,Rural,83.94,30.0,smokes,0


In [5]:
import psycopg2
import json
#change database creds to postgres db
with open('create_database_creds.json') as O:
    config = json.load(O)
username = config['user']
password = config['password']
db_host = config['host']
db_port = config['port']
db_ = 'stroke_db'
conn = psycopg2.connect(
    host=db_host,
    port=db_port,
    user=username,
    password=password,
)
conn.autocommit = True
cursor = conn.cursor()
#create database to house table
cursor.execute(f"CREATE DATABASE {db_}")
cursor.close()
conn.close()

DuplicateDatabase: database "stroke_db" already exists


In [6]:
from sqlalchemy import create_engine
#upload dataframe into postgres
# Replace these values with your PostgreSQL database credentials
db_username = config['user']
db_password = config['password']
db_host = config['host']
db_port = config['port']
db_name = 'stroke_db'

#open engine
connection_path = f"postgresql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(connection_path)

table_name = 'stroke_table'
#import data into db
data_cleaned.to_sql(table_name, engine, if_exists='replace', index=False)


981

In [7]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on your data (computes mean and standard deviation)
scaler.fit(df[['age', 'avg_glucose_level', 'bmi']])

# Transform the numeric columns to scale the data
scaled_data = scaler.transform(df[['age', 'avg_glucose_level', 'bmi']])

# Replace the original numeric columns with scaled values
df[['age', 'avg_glucose_level', 'bmi']] = scaled_data

print(df)


      gender       age  hypertension  heart_disease ever_married  \
0       Male  1.040584             0              1          Yes   
1       Male  1.614270             0              1          Yes   
2     Female  0.246250             0              0          Yes   
3     Female  1.570141             1              0          Yes   
4       Male  1.658400             0              0          Yes   
...      ...       ...           ...            ...          ...   
4976    Male -0.106788             0              0           No   
4977    Male -0.150917             0              0          Yes   
4978  Female  0.069731             1              0          Yes   
4979    Male -0.150917             0              0          Yes   
4980  Female  1.614270             1              0          Yes   

          work_type Residence_type  avg_glucose_level       bmi  \
0           Private          Urban           2.723411  1.193238   
1           Private          Rural          -0.00

In [8]:
# Use get_dummies to one-hot encode the 'Color' column
encoded_data = pd.get_dummies(df, columns=['gender','ever_married', 'work_type', 'Residence_type', 'smoking_status'])

encoded_data.head()

Unnamed: 0,age,hypertension,heart_disease,avg_glucose_level,bmi,stroke,gender_Female,gender_Male,ever_married_No,ever_married_Yes,work_type_Govt_job,work_type_Private,work_type_Self-employed,work_type_children,Residence_type_Rural,Residence_type_Urban,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes
0,1.040584,0,1,2.723411,1.193238,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0
1,1.61427,0,1,-0.000523,0.58939,1,0,1,0,1,0,1,0,0,1,0,0,0,1,0
2,0.24625,0,0,1.448529,0.869222,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1
3,1.570141,1,0,1.51265,-0.662492,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0
4,1.6584,0,0,1.780895,0.073909,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0


In [1]:

import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))



Num GPUs Available:  0


In [2]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[]

SyntaxError: invalid syntax (1907596635.py, line 1)