# Task for Today  

***

## Fetal Health Prediction  

Given *data about fetuses*, let's try to predict the **health** of a given fetus.  
  
We will use Optuna to find the optimal hyperparameters for a logistic regression model and a neural network.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import optuna
from sklearn.linear_model import LogisticRegression
import tensorflow as tf

import warnings
# Keep the notebook output compact: Optuna only reports messages at WARNING
# level or above, and all Python warnings are silenced.
optuna.logging.set_verbosity(optuna.logging.WARNING)
warnings.filterwarnings('ignore')

In [None]:
# Load the fetal health CSV into a DataFrame.
data = pd.read_csv(
    '../input/fetal-health-classification/fetal_health.csv',
)

In [None]:
# Display the loaded DataFrame (the notebook renders the cell's last expression).
data

In [None]:
# Print a per-column summary (non-null counts and dtypes).
data.info()

In [None]:
# List the distinct values that occur in the target column.
data['fetal_health'].unique()

In [None]:
# Shift every class label down by one so the labels start at zero
# (presumably 1..3 -> 0..2 -- confirm against the unique values of the column).
data['fetal_health'] = data['fetal_health'].sub(1)

# Visualizing Correlations

In [None]:
# Pairwise correlations between all columns of the table, target included.
corr = data.corr()

# Annotated heatmap; the lower end of the colour scale is pinned at -1.
plt.figure(figsize=(18, 15))
sns.heatmap(data=corr, vmin=-1.0, annot=True)
plt.show()

In [None]:
# Remove three histogram summary columns
# (presumably chosen from the correlation heatmap -- confirm).
data = data.drop(
    columns=['histogram_min', 'histogram_mode', 'histogram_median'],
)

# Splitting/Scaling

In [None]:
# Separate the table into the feature matrix X (every column except the
# target) and the target vector y; both are independent copies of `data`.
X = data.drop(columns='fetal_health').copy()
y = data['fetal_health'].copy()

In [None]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

# Optimizing Regularization Strength (Logistic Regression)

In [None]:
def get_results(trial):
    """Optuna objective for the logistic regression's regularization strength.

    Samples ``C`` log-uniformly from [1e-5, 1e4] and returns the mean 5-fold
    cross-validated accuracy on the training split.

    The test split is deliberately not touched here: selecting ``C`` by its
    test-set score would make the final test accuracy an optimistic estimate.

    Args:
        trial: The ``optuna`` trial used to sample ``C``.

    Returns:
        Mean cross-validated accuracy (higher is better).
    """
    # Local import keeps this cell self-contained; scikit-learn is already a
    # dependency of the notebook.
    from sklearn.model_selection import cross_val_score

    # `suggest_float(..., log=True)` replaces the deprecated `suggest_loguniform`.
    C = trial.suggest_float('C', 0.00001, 10000.0, log=True)
    model = LogisticRegression(C=C)
    return cross_val_score(model, X_train, y_train, cv=5).mean()

In [None]:
# Run 100 trials, looking for the parameters that maximise the value
# returned by `get_results`.
study = optuna.create_study(direction='maximize')
study.optimize(
    get_results,
    n_trials=100,
    show_progress_bar=True,
)

In [None]:
# Parameters of the best trial in the study; displayed as the cell output.
best_params = study.best_params
best_params

In [None]:
# Refit a logistic regression on the training split with the tuned C
# (`fit` returns the estimator itself, so the calls can be chained).
model = LogisticRegression(C=best_params['C']).fit(X_train, y_train)

# Accuracy on the held-out test split; displayed as the cell output.
model.score(X_test, y_test)

# Optimizing Hidden Layer Sizes (Neural Network)

In [None]:
# (rows, columns) of the feature matrix.
X.shape

In [None]:
def test_model(a, b, batch_size=32, epochs=100):
    """Train a two-hidden-layer classifier and evaluate it on the test split.

    The network feeds the input features through two ReLU ``Dense`` layers
    into a 3-way softmax. It is trained on the module-level ``X_train`` /
    ``y_train``, with 20% of those rows held out for validation. Training
    stops early once ``val_loss`` has not improved for 3 epochs, and the best
    weights are restored.

    Args:
        a: Number of units in the first hidden layer. Non-integer values are
            truncated with ``int()``.
        b: Number of units in the second hidden layer (same coercion).
        batch_size: Mini-batch size passed to ``model.fit``.
        epochs: Upper bound on training epochs; early stopping may end sooner.

    Returns:
        The result of ``model.evaluate(X_test, y_test)``; with this compile
        configuration that is ``[loss, accuracy]``.
    """
    # `Dense` needs integer unit counts. A tuner may hand over floats, which
    # older Keras truncated silently and newer versions reject, so coerce
    # explicitly here.
    units_1 = int(a)
    units_2 = int(b)

    inputs = tf.keras.Input(shape=(X.shape[1],))

    hidden_1 = tf.keras.layers.Dense(units_1, activation='relu')(inputs)
    hidden_2 = tf.keras.layers.Dense(units_2, activation='relu')(hidden_1)

    outputs = tf.keras.layers.Dense(3, activation='softmax')(hidden_2)

    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer='adam',
        # Sparse variant: targets are integer-valued class indices, not one-hot.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=3,
                restore_best_weights=True
            )
        ],
        verbose=0
    )

    return model.evaluate(X_test, y_test, verbose=0)

In [None]:
def get_results(trial):
    """Optuna objective for the neural network's hidden layer sizes.

    Samples integer widths ``a`` and ``b`` in [16, 1024] for the two hidden
    layers and returns the first element of ``test_model``'s result (the loss).

    Args:
        trial: The ``optuna`` trial used to sample ``a`` and ``b``.

    Returns:
        The loss reported by ``test_model`` (lower is better).
    """
    # Layer widths are unit counts, so sample integers. This replaces the
    # deprecated `suggest_uniform`, which returned floats.
    a = trial.suggest_int('a', 16, 1024)
    b = trial.suggest_int('b', 16, 1024)
    # NOTE(review): `test_model` evaluates on the test split, so the layer
    # sizes are still selected on test data; a validation-based objective
    # would need `test_model` to expose its validation loss.
    return test_model(a, b)[0]

In [None]:
# Run 100 trials, looking for the parameters that minimise the value
# returned by `get_results`.
study = optuna.create_study(direction='minimize')
study.optimize(
    get_results,
    n_trials=100,
    show_progress_bar=True,
)

In [None]:
# Parameters of the best trial in the study; displayed as the cell output.
best_params = study.best_params
best_params

In [None]:
# Train a fresh network with the tuned layer sizes; the cell output is
# whatever `test_model` returns for it.
test_model(a=best_params['a'], b=best_params['b'])

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/j7WGVhF7g3M