In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file found under the Kaggle input mount.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        input_file = os.path.join(dirname, filename)
        print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
import tensorflow as tf
import warnings
# Silence every Python warning for the rest of the session. Note that this is a
# blanket filter: any deprecation or convergence warnings the libraries might
# raise further down will not be shown either.
warnings.filterwarnings('ignore')
# Only emit Optuna log records at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [None]:
# Read the fetal-health CSV from the Kaggle input directory and preview the first rows.
csv_path = '../input/fetal-health-classification/fetal_health.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Print the column names, dtypes and non-null counts of the DataFrame.
data.info()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Distinct values present in the target column.
data['fetal_health'].unique()

In [None]:
# Shift the class labels so they start at 0: the raw target is 1, 2, 3, but the
# sparse categorical loss used for the network below expects 0-based class ids.
# Subtracting the column minimum (instead of a literal 1) keeps this cell idempotent:
# running it a second time subtracts 0 rather than pushing the labels to -1, 0, 1.
data['fetal_health'] = data['fetal_health'] - data['fetal_health'].min()

In [None]:
# Distinct target values again, to confirm the result of the label shift.
data['fetal_health'].unique()

Visualizing correlations

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
corr = data.corr()
fig, ax = plt.subplots(figsize=(18, 15))
sns.heatmap(corr, annot=True, vmin=-1.0, ax=ax)
plt.show()

In [None]:
# Drop features that are highly correlated with others, to reduce redundancy:
#   - histogram_min: highly negatively correlated
#   - histogram_median and histogram_mode: highly positively correlated
data = data.drop(columns=['histogram_min', 'histogram_median', 'histogram_mode'])

In [None]:
# Preview the first rows of the frame after the column drop.
data.head()

In [None]:
# (rows, columns) of the frame after the column drop.
data.shape

Splitting/Scaling

In [None]:
# Separate the feature columns (X) from the target column (y); both are independent copies.
X = data.drop(columns='fetal_health').copy()
y = data['fetal_health'].copy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split BEFORE scaling so the scaler's mean/variance are estimated from the training
# rows only. Fitting the scaler on the full matrix and splitting afterwards leaks
# test-set statistics into the training data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=100
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn the scaling parameters on the training split
X_test = scaler.transform(X_test_raw)        # apply those same parameters to the held-out split
# `X` itself is left unscaled; further down only its shape is read.

Optimizing Regularization strength for Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
def get_reults(trial):
    """Optuna objective for tuning LogisticRegression's ``C``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the candidate value of ``C``.

    Returns
    -------
    float
        Mean 5-fold cross-validated accuracy on the training split; the study
        maximizes this value.
    """
    # Imported locally so this cell runs on its own.
    from sklearn.model_selection import cross_val_score

    # C is the INVERSE regularization strength, searched from 0.00001 (very strong
    # regularization) to 10000.0 (almost none). It is sampled on a log scale because
    # the difference between 0.0001, 0.001 and 0.1 matters far more than the
    # difference between 5000, 7000 and 10000.
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
    C = trial.suggest_float('C', 0.00001, 10000.0, log=True)
    model = LogisticRegression(C=C)

    # Score by cross-validation on the training split only. Selecting C by its
    # accuracy on X_test would make the test score reported afterwards an
    # optimistically biased estimate.
    return float(cross_val_score(model, X_train, y_train, cv=5).mean())

In [None]:
# Search for the C that maximizes the objective. The sampler (TPE, Optuna's default)
# is given a fixed seed so that a fresh Restart & Run All proposes the same
# sequence of trial values.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=100),
)
study.optimize(get_reults, n_trials=100, show_progress_bar=True)

In [None]:
# Parameters of the best trial in the study (a dict keyed by parameter name),
# shown as the cell output.
best_param=study.best_params
best_param

In [None]:
# Refit on the training split with the best C found by the study,
# then report the mean accuracy on the test split.
model = LogisticRegression(C=best_param['C']).fit(X_train, y_train)
model.score(X_test, y_test)


In [None]:
# Baseline for comparison: the same model with an arbitrary, untuned C value.
model = LogisticRegression(C=0.01).fit(X_train, y_train)
model.score(X_test, y_test)

Optimizing hidden layer sizes (Neural Network)

In [None]:
# (rows, feature columns) of the feature matrix.
X.shape

In [None]:
import tensorflow as tf
import tensorflow 

from tensorflow import keras
from keras.layers import Dense

In [None]:
def test_model(a, b, batch_size=32, epochs=100):
    """Train a two-hidden-layer softmax classifier and evaluate it on the test split.

    The network is built, compiled and fitted inside the function so that each
    call starts from freshly initialised weights.

    Parameters
    ----------
    a : int or float
        Number of units in the first hidden layer. Truncated to ``int`` because a
        ``Dense`` layer's unit count must be an integer.
    b : int or float
        Number of units in the second hidden layer (truncated to ``int`` likewise).
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.
    epochs : int, default 100
        Upper bound on training epochs; early stopping may end training sooner.

    Returns
    -------
    list
        ``[loss, accuracy]`` as returned by ``model.evaluate(X_test, y_test)``.
    """
    # Hyperparameter samplers may hand over floats (e.g. 137.6); Dense needs whole units.
    units_1 = int(a)
    units_2 = int(b)

    # Input width is taken from the data the model is actually fitted on.
    inputs = tf.keras.Input(shape=(X_train.shape[1],))

    hidden_1 = tf.keras.layers.Dense(units_1, activation='relu')(inputs)
    hidden_2 = tf.keras.layers.Dense(units_2, activation='relu')(hidden_1)

    # Three output units: one per target class (labels 0, 1, 2).
    outputs = tf.keras.layers.Dense(3, activation='softmax')(hidden_2)

    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',  # integer class ids, not one-hot vectors
        metrics=['accuracy']
    )

    model.fit(
        X_train,
        y_train,
        validation_split=0.2,  # part of the training data is held out for early stopping
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[
            # Stop once val_loss has not improved for 3 epochs and roll back to the best weights.
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=3,
                restore_best_weights=True
            )
        ],
        verbose=0
    )

    return model.evaluate(X_test, y_test, verbose=0)

In [None]:
def get_results(trial):
    """Optuna objective for the hidden-layer sizes of the neural network.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the two layer sizes.

    Returns
    -------
    float
        The loss (first element) returned by ``test_model(a, b)``; the study
        minimizes this value.
    """
    # Layer sizes are unit counts, so they are sampled as integers (suggest_int)
    # rather than as floats with the deprecated suggest_uniform. A linear scale is
    # used here (not a log scale) because small values are of no special interest.
    a = trial.suggest_int('a', 16, 1024)
    b = trial.suggest_int('b', 16, 1024)
    # NOTE(review): test_model evaluates on X_test, so the layer sizes are being
    # selected against the test split - consider scoring on validation data instead.
    return test_model(a, b)[0]


In [None]:
# New study for the network: the objective returns a loss, so it is minimized.
study = optuna.create_study(direction='minimize')
study.optimize(func=get_results, n_trials=100, show_progress_bar=True)

In [None]:
# Parameters of the best trial in the network study ('a' and 'b'),
# shown as the cell output. This rebinds the name used earlier for the best C.
best_param=study.best_params
best_param

In [None]:
# Retrain with the best layer sizes found; the cell output is whatever
# model.evaluate returns for the test split.
test_model(best_param['a'],best_param['b'])