In [None]:
import pandas as pd
# Read the fetal-health CSV from the relative ../input directory into a DataFrame.
fetal = pd.read_csv(filepath_or_buffer='../input/fetal-health-classification/fetal_health.csv')
# Preview the first five rows (five is the default for head()).
fetal.head(n=5)

In [None]:
# Print the summary that DataFrame.info() produces for `fetal`.
fetal.info()

In [None]:
# Display the descriptive statistics that DataFrame.describe() computes for `fetal`.
fetal.describe()

In [None]:
# Percentage of rows per fetal_health value, rounded to two decimals.
fetal['fetal_health'].value_counts().mul(100).div(len(fetal)).round(2)

In [None]:
# Percentage of rows per severe_decelerations value, rounded to two decimals.
fetal['severe_decelerations'].value_counts().mul(100).div(len(fetal)).round(2)

In [None]:
# Percentage of rows per histogram_tendency value, rounded to two decimals.
fetal['histogram_tendency'].value_counts().mul(100).div(len(fetal)).round(2)

In [None]:
# Number of distinct values per column, smallest first (ascending is the default order).
fetal.nunique().sort_values()

In [None]:
# Skewness of each column, smallest first (ascending is the default order).
fetal.skew().sort_values()

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each, then
# show the resulting (rows, columns) shape.
fetal = fetal.drop_duplicates(keep='first')
fetal.shape

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Draw one box plot per column of `fetal` to inspect spread and outliers.
for column in fetal.columns:
    # plt.figure(figsize=...) creates a figure of the requested size. The
    # previous `plt.figsize = (10, 5)` only set an unused attribute on the
    # pyplot module, so the size was never applied.
    plt.figure(figsize=(10, 5))
    sns.boxplot(y=fetal[column])
    plt.title(column)
    plt.show()

In [None]:
!pip install pycaret

In [None]:
# Randomly pick 95% of the rows for modelling (seeded for reproducibility).
data = fetal.sample(frac=0.95, random_state=42)
# The rows not sampled become the "unseen" set. This must be computed while
# `data` still carries the original index labels, i.e. before it is re-indexed.
data_unseen = fetal.drop(index=data.index).reset_index(drop=True)
data = data.reset_index(drop=True)
print(f'Data for Modeling: {data.shape}')

In [None]:
# Report the (rows, columns) shape of the rows kept aside from modelling.
print(f'Unseen Data For Predictions: {data_unseen.shape}')

In [None]:
from pycaret.classification import *

In [None]:
# Configure the PyCaret classification experiment on `data`, predicting the
# 'fetal_health' column. Keyword arguments are grouped by concern; the values
# are unchanged. (The variable name keeps its original spelling.)
fetal_helath = setup(
    # --- data, split and experiment bookkeeping ---
    data=data,
    target='fetal_health',
    session_id=42,
    train_size=0.8,
    log_experiment=True,
    # --- declared column types ---
    numeric_features=[
        'baseline value',
        'accelerations',
        'fetal_movement',
        'uterine_contractions',
        'light_decelerations',
        'prolongued_decelerations',
        'abnormal_short_term_variability',
        'mean_value_of_short_term_variability',
        'percentage_of_time_with_abnormal_long_term_variability',
        'mean_value_of_long_term_variability',
        'histogram_width',
        'histogram_min',
        'histogram_max',
        'histogram_number_of_peaks',
        'histogram_number_of_zeroes',
        'histogram_mode',
        'histogram_mean',
        'histogram_median',
        'histogram_variance',
    ],
    categorical_features=['severe_decelerations', 'histogram_tendency'],
    # --- imputation, scaling and transformation ---
    numeric_imputation='median',
    normalize=True,
    normalize_method='robust',
    transformation=True,
    # --- categorical handling ---
    handle_unknown_categorical=True,
    unknown_categorical_method='most_frequent',
    # Levels of categorical features below the rare_level_threshold parameter
    # are combined together into a single level.
    combine_rare_levels=True,
    # Categorical features with statistically insignificant variance are
    # removed from the dataset.
    ignore_low_variance=True,
    # --- feature pruning and class balance ---
    # Drop one of each pair of features that are highly correlated with each other.
    remove_multicollinearity=True,
    fix_imbalance=True,
)



In [None]:
# Create a model for the 'lightgbm' estimator ID via create_model and keep it as the baseline.
lightgbm = create_model('lightgbm')

In [None]:
# Print the model object returned by create_model.
print(lightgbm)

In [None]:
# Tune the baseline model with tune_model, using 'Accuracy' as the metric to optimize.
tuned_lightgbm = tune_model(lightgbm,optimize = 'Accuracy')

In [None]:
# Request the 'auc' plot for the tuned model.
plot_model(tuned_lightgbm, plot = 'auc')

In [None]:
# Request the 'pr' plot for the tuned model (presumably precision-recall — confirm against PyCaret docs).
plot_model(tuned_lightgbm, plot = 'pr')

In [None]:
# Request the 'feature' plot for the tuned model (presumably feature importance — confirm against PyCaret docs).
plot_model(tuned_lightgbm, plot='feature')

In [None]:
# Request the 'confusion_matrix' plot for the tuned model.
plot_model(tuned_lightgbm, plot = 'confusion_matrix')

In [None]:
# Pass the tuned model to evaluate_model (presumably an interactive evaluation view — confirm against PyCaret docs).
evaluate_model(tuned_lightgbm)

In [None]:
# Score the tuned model rather than the untuned `lightgbm`: the tuned model is
# the one plotted and evaluated in the preceding cells and the one applied to
# `data_unseen` afterwards, so these metrics should describe that same model.
# With no `data` argument, PyCaret's predict_model scores the hold-out split
# created by setup().
predict_model(tuned_lightgbm)

In [None]:
# Run the tuned model on `data_unseen` (the rows of `fetal` that were not
# sampled into `data`) and preview the first rows of the result.
unseen_predictions = predict_model(tuned_lightgbm, data=data_unseen)
unseen_predictions.head()

In [None]:
# Mean of the `Score` column of the unseen-data predictions, rounded to two decimals.
print(f"Confidence Score :   {round(unseen_predictions['Score'].mean(), 2)}")