In [145]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [146]:
df= pd.read_csv('bmi.csv')
df.head()

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3


In [147]:
df.shape

(500, 4)

In [148]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Gender  500 non-null    object
 1   Height  500 non-null    int64 
 2   Weight  500 non-null    int64 
 3   Index   500 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 15.8+ KB


In [149]:
df['Index'].unique()

array([4, 2, 3, 5, 1, 0])

In [150]:
df['Index'].value_counts()

Index
5    198
4    130
2     69
3     68
1     22
0     13
Name: count, dtype: int64

In [151]:
df['Gender'].value_counts()

Gender
Female    255
Male      245
Name: count, dtype: int64

In [152]:
df

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3
...,...,...,...,...
495,Female,150,153,5
496,Female,184,121,4
497,Female,141,136,5
498,Male,150,95,5


In [153]:

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler

In [154]:
X = df.drop(columns=['Index'])
y = df['Index']

In [155]:
y.value_counts()

Index
5    198
4    130
2     69
3     68
1     22
0     13
Name: count, dtype: int64

In [156]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [157]:
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train, num_classes=6)
y_test = to_categorical(y_test, num_classes=6)

In [158]:
from sklearn.compose import ColumnTransformer

In [159]:
transformer = ColumnTransformer(
    transformers=[
        ('tnf1', OrdinalEncoder(categories=[['Female', 'Male']]), ['Gender']),
        ('tnf2', MinMaxScaler(),['Height','Weight'])
    ],
    remainder='passthrough'
)

In [197]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import Dense, Dropout

In [206]:
def create_model():
    model = Sequential([
        Dense(36, activation='relu', input_shape=(3,)), 
        Dense(24, activation='relu'),
        Dense(12, activation='relu'),
        Dense(6, activation='softmax')  
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [188]:
from sklearn.pipeline import Pipeline

In [199]:
from sklearn.base import BaseEstimator, ClassifierMixin

In [208]:
class KerasCustomClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, epochs=100, batch_size=32):
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = create_model()

    def fit(self, X, y):
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=1)
        return self

    def predict(self, X):
        return np.argmax(self.model.predict(X), axis=-1)

    def score(self, X, y):
        return self.model.evaluate(X, y, verbose=0)[1]

In [209]:
pipeline = Pipeline([
    ('preprocessor', transformer),
    ('classifier', KerasCustomClassifier(epochs=100, batch_size=32))  
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [210]:
pipeline.fit(X_train, y_train)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1725 - loss: 1.7807
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4766 - loss: 1.7259 
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4721 - loss: 1.6738 
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4589 - loss: 1.6238 
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4732 - loss: 1.5264 
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3932 - loss: 1.4674 
Epoch 7/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4136 - loss: 1.4201 
Epoch 8/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3829 - loss: 1.4103 
Epoch 9/100
[1m13/13[0m [32m━━━━━━━━━━

In [211]:
y_pred = pipeline.predict(X_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step




In [212]:
y_test_labels = np.argmax(y_test, axis=1)

In [213]:
from sklearn.metrics import classification_report
print(classification_report(y_test_labels, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         4
           2       0.95      1.00      0.97        18
           3       0.70      0.88      0.78         8
           4       0.96      0.87      0.91        30
           5       0.97      0.97      0.97        39

    accuracy                           0.94       100
   macro avg       0.93      0.95      0.94       100
weighted avg       0.95      0.94      0.94       100



In [215]:
import pickle
with open('bmi_pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)