# Water Potability Prediction - Machine Learning Model

### - Water potability refers to the safety of water for human consumption
### - Potable water is free from harmful contaminants and bacteria and is safe for drinking and food preparation
### - There are various methods to ensure water potability, including filtration and treatment processes such as UV filtration and reverse osmosis

# 

## 1. Library Importing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# 

## 2. Exploratory Data Analysis (EDA) 

In [None]:
water_data = pd.read_csv('water_potability.csv')
water_data.head()

In [None]:
water_data.columns

#### - ph : pH of water 
#### - Hardness : Capacity of water to precipitate soap in mg/L
#### - Solids : Total dissolved solids in ppm
#### - Chloramines : Amounts of Chloramines in ppm
#### - Sulfate : Amounts of Sulfates dissolved in mg/L
#### - Conductivity : Electrical conductivity of water in uS/cm
#### - Organic_carbon : Amount of organic carbon in ppm
#### - Trihalomethanes : Amount of Trihalomethanes in mg/L
#### - Turbidity : Measure of the light-scattering property of water in NTU (Nephelometric Turbidity Units)
#### - Potability : Indicates if water is safe for human consumption

# 

In [None]:
water_data.shape

In [None]:
water_data.info()

In [None]:
water_data.describe()

In [None]:
water_data.duplicated().any()

In [None]:
water_data.isnull().sum()

In [None]:
# Summarise missing values per column: absolute count and share of all rows.
null_df = water_data.isnull().sum().reset_index()
null_df.columns = ['Columns', 'Null_count']
# Scale to percent *before* rounding: rounding the fraction first limits the
# result to whole percents and leaves float noise such as 15.000000000000002.
null_df['%miss_value'] = (null_df['Null_count'] / len(water_data) * 100).round(2)
null_df

In [None]:
sns.heatmap(water_data.isnull(), yticklabels = False, cbar = False, cmap = 'viridis')
plt.show()

# 

## 3. Handling Missing Values 

In [None]:
water_data['ph'].plot(kind = 'hist')
plt.show()

In [None]:
water_data['Sulfate'].plot(kind = 'hist')

In [None]:
water_data['Trihalomethanes'].plot(kind = 'hist')

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
water_data['Trihalomethanes'].plot(kind='kde', ax=ax)
plt.show()

In [None]:
# Replace the missing entries of each incomplete column with that column's mean.
for column in ('ph', 'Sulfate', 'Trihalomethanes'):
    column_mean = water_data[column].mean()
    water_data[column] = water_data[column].fillna(column_mean)

In [None]:
water_data.isnull().sum()

# 

## 4. Check for Correlation 

In [None]:
corr_matrix = water_data.corr()
corr_matrix

In [None]:
plt.figure(figsize = (18, 16))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.show()

dapat dilihat bahwa tidak ada korelasi yang signifikan dari variabel-variabel tersebut.

In [None]:
corr_matrix1 = corr_matrix.abs()
upper_tri = corr_matrix1.where(np.triu(np.ones(corr_matrix1.shape), k=1).astype(np.bool_))
upper_tri

In [None]:
matrix = np.triu(corr_matrix)
sns.heatmap(water_data.corr(), annot=True, linewidth=.8, mask=matrix, cmap='rocket', cbar=False)

ini adalah salah satu bentuk matrix korelasi yang lainnya, dapat menggunakan yang sebelumnya atau yang ini, sama saja.

In [None]:
data_hist_plot = water_data.hist(figsize=(20, 20), color = '#5F9EA0')

In [None]:
# Compare the distribution of every feature between the two potability classes.
# 'Potability' itself is skipped: it is already the hue variable, so plotting
# it against itself carries no information.
for col in water_data.columns.drop('Potability'):
    sns.histplot(data=water_data, x=col, kde=True, hue='Potability')
    plt.show()

In [None]:
water_data.groupby('Potability').mean().T

In [None]:
# Box plot per feature to spot outliers. The 0/1 target column is excluded
# because a box plot of a binary label is not meaningful.
for col in water_data.columns.drop('Potability'):
    sns.boxplot(data=water_data, x=col)
    plt.show()

In [None]:
# Class balance of the target. The column is passed through `x=`: the first
# positional parameter of `sns.countplot` is `data`, not the variable to count.
sns.countplot(data=water_data, x='Potability')
plt.show()

# 

## 5. Data Preprocessing 

In [None]:
# Separate the target label from the predictor columns.
y = water_data['Potability']
X = water_data.drop(columns=['Potability'])

In [None]:
X.head()

In [None]:
y.head()

# 

## 6. Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()

In [None]:
# Standardise every feature to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so test-set statistics influence the scaling - confirm this is intended.
std_scaler.fit(X)
X_scaled = std_scaler.transform(X)
X_scaled

Mentransformasi data X sehingga setiap fitur memiliki rata-rata nol ('zero mean') dan standar deviasi satu. Hasil transformasi berupa array NumPy, bukan lagi DataFrame.

# 

## 7. Classification Modelling 

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
X_train.shape, X_test.shape

### Model Development : 

List of Models :
1. LogisticRegression
2. DecisionTreeClassifier
3. RandomForestClassifier
4. ExtraTreesClassifier
5. Support Vector Classifier
6. KNeighborsClassifier
7. GradientBoostingClassifier
8. Naive Bayes (GaussianNB)
9. AdaBoostClassifier

#### Importing Models 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Candidate classifiers. Every estimator that draws random numbers is seeded
# (same seed as the train/test split) so the model comparison is reproducible
# from run to run.
RANDOM_STATE = 42

LR = LogisticRegression()
DT = DecisionTreeClassifier(random_state=RANDOM_STATE)
RF = RandomForestClassifier(random_state=RANDOM_STATE)
ETC = ExtraTreesClassifier(random_state=RANDOM_STATE)
SVM = SVC()
KNN = KNeighborsClassifier()
GBC = GradientBoostingClassifier(random_state=RANDOM_STATE)
NB = GaussianNB()
ABC = AdaBoostClassifier(random_state=RANDOM_STATE)

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
from sklearn.pipeline import make_pipeline

# Compare all candidate models by 5-fold cross-validated accuracy.
models = [LR, DT, RF, ETC, SVM, KNN, GBC, NB, ABC]
# Use the unscaled features: the scaler is part of the pipeline below, so it is
# re-fitted on the training folds only and the validation fold never leaks
# into the scaling statistics.
features = X
labels = y
CV = 5
accu_list = []  # mean accuracy (in percent) per model
ModelName = []  # class name per model

for model in models:
    model_name = model.__class__.__name__
    scaled_model = make_pipeline(StandardScaler(), model)
    accuracies = cross_val_score(scaled_model, features, labels, scoring='accuracy', cv=CV)
    accu_list.append(accuracies.mean() * 100)
    ModelName.append(model_name)

model_acc_df = pd.DataFrame({"Model": ModelName, "Cross_Val_Accuracy": accu_list})
model_acc_df

Ini merupakan persentase akurasi setiap model. Dapat dilihat bahwa yang terbesar adalah SVM, ETC, dan RF. Maka, ketiga model ini akan dicek lebih lanjut.

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Train the three short-listed models on the training split ...
for classifier in (SVM, ETC, RF):
    classifier.fit(X_train, y_train)

# ... and keep their predictions on the held-out test split.
y_pred_rf = RF.predict(X_test)
y_pred_svm = SVM.predict(X_test)
y_pred_etc = ETC.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred_rf))

In [None]:
print(classification_report(y_test, y_pred_svm))

In [None]:
print(classification_report(y_test, y_pred_etc))

Karena RandomForestClassifier merupakan model yang paling lengkap dan mudah untuk dijelaskan, maka akan digunakan model RF.

# 

In [None]:
from sklearn.metrics import roc_curve, auc
# ROC curve of the Extra Trees model on the test split, scored with the
# predicted probability of the positive class (column 1).
y_scores = ETC.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(7, 5))
# The AUC is rounded for the legend; the raw float printed ~16 digits.
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')  # chance level
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Name the model in the title so this plot can be told apart from the other ROC plot.
plt.title('Receiver Operating Characteristic (ROC) - ExtraTreesClassifier')
plt.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc
# ROC curve of the Random Forest model on the test split, scored with the
# predicted probability of the positive class (column 1).
y_scores = RF.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(7, 5))
# The AUC is rounded for the legend; the raw float printed ~16 digits.
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')  # chance level
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Name the model in the title so this plot can be told apart from the other ROC plot.
plt.title('Receiver Operating Characteristic (ROC) - RandomForestClassifier')
plt.legend(loc="lower right")
plt.show()

#### Best Model - Random Forest : 

In [None]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold

# Hyper-parameter grid for the Random Forest (2 * 2 * 3 * 2 = 24 candidates).
params_RF = {
    "min_samples_split": [2, 6],
    "min_samples_leaf": [1, 4],
    "n_estimators": [100, 200, 300],
    "criterion": ["gini", "entropy"],
}

cv_method = StratifiedKFold(n_splits=3)
# The estimator must be a RandomForestClassifier: this section, the variable
# names and the printed message are all about Random Forest, but the search
# previously tuned an ExtraTreesClassifier. The forest is seeded so the search
# result is reproducible.
GridSearchCV_RF = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=params_RF,
    cv=cv_method,
    verbose=1,
    n_jobs=2,
    scoring="accuracy",
    return_train_score=True,
)

GridSearchCV_RF.fit(X_train, y_train)
best_params_RF = GridSearchCV_RF.best_params_
print("Best Hyperparameters for Random Forest are = ", best_params_RF)

In [None]:
# With the default `refit=True`, GridSearchCV has already re-trained the best
# parameter combination on the whole training split, so `best_estimator_` is
# ready to use. Fitting it again would only retrain the same model.
best_estimator = GridSearchCV_RF.best_estimator_
y_pred_best = best_estimator.predict(X_test)
print(classification_report(y_test, y_pred_best))

In [None]:
from sklearn.metrics import accuracy_score
print(f"Accuracy of Random Forest Model = {round(accuracy_score(y_test, y_pred_best)*100, 2)} %")

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import sklearn.metrics

# Confusion matrix of the Random Forest predictions on the test split.
# The previous code called `model.predict` on the predictions themselves and
# read a non-existent `.classes` attribute (left over from a Keras template);
# the matrix only needs the true labels and the predicted labels.
# `labels=[0, 1]` pins the layout to [[TN, FP], [FN, TP]] (rows = true class).
confusion_array = confusion_matrix(y_test, y_pred_rf, labels=[0, 1])

print('Confusion Matrix')
print(confusion_array)

tn, fp, fn, tp = confusion_array.ravel()
print('True Negative = ', tn)
print('False Negative = ', fn)
print('True Positive = ', tp)
print('False Positive = ', fp)

# 

## 8. Fuzzy Inference System Application 

In [None]:
!pip install tensorflow

In [None]:



import logging
logging.getLogger('tensorflow').disabled = True
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # remove WARNING Messages
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
# parameter class fis parameters


class fis_parameters():
    """Plain container for the settings of a fuzzy inference system run."""

    def __init__(
        self,
        n_input: int = 3,
        n_memb: int = 3,
        batch_size: int = 16,
        n_epochs: int = 25,
        memb_func: str = 'gaussian',
        optimizer: str = 'sgd',
        loss: str = 'mse',
    ):
        # Model structure
        self.n_input = n_input      # number of regressors
        self.n_memb = n_memb        # number of fuzzy memberships
        self.memb_func = memb_func  # 'gaussian' / 'gbellmf'
        # Training setup
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.optimizer = optimizer  # sgd / adam / ...
        self.loss = loss            # mse / mae


# Main Class ANFIS
class ANFIS:
    """ANFIS model assembled from five custom Keras layers.

    The network chains FuzzyLayer -> RuleLayer -> NormLayer -> DefuzzLayer ->
    SummationLayer and exposes the resulting ``keras.Model`` as ``self.model``.

    Args:
        n_input: number of input features (regressors).
        n_memb: number of membership functions per input.
        batch_size: fixed batch size the model is built with; it is also used
            for every prediction made through this class.
        memb_func: 'gaussian', 'gbellmf' or 'sigmoid'.
        name: name given to the Keras model.
    """

    def __init__(self, n_input: int, n_memb: int, batch_size: int = 16, memb_func: str = 'gaussian', name: str = 'MyAnfis'):
        self.n = n_input
        self.m = n_memb
        self.batch_size = batch_size
        self.memb_func = memb_func
        # `shape` has to be a tuple: `(n_input)` is just the int `n_input`.
        input_ = keras.layers.Input(
            shape=(n_input,), name='inputLayer', batch_size=self.batch_size)
        L1 = FuzzyLayer(n_input, n_memb, memb_func, name='fuzzyLayer')(input_)
        L2 = RuleLayer(n_input, n_memb, name='ruleLayer')(L1)
        L3 = NormLayer(name='normLayer')(L2)
        L4 = DefuzzLayer(n_input, n_memb, name='defuzzLayer')(L3, input_)
        L5 = SummationLayer(name='sumLayer')(L4)
        self.model = keras.Model(inputs=[input_], outputs=[L5], name=name)
        self.update_weights()

    def __call__(self, X):
        """Return the model prediction for ``X`` using the fixed batch size."""
        return self.model.predict(X, batch_size=self.batch_size)

    def update_weights(self):
        """Copy the current layer weights into attributes of this object."""
        fuzzy_weights = self.model.get_layer('fuzzyLayer').get_weights()
        # premise parameters (mu & sigma for gaussian, a/b/c for bell-shaped,
        # gamma & c for sigmoid)
        if self.memb_func == 'gaussian':
            self.mus, self.sigmas = fuzzy_weights
        elif self.memb_func == 'gbellmf':
            self.a, self.b, self.c = fuzzy_weights
        elif self.memb_func == 'sigmoid':
            # previously skipped, so sigmoid premise parameters were never exposed
            self.gammas, self.c = fuzzy_weights
        # consequence parameters
        self.bias, self.weights = self.model.get_layer(
            'defuzzLayer').get_weights()

    def plotmfs(self, show_initial_weights=False):
        """Plot the membership functions of every input.

        Args:
            show_initial_weights: also draw (dashed) the membership functions
                as they were before training. This reads ``self.init_weights``,
                which is only set by ``fit``.

        Raises:
            ValueError: if ``self.memb_func`` is not a known membership function.
        """
        n_input = self.n
        n_memb = self.m
        curve_shape = (n_memb, n_input, 1)
        fuzzy_weights = self.model.get_layer('fuzzyLayer').get_weights()

        if self.memb_func == 'gaussian':
            mus, sigmas = np.around(fuzzy_weights, 2)
            mus, sigmas = mus.reshape(curve_shape), sigmas.reshape(curve_shape)

            xn = np.linspace(np.min(mus) - 2 * np.max(abs(sigmas)),
                             np.max(mus) + 2 * np.max(abs(sigmas)), 100).reshape((1, 1, -1))
            xn = np.tile(xn, (n_memb, n_input, 1))

            # broadcast all curves in one array
            memb_curves = np.exp(-np.square((xn - mus)) / np.square(sigmas))

            if show_initial_weights:
                mus_init, sigmas_init = np.around(self.init_weights, 2)
                mus_init = mus_init.reshape(curve_shape)
                sigmas_init = sigmas_init.reshape(curve_shape)
                init_curves = np.exp(-np.square((xn - mus_init)) / np.square(sigmas_init))

        elif self.memb_func == 'gbellmf':
            a, b, c = np.around(fuzzy_weights, 2)
            a, b, c = a.reshape(curve_shape), b.reshape(curve_shape), c.reshape(curve_shape)

            xn = np.linspace(np.min(c) - 2 * np.max(abs(a)),
                             np.max(c) + 2 * np.max(abs(a)), 100).reshape((1, 1, -1))
            xn = np.tile(xn, (n_memb, n_input, 1))

            # broadcast all curves in one array
            memb_curves = 1 / (1 + np.square((xn - c) / a)**b)

            if show_initial_weights:
                a_init, b_init, c_init = np.around(self.init_weights, 2)
                a_init = a_init.reshape(curve_shape)
                b_init = b_init.reshape(curve_shape)
                c_init = c_init.reshape(curve_shape)
                init_curves = 1 / (1 + np.square((xn - c_init) / a_init)**b_init)

        elif self.memb_func == 'sigmoid':
            gammas, c = np.around(fuzzy_weights, 2)
            gammas, c = gammas.reshape(curve_shape), c.reshape(curve_shape)

            xn = np.linspace(np.min(c) - 2 * np.max(abs(c)), np.max(c) + 2 * np.max(
                abs(c)), 100).reshape((1, 1, -1))  # TODO: change confidence bands
            xn = np.tile(xn, (n_memb, n_input, 1))

            # broadcast all curves in one array
            memb_curves = 1 / (1 + np.exp(-gammas * (xn - c)))

            if show_initial_weights:
                gammas_init, c_init = np.around(self.init_weights, 2)
                gammas_init = gammas_init.reshape(curve_shape)
                c_init = c_init.reshape(curve_shape)
                init_curves = 1 / (1 + np.exp(-gammas_init * (xn - c_init)))

        else:
            # without this, the plotting code below failed on undefined names
            raise ValueError(
                f"Unknown membership function: {self.memb_func!r}")

        fig, axs = plt.subplots(nrows=n_input, ncols=1, figsize=(8, self.n * 3))
        fig.suptitle('Membership functions', size=16)
        for n in range(self.n):
            axs[n].grid(True)
            axs[n].set_title(f'Input {n+1}')
            for m in range(self.m):
                axs[n].plot(xn[m, n, :], memb_curves[m, n, :])

        if show_initial_weights:  # plot initial membership curve
            for n in range(self.n):
                axs[n].set_prop_cycle(None)  # reset color cycle
                for m in range(self.m):
                    axs[n].plot(xn[m, n, :], init_curves[m, n, :],
                                '--', alpha=.5)
        plt.show()

    def fit(self, X, y, **kwargs):
        """Train the model and refresh the cached weights.

        Args:
            X: training inputs, forwarded to ``keras.Model.fit``.
            y: training targets, forwarded to ``keras.Model.fit``.
            **kwargs: any further ``keras.Model.fit`` arguments.

        Returns:
            The history object returned by ``keras.Model.fit``.
        """
        # save initial weights in the anfis class
        self.init_weights = self.model.get_layer('fuzzyLayer').get_weights()

        # fit model & update weights in the anfis class
        history = self.model.fit(X, y, **kwargs)
        self.update_weights()

        # clear the graphs
        tf.keras.backend.clear_session()

        return history

    def get_memberships(self, Xs):
        """Return the output of the 'normLayer' (normalised rule strengths) for ``Xs``."""
        intermediate_layer_model = keras.Model(inputs=self.model.input,
                                               outputs=self.model.get_layer('normLayer').output)

        # The model is built with a fixed batch size and RuleLayer reshapes with
        # it, so predict with that same batch size (as __call__ does) instead of
        # the Keras default.
        norm_layer_output = intermediate_layer_model.predict(
            Xs, batch_size=self.batch_size)

        return norm_layer_output


# Custom weight initializer
def equally_spaced_initializer(shape, minval=-1.5, maxval=1.5, dtype=tf.float32):
    """Custom weight initializer with equally spaced values.

    Row ``i`` of the result holds the ``i``-th of ``shape[0]`` equally spaced
    values in ``[minval, maxval]``, repeated across all ``shape[1]`` columns.

    Args:
        shape: 2-element shape ``(rows, columns)`` of the weight.
        minval: first value of the range.
        maxval: last value of the range.
        dtype: dtype of the returned tensor; ``None`` falls back to float32.

    Returns:
        A tensor of the requested shape and dtype. A plain tensor is returned
        (not a ``tf.Variable``): the layer creates the variable itself from the
        initial value.
    """
    dtype = tf.float32 if dtype is None else tf.as_dtype(dtype)
    linspace = tf.reshape(tf.linspace(minval, maxval, shape[0]), (-1, 1))
    # honour the requested dtype, which was previously ignored
    return tf.cast(tf.tile(linspace, (1, shape[1])), dtype)


# Layer 1
class FuzzyLayer(keras.layers.Layer):
    """Layer 1: evaluates every membership function for every input.

    For an input batch of shape (batch, n_input) the output has shape
    (batch, n_memb, n_input).

    Args:
        n_input: number of input features.
        n_memb: number of membership functions per input.
        memb_func: 'gaussian', 'gbellmf' or 'sigmoid'.

    Raises:
        ValueError: if ``memb_func`` is not one of the supported names.
    """

    def __init__(self, n_input, n_memb, memb_func='gaussian', **kwargs):
        super(FuzzyLayer, self).__init__(**kwargs)
        # Fail early: an unknown name used to surface only in call() as an
        # unbound local variable.
        if memb_func not in ('gbellmf', 'gaussian', 'sigmoid'):
            raise ValueError(
                f"Unknown membership function: {memb_func!r} "
                "(expected 'gaussian', 'gbellmf' or 'sigmoid')")
        self.n = n_input
        self.m = n_memb
        self.memb_func = memb_func

    def build(self, batch_input_shape):
        """Create the premise parameters, each of shape (n_memb, n_input)."""
        self.batch_size = batch_input_shape[0]
        weight_shape = (self.m, self.n)

        if self.memb_func == 'gbellmf':
            self.a = self.add_weight(name='a',
                                     shape=weight_shape,
                                     initializer=keras.initializers.RandomUniform(
                                         minval=.7, maxval=1.3, seed=1),
                                     trainable=True)
            self.b = self.add_weight(name='b',
                                     shape=weight_shape,
                                     initializer=keras.initializers.RandomUniform(
                                         minval=.7, maxval=1.3, seed=1),
                                     trainable=True)
            self.c = self.add_weight(name='c',
                                     shape=weight_shape,
                                     initializer=equally_spaced_initializer,
                                     trainable=True)

        elif self.memb_func == 'gaussian':
            self.mu = self.add_weight(name='mu',
                                      shape=weight_shape,
                                      initializer=equally_spaced_initializer,
                                      trainable=True)
            self.sigma = self.add_weight(name='sigma',
                                         shape=weight_shape,
                                         initializer=keras.initializers.RandomUniform(
                                             minval=.7, maxval=1.3, seed=1),
                                         trainable=True)

        elif self.memb_func == 'sigmoid':
            self.gamma = self.add_weight(name='gamma',
                                         shape=weight_shape,
                                         initializer=equally_spaced_initializer,
                                         trainable=True)

            self.c = self.add_weight(name='c',
                                     shape=weight_shape,
                                     initializer=equally_spaced_initializer,
                                     trainable=True)

        # Be sure to call this at the end
        super(FuzzyLayer, self).build(batch_input_shape)

    def _expand_inputs(self, x_inputs):
        """Repeat each sample n_memb times: (batch, n) -> (batch, n_memb, n)."""
        return tf.reshape(tf.tile(x_inputs, (1, self.m)), (-1, self.m, self.n))

    def call(self, x_inputs):
        """Return the membership degrees, shape (batch, n_memb, n_input)."""
        x = self._expand_inputs(x_inputs)

        if self.memb_func == 'gbellmf':
            # 1 / (1 + ((x - c) / a)^(2b))
            return 1 / (1 + tf.math.pow(tf.square(tf.subtract(x, self.c) / self.a), self.b))

        if self.memb_func == 'gaussian':
            # exp(-(x - mu)^2 / sigma^2)
            return tf.exp(-tf.square(tf.subtract(x, self.mu)) / tf.square(self.sigma))

        # sigmoid: 1 / (1 + exp(-gamma * (x - c))).
        # The "1 +" was missing before, which turned the membership into the
        # unbounded exp(gamma * (x - c)) and disagreed with ANFIS.plotmfs.
        return tf.math.sigmoid(self.gamma * tf.subtract(x, self.c))


# Layer 2
class RuleLayer(keras.layers.Layer):
    """Layer 2: firing strength of every rule.

    A rule picks one membership function per input; its firing strength is the
    product of the picked membership degrees. For an input of shape
    (batch, n_memb, n_input) the output has shape (batch, n_memb ** n_input),
    ordered so that the membership index of the first input varies slowest.

    Args:
        n_input: number of input features (at least 2).
        n_memb: number of membership functions per input.
    """

    def __init__(self, n_input, n_memb, **kwargs):
        super(RuleLayer, self).__init__(**kwargs)
        self.n = n_input
        self.m = n_memb
        self.batch_size = None

    def build(self, batch_input_shape):
        """Remember the (fixed) batch size; the reshapes in call() rely on it."""
        self.batch_size = batch_input_shape[0]
        # Be sure to call this at the end
        super(RuleLayer, self).build(batch_input_shape)

    def call(self, input_):
        """Return the rule firing strengths, shape (batch, n_memb ** n_input).

        Raises:
            ValueError: if the layer was created with fewer than 2 inputs.
        """
        if self.n < 2:
            raise ValueError(
                'This ANFIS implementation needs at least 2 inputs.')

        # Build the outer product one input at a time instead of spelling out a
        # separate branch for each supported input count. Flattening after each
        # step keeps the same rule order (and the same multiplication order) as
        # the explicit 2..6-input formulas, and works for any larger n as well.
        rules = input_[:, :, 0]
        for i in range(1, self.n):
            rules = tf.reshape(rules, [self.batch_size, -1, 1]) * \
                tf.reshape(input_[:, :, i], [self.batch_size, 1, -1])
            rules = tf.reshape(rules, [self.batch_size, -1])

        return rules


# Layer 3
class NormLayer(keras.layers.Layer):
    """Layer 3: divides every rule firing strength by the sum over axis 1."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, w):
        """Return ``w`` divided by its per-row total (no guard against a zero total)."""
        row_totals = tf.reduce_sum(w, axis=1)
        # column vector so the division broadcasts across each row
        return w / tf.reshape(row_totals, (-1, 1))


# Layer 4
class DefuzzLayer(keras.layers.Layer):
    """Layer 4: weights a linear function of the inputs, one per rule, by the
    normalised firing strength of that rule.
    """

    def __init__(self, n_input, n_memb, **kwargs):
        super().__init__(**kwargs)
        self.n = n_input
        self.m = n_memb
        n_rules = self.m ** self.n  # one consequence function per rule

        # Consequence parameters: a bias and n_input coefficients for each rule.
        self.CP_bias = self.add_weight(
            name='Consequence_bias',
            shape=(1, n_rules),
            initializer=keras.initializers.RandomUniform(minval=-2, maxval=2),
            trainable=True)
        self.CP_weight = self.add_weight(
            name='Consequence_weight',
            shape=(self.n, n_rules),
            initializer=keras.initializers.RandomUniform(minval=-2, maxval=2),
            trainable=True)

    def call(self, w_norm, input_):
        """Return ``w_norm * (input_ @ CP_weight + CP_bias)``."""
        rule_outputs = tf.matmul(input_, self.CP_weight) + self.CP_bias
        return tf.multiply(w_norm, rule_outputs)  # defuzzified layer


# Layer 5
class SummationLayer(keras.layers.Layer):
    """Layer 5: sums the rule outputs of each sample into a single value."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, batch_input_shape):
        """Store the batch size taken from the input shape."""
        self.batch_size = batch_input_shape[0]
        # Be sure to call this at the end
        super(SummationLayer, self).build(batch_input_shape)

    def call(self, input_):
        """Return the sum over axis 1 as a column vector."""
        return tf.reshape(tf.reduce_sum(input_, axis=1), (-1, 1))


#########################################################################################

# 

## 9. Classification System

In [None]:
water_data.columns

In [None]:
list1 = water_data.iloc[2:3, 0:9].values.flatten().tolist()
list1

In [None]:
ph = float(input('Enter the pH Value = '))
Hardness = float(input('Enter the Hardness Value = '))
Solids = float(input('Enter the Solids Value = '))
Chloramines = float(input('Enter the Chloramines Value  = '))
Sulfate = float(input('Enter the Sulfate Value = '))
Conductivity = float(input('Enter the Conductivity Value = '))
Organic_carbon = float(input('Enter the Organic_Carbon Value = '))
Trihalomethanes = float(input('Enter the Trihalomethanes Value = '))
Turbidity = float(input('Enter the Turbidity Value = '))

In [None]:
input_data = [ph, Hardness, Solids, Chloramines, Sulfate, Conductivity,
             Organic_carbon, Trihalomethanes, Turbidity]

In [None]:
water_data_input = std_scaler.transform([[ph, Hardness, Solids, Chloramines, Sulfate, Conductivity,
                                         Organic_carbon, Trihalomethanes, Turbidity]])
water_data_input

In [None]:
model_prediction = best_estimator.predict(water_data_input)
model_prediction

In [None]:
# The target is potability (safety for human consumption), so the message must
# talk about consumption, matching water_Quality_Prediction(); the previous
# "for Plant" wording did not describe what the model predicts.
if model_prediction[0] == 0:
    print("Water is NOT SAFE for Consumption")
else:
    print("Water is SAFE for Consumption")

In [None]:
def water_Quality_Prediction(input_data):
    """Classify one water sample as safe or not safe for consumption.

    Args:
        input_data: the nine feature values of a single sample, in the order
            ph, Hardness, Solids, Chloramines, Sulfate, Conductivity,
            Organic_carbon, Trihalomethanes, Turbidity.

    Returns:
        "Water is NOT SAFE for Consumption" if the model predicts class 0,
        otherwise "Water is SAFE for Consumption".
    """
    # The scaler and the model expect a 2-D array (one row per sample).
    sample = np.asarray(input_data, dtype=float).reshape(1, -1)
    # Scale the *input* - the old code transformed `scaled_data` before it was
    # ever assigned, which raised UnboundLocalError on every call.
    scaled_data = std_scaler.transform(sample)
    model_prediction = best_estimator.predict(scaled_data)
    if model_prediction[0] == 0:
        return "Water is NOT SAFE for Consumption"
    return "Water is SAFE for Consumption"

In [None]:
ph = float(input('Enter the pH Value = '))
Hardness = float(input('Enter the Hardness Value = '))
Solids = float(input('Enter the Solids Value = '))
Chloramines = float(input('Enter the Chloramines Value  = '))
Sulfate = float(input('Enter the Sulfate Value = '))
Conductivity = float(input('Enter the Conductivity Value = '))
Organic_carbon = float(input('Enter the Organic_Carbon Value = '))
Trihalomethanes = float(input('Enter the Trihalomethanes Value = '))
Turbidity = float(input('Enter the Turbidity Value = '))

input_data = [ph, Hardness, Solids, Chloramines, Sulfate, Conductivity,
             Organic_carbon, Trihalomethanes, Turbidity]
water_Quality_Prediction(input_data)

# 

#### Save Trained Model: 

In [None]:
import pickle

In [None]:
filename = 'Water_Quality_ML_Trained_Model.sav'
# Use a context manager so the file is flushed and closed even if pickling fails.
# NOTE(review): only the classifier is saved. It is applied to inputs
# transformed with `std_scaler`, so the scaler probably needs to be persisted
# as well - confirm.
with open(filename, 'wb') as model_file:
    pickle.dump(best_estimator, model_file)

# 

#### Load Trained Model: 

In [None]:
# Close the file once the model is loaded.
# NOTE: unpickling executes arbitrary code - only load model files you created
# yourself or otherwise trust.
with open('Water_Quality_ML_Trained_Model.sav', 'rb') as model_file:
    water_quality_model = pickle.load(model_file)


In [None]:
# Feature order: ph, Hardness, Solids, Chloramines, Sulfate, Conductivity,
# Organic_carbon, Trihalomethanes, Turbidity.
# The sample originally listed only 8 of the 9 features, which makes the scaler
# reject it. Judging by the magnitudes, Conductivity (index 5) was the one left
# out, so it is filled with the mean the scaler learned for that column.
# NOTE(review): replace it with the measured conductivity if it is known.
data = [7.56, 237, 14245, 6.289, 373, std_scaler.mean_[5], 10.47, 85.9, 2.44]
scaled_data1 = std_scaler.transform([data])
# was `scaled.data1`, which raised NameError
water_quality_model.predict(scaled_data1)