# Vanishing gradient

## load / import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout,Flatten,Dense
import keras

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
from imblearn.over_sampling import RandomOverSampler

In [5]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [7]:
from keras.initializers import random_normal

In [8]:
from keras.constraints import max_norm

In [9]:
from keras.optimizers import SGD

## Loading the datasets

In [10]:
df=pd.read_csv("C:/Users/Rakesh/Desktop/career/AI ML AND DL Practice/Data/credit-card-dataset/Credit_card.csv")

In [11]:
labels=pd.read_csv("C:/Users/Rakesh/Desktop/career/AI ML AND DL Practice/Data/credit-card-dataset/Credit_card_label.csv")

In [12]:
df.head()

Unnamed: 0,Ind_ID,GENDER,Car_Owner,Propert_Owner,CHILDREN,Annual_income,Type_Income,EDUCATION,Marital_status,Housing_type,Birthday_count,Employed_days,Mobile_phone,Work_Phone,Phone,EMAIL_ID,Type_Occupation,Family_Members
0,5008827,M,Y,Y,0,180000.0,Pensioner,Higher education,Married,House / apartment,-18772.0,365243,1,0,0,0,,2
1,5009744,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,-13557.0,-586,1,1,1,0,,2
2,5009746,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,,-586,1,1,1,0,,2
3,5009749,F,Y,N,0,,Commercial associate,Higher education,Married,House / apartment,-13557.0,-586,1,1,1,0,,2
4,5009752,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,-13557.0,-586,1,1,1,0,,2


In [13]:
labels.head()

Unnamed: 0,Ind_ID,label
0,5008827,1
1,5009744,1
2,5009746,1
3,5009749,1
4,5009752,1


In [14]:
dep="Approved"
df[dep]=labels.label.astype(int)
df.loc[df[dep]==1,"Status"]="Approved"
df.loc[df[dep]==0,"Status"]="Declined"

## Feature Engineering

In [15]:
# Differentiate the  data bases on categorical data and numerical data or columns
cats = [
    'GENDER', 'Car_Owner', 'Propert_Owner', 'Type_Income',
    'EDUCATION', 'Marital_status', 'Housing_type', 'Mobile_phone',
    'Work_Phone', 'Phone', 'Type_Occupation', 'EMAIL_ID'
]

conts = [
    'CHILDREN', 'Family_Members', 'Annual_income',
    'Age', 'EmployedDaysOnly', 'UnemployedDaysOnly'
]

In [16]:
cats

['GENDER',
 'Car_Owner',
 'Propert_Owner',
 'Type_Income',
 'EDUCATION',
 'Marital_status',
 'Housing_type',
 'Mobile_phone',
 'Work_Phone',
 'Phone',
 'Type_Occupation',
 'EMAIL_ID']

In [17]:
def proc_data():
    df["Age"]=-df.Birthday_count//365
    df["EmployedDaysOnly"]=df.Employed_days.apply(lambda x: x if x>0 else 0)
    df["UnemployedDaysOnly"]=df.Employed_days.apply(lambda x: abs(x) if x<0 else 0) 

    for cat in cats:
        df[cat]=pd.Categorical(df[cat])

    modes=df.mode().iloc[0]
    df.fillna(modes,inplace=True)


proc_data()
    

In [18]:
df.head()

Unnamed: 0,Ind_ID,GENDER,Car_Owner,Propert_Owner,CHILDREN,Annual_income,Type_Income,EDUCATION,Marital_status,Housing_type,...,Work_Phone,Phone,EMAIL_ID,Type_Occupation,Family_Members,Approved,Status,Age,EmployedDaysOnly,UnemployedDaysOnly
0,5008827,M,Y,Y,0,180000.0,Pensioner,Higher education,Married,House / apartment,...,0,0,0,Laborers,2,1,Approved,51.0,365243,0
1,5009744,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,...,1,1,0,Laborers,2,1,Approved,37.0,0,586
2,5009746,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,...,1,1,0,Laborers,2,1,Approved,37.0,0,586
3,5009749,F,Y,N,0,135000.0,Commercial associate,Higher education,Married,House / apartment,...,1,1,0,Laborers,2,1,Approved,37.0,0,586
4,5009752,F,Y,N,0,315000.0,Commercial associate,Higher education,Married,House / apartment,...,1,1,0,Laborers,2,1,Approved,37.0,0,586


In [19]:
# Now data looks good 

## Oversampling due to heavily skewed data and Data Splitting





In [20]:
X=df[cats+conts]
y=df[dep]

In [21]:
X,y

(     GENDER Car_Owner Propert_Owner           Type_Income  \
 0         M         Y             Y             Pensioner   
 1         F         Y             N  Commercial associate   
 2         F         Y             N  Commercial associate   
 3         F         Y             N  Commercial associate   
 4         F         Y             N  Commercial associate   
 ...     ...       ...           ...                   ...   
 1543      F         N             Y  Commercial associate   
 1544      F         N             N  Commercial associate   
 1545      M         Y             Y               Working   
 1546      M         Y             N               Working   
 1547      F         Y             Y               Working   
 
                           EDUCATION        Marital_status       Housing_type  \
 0                  Higher education               Married  House / apartment   
 1                  Higher education               Married  House / apartment   
 2         

In [22]:
X_over,y_over=RandomOverSampler().fit_resample(X,y)

In [23]:
X_train,X_test,y_train,y_test=train_test_split(X_over,y_over,test_size=0.25)

In [24]:
# Step 1: Convert categorical columns to category dtype
for col in cats:
    X_train[col] = X_train[col].astype('category')
    X_test[col] = X_test[col].astype('category')

# Step 2: Replace category values with integer codes
X_train[cats] = X_train[cats].apply(lambda x: x.cat.codes)
X_test[cats] = X_test[cats].apply(lambda x: x.cat.codes)


In [25]:
# Create model
model=Sequential()

In [26]:
model.add(Dense(10,activation="sigmoid",input_dim=18))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(10,activation="sigmoid"))

model.add(Dense(1,activation="sigmoid"))



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
model.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])

In [28]:
history=model.fit(X_train,y_train,epochs=100)

Epoch 1/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.4813 - loss: 0.6943 
Epoch 2/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4905 - loss: 0.6936 
Epoch 3/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4896 - loss: 0.6933 
Epoch 4/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4925 - loss: 0.6933 
Epoch 5/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4866 - loss: 0.6934 
Epoch 6/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5046 - loss: 0.6935 
Epoch 7/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4881 - loss: 0.6935 
Epoch 8/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5109 - loss: 0.6935 
Epoch 9/100
[1m65/65[0m [32m━━━━━━━━━

In [29]:
# In the above model we can easily observe  acuuracy change per epochs tiny cahnges model wont learn in that time 

## Solution for Vanishing Gradient Problem


In [30]:
# Scaling
scaler=StandardScaler()
X_train_scaler=scaler.fit_transform(X_train)
X_test_scaler=scaler.transform(X_test)

In [31]:
# Creating new model with name of model2
model2=Sequential()
model2.add(Dense(128,activation="relu",input_dim=18))
model2.add(Dropout(0.5))
model2.add(Dense(256,activation="relu"))
model2.add(Dropout(0.5))
model2.add(Dense(128,activation="relu"))
model2.add(Dropout(0.5))
model2.add(Dense(64,activation="relu"))
model2.add(Dropout(0.5))
model2.add(Dense(1,activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [32]:
earlystopping=EarlyStopping(patience=10,monitor="val_loss",restore_best_weights=True)
model2.compile(optimizer=Adam(learning_rate=0.001),loss="binary_crossentropy",metrics=["accuracy"])

In [33]:
history2=model2.fit(X_train_scaler,y_train,epochs=100,validation_data=(X_test_scaler,y_test),batch_size=32,
                    callbacks=[earlystopping])

Epoch 1/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5066 - loss: 0.7357 - val_accuracy: 0.5531 - val_loss: 0.6880
Epoch 2/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5304 - loss: 0.7008 - val_accuracy: 0.5677 - val_loss: 0.6871
Epoch 3/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5299 - loss: 0.6922 - val_accuracy: 0.5997 - val_loss: 0.6811
Epoch 4/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5488 - loss: 0.6887 - val_accuracy: 0.6026 - val_loss: 0.6784
Epoch 5/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5765 - loss: 0.6802 - val_accuracy: 0.6099 - val_loss: 0.6695
Epoch 6/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5920 - loss: 0.6661 - val_accuracy: 0.6201 - val_loss: 0.6556
Epoch 7/100
[1m65/65[0m [32m━━

In [34]:
# Here we can clearly see the increases of teh model accuracy for every epochs


In [35]:

prediction=model2.predict(X_test_scaler)
rounded_prediction=np.round(prediction)
report=classification_report(y_test,rounded_prediction)
print(f"The classification report is {report}")

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  
The classification report is               precision    recall  f1-score   support

           0       0.99      0.91      0.95       346
           1       0.92      0.99      0.95       341

    accuracy                           0.95       687
   macro avg       0.96      0.95      0.95       687
weighted avg       0.96      0.95      0.95       687



# Exploding gradient 


## Lets Build and train a model with Exploding gradient problem 

In [36]:
model=Sequential()
model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0),input_dim=18))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(10,activation="tanh",kernel_initializer=random_normal(mean=0.0,stddev=1.0)))

model.add(Dense(1,activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [37]:
optimizer=SGD(learning_rate=1.0)
model.compile(loss="binary_crossentropy",optimizer=optimizer,metrics=["accuracy"])

In [38]:
model.fit(X_train,y_train,epochs=100)

Epoch 1/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4857 - loss: 0.7811 
Epoch 2/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4915 - loss: 0.7245 
Epoch 3/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4852 - loss: 0.7273 
Epoch 4/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5153 - loss: 0.7185 
Epoch 5/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5041 - loss: 0.7157 
Epoch 6/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4920 - loss: 0.7200 
Epoch 7/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5017 - loss: 0.7032 
Epoch 8/100
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4857 - loss: 0.7239 
Epoch 9/100
[1m65/65[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1aeb19751d0>

In [39]:
# model is accuracy at epoch 1 is 0.5046 and at 100th epoch its 0.5017 the model doesnot have any improment due to exploding gradient