In [None]:
# importing essential libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
# Display every column when a DataFrame is rendered (no column truncation).
pd.set_option('display.max_columns', None)

# Location of the credit-score CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/processed-data-credit-score/Score.csv'

# low_memory=False makes pandas parse the file in one pass instead of in chunks.
raw_df = pd.read_csv(DATA_PATH, low_memory=False)

# Work on a copy so the frame exactly as loaded stays available in `raw_df`.
df = raw_df.copy()

In [None]:
df.head(n=5)  # preview the first five rows

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
df.info()

In [None]:
df.isna().sum()  # number of missing values per column (isna is the same check as isnull)

In [None]:
# Numerical features: every column whose dtype is not object.
num_features = [column for column, dtype in df.dtypes.items() if dtype != 'O']
print('Numerical feature: ', num_features)


In [None]:
# Categorical features: every column stored with the object dtype.
categorical_features = [column for column, dtype in df.dtypes.items() if dtype == 'O']
print('categorical features: ', categorical_features)


In [None]:
# Discrete features: numerical columns with at most 25 distinct values.
dis_features = []
for column in num_features:
    if df[column].nunique() <= 25:
        dis_features.append(column)
print('discrete features: ', dis_features)

In [None]:
# Continuous features: numerical columns with more than 25 distinct values.
con_features = []
for column in num_features:
    if df[column].nunique() > 25:
        con_features.append(column)
print('continuous features: ', con_features)


In [None]:
# Report how many distinct categories each categorical column of `df` holds.
for column in categorical_features:
    n_categories = df[column].nunique()
    print(f"Number of unique categories in: '{column}' feature are: {n_categories}\n")

## Train/validation/test split, using the 'Credit_Score' feature as the output (target) feature.

In [None]:
from sklearn.model_selection import train_test_split # 70% training, 15% validation, and 15% test.

# The target column, plus the feature columns that are left out of the model inputs.
target_column = 'Credit_Score'
excluded_columns = [target_column, 'Amount_invested_monthly', 'Monthly_Balance', 'Credit_Utilization_Ratio']

# Step 1: hold out 30% of the rows; the remaining 70% become the training set.
X_train, X_temp, y_train, y_temp = train_test_split(
    df.drop(excluded_columns, axis=1),
    df[target_column],
    test_size=0.3,
    random_state=42,
)

# Step 2: split the held-out 30% in half -> 15% test and 15% validation.
X_test, X_val, y_test, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)


In [None]:
X_train.head(n=5)  # preview the training features


In [None]:
X_val.head(n=5)  # preview the validation features

In [None]:
X_test.head(n=5)  # preview the test features

In [None]:
# DataFrame.info() prints its summary itself and returns None, so each call is
# made directly; wrapping it in print() only appended a stray "None" line.
X_train.info()
X_val.info()
X_test.info()

In [None]:
# Categorical (object-dtype) columns of the training features and their cardinalities.
cat_features = [column for column, dtype in X_train.dtypes.items() if dtype == 'O']
for column in cat_features:
    n_categories = X_train[column].nunique()
    print(f"Number of unique categories in: '{column}' feature are: {n_categories}\n")

In [None]:
# Numerical (non-object) columns of the training features.
numericalCols = [column for column, dtype in X_train.dtypes.items() if dtype != 'O']
print(numericalCols)

# Log Transformation

In [None]:
# Columns to log-transform: numerical, strictly positive in the training split,
# and continuous (more than 25 distinct values).
# NOTE(review): skewness itself is not measured here; the selection is by
# positivity and cardinality only.
skewedFeatures = [
    column
    for column in X_train.columns
    if X_train[column].dtypes != 'O'
    and (X_train[column] > 0).all()
    and X_train[column].nunique() > 25
]
print('Features that are right skewed are: {}'.format(skewedFeatures))


# Independent copies of each split that will hold the transformed values.
XtrainLog, XvalidateLog, XtestLog = X_train.copy(), X_val.copy(), X_test.copy()


# Apply log(1 + x) to the selected columns of the train, validation and test copies.
for transformed in (XtrainLog, XvalidateLog, XtestLog):
    for column in skewedFeatures:
        transformed[column] = np.log1p(transformed[column])

In [None]:
# Column labels of the log-transformed training frame.
XtrainLog.columns

In [None]:
XtrainLog.head(n=5)  # preview the log-transformed training features

In [None]:
XvalidateLog.head(n=5)  # preview the log-transformed validation features

# Feature Scaling

In [None]:
# MinMaxScaler rescales each numerical feature to the [0, 1] range observed in the training split. Only numerical features are scaled.
from sklearn.preprocessing import MinMaxScaler

X_train_scaled = XtrainLog.copy()
X_val_scaled = XvalidateLog.copy()
X_test_scaled = XtestLog.copy()


scaler = MinMaxScaler()

# The scaled values are floats. Assigning whole columns (frame[cols] = ...) replaces
# the columns, whereas writing through .loc[:, cols] tries to store the floats in the
# existing columns in place; for integer columns that is an incompatible-dtype
# assignment, which recent pandas versions deprecate.
# The scaler is fitted on the training split only, so validation/test values can
# fall slightly outside [0, 1].
X_train_scaled[numericalCols] = scaler.fit_transform(X_train_scaled[numericalCols])

X_val_scaled[numericalCols] = scaler.transform(X_val_scaled[numericalCols])

X_test_scaled[numericalCols] = scaler.transform(X_test_scaled[numericalCols])



# Before moving ahead, I am training an XGBoost classifier to identify the most important (top) features, which will also be used to train the ANN. This will also show whether a simple XGBoost model gives better accuracy than the ANN.

In [None]:
# Performing OHE as it is needed for XGB instead of Label Encoding.
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer


# One-hot encode the categorical columns and pass the (already scaled) numerical
# columns through unchanged.
# sparse_threshold=0 forces a dense array: OneHotEncoder yields a sparse matrix, and
# without this setting ColumnTransformer may return a sparse result (depending on the
# overall density), which cannot be wrapped by pd.DataFrame(..., columns=...) below.
ct = ColumnTransformer(
    transformers = [('cat_enc', OneHotEncoder(drop = 'first', handle_unknown = "ignore"), cat_features)],
    remainder = 'passthrough',
    sparse_threshold = 0
)

X_train_scaled_copy = X_train_scaled.copy()
X_test_scaled_copy = X_test_scaled.copy()

# Fit the encoder on the training split only, then apply the same mapping to the test split.
X_train_encoded = ct.fit_transform(X_train_scaled_copy)
X_test_encoded = ct.transform(X_test_scaled_copy)


# ------------------------------------
# Converting this encoded data into a proper data frame
# ------------------------------------
# 1. Names of the one-hot encoded columns
ohe_feature_names = ct.named_transformers_['cat_enc'].get_feature_names_out(cat_features)

# 2. Names of the passthrough (numerical) columns, in their original order
passthrough_features = [name for name in X_train_scaled_copy.columns if name not in cat_features]

# 3. ColumnTransformer emits the transformer output first and the remainder after it,
#    so the names are combined in that same order.
all_feature_names = list(ohe_feature_names) + passthrough_features

# 4. Wrap the encoded arrays in DataFrames with proper column names and the original row index
X_train_encoded_df = pd.DataFrame(X_train_encoded, columns=all_feature_names, index=X_train_scaled_copy.index)
X_test_encoded_df = pd.DataFrame(X_test_encoded, columns=all_feature_names, index=X_test_scaled_copy.index)


# --------------
# Encoding target variable
# --------------
# The label encoder is fitted on the training labels and reused for validation and test.
y_encoder = LabelEncoder()
y_train_enc = y_encoder.fit_transform(y_train)
y_val_enc = y_encoder.transform(y_val)
y_test_enc = y_encoder.transform(y_test)

In [None]:
# Capturing important features
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import accuracy_score

# Train a quick XGBoost model on the one-hot encoded training features.
xgb = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
    n_jobs=-1
)
xgb.fit(X_train_encoded_df, y_train_enc)

# Feature importances reported by the fitted model, labelled with the column names.
importances = xgb.feature_importances_
feat_imp = pd.Series(importances, index=X_train_encoded_df.columns)

# Sort descending so the most important features come first.
feat_imp = feat_imp.sort_values(ascending=False)

# Display ranked features
print("Feature Importances (XGBoost):")
print(feat_imp)

# Select top N features (say top 15)
top_features = feat_imp.head(15).index
X_train_selected = X_train_encoded_df[top_features]
X_test_selected = X_test_encoded_df[top_features]


# Predict on the named DataFrame rather than the raw encoded array: the model was
# fitted on a DataFrame, so the test input carries the same feature names.
print("XGBoost Classifier Accuracy:", accuracy_score(y_test_enc, xgb.predict(X_test_encoded_df)))


In [None]:
# Now, on these top features I am performing StratifiedKFold. This  will help to know that which top 'n' features that are giving higher accuracy
from sklearn.model_selection import cross_val_score # Cross validation will calculate the accuracy at each cv
import matplotlib.pyplot as plt
import numpy as np

# Evaluate the top-k features for every k from 1 to the total number of features.
max_k = len(feat_imp)

scores = []  # mean cross-validated accuracy for k = 1, 2, ..., max_k

for k in range(1, max_k + 1):
    topk = feat_imp.head(k).index.tolist()
    Xk = X_train_encoded_df[topk]

    # The XGBClassifier parameter is `n_estimators` (plural); the previous spelling
    # `n_estimator` is not a model parameter, so the intended 200 trees were never applied.
    model = XGBClassifier(n_estimators = 200, max_depth = 6, random_state = 42, n_jobs = -1)
    score = cross_val_score(model, Xk, y_train_enc, cv = 5, n_jobs = -1, scoring = 'accuracy')
    mean_score = score.mean()
    scores.append(mean_score)

    print(f"K = {k:2d} CV accuracy = {mean_score:.4f}") # ':2d' pads the integer k to a width of two characters


# Plotting accuracy vs k
plt.figure(figsize = (10, 5))
plt.plot(range(1, max_k+1), scores, marker = 'o')
plt.xlabel("K-value")
plt.ylabel("Accuracy Mean")
plt.title("Accuracy Vs K-Value")
plt.grid(True)
plt.show()

# scores[i] belongs to k = i + 1, hence the +1 on the index of the best score.
best_k = np.argmax(scores) + 1
print(f"Highest CV accuracy {max(scores):.4f} at K = {best_k}")

# Categorical encoding for the ANN will be performed using categorical embeddings instead of OHE:

In [None]:
# At first using label encoding to convert each category into labelled integerIDs.
from sklearn.preprocessing import LabelEncoder

# One LabelEncoder per categorical column (a LabelEncoder handles a single column at a time).
Payment_of_Min_Amount_enc = LabelEncoder()
Credit_Mix_enc = LabelEncoder()
Payment_Behaviour_enc = LabelEncoder()

column_encoders = {
    'Payment_of_Min_Amount': Payment_of_Min_Amount_enc,
    'Credit_Mix': Credit_Mix_enc,
    'Payment_Behaviour': Payment_Behaviour_enc,
}

# Fit each encoder on the training split, then reuse the fitted mapping for the
# validation and test splits. The columns are overwritten in place with integer IDs.
for column, encoder in column_encoders.items():
    X_train_scaled[column] = encoder.fit_transform(X_train_scaled[column])
    X_val_scaled[column] = encoder.transform(X_val_scaled[column])
    X_test_scaled[column] = encoder.transform(X_test_scaled[column])





In [None]:
# For each categorical column of the training frame, show its cardinality and distinct values.

for column in cat_features:
    n_categories = X_train_scaled[column].nunique()
    distinct_values = X_train_scaled[column].unique()
    print(f"Number of unique categories in: '{column}' feature are: {n_categories} and the unique values in each column are: {distinct_values}\n")
    

# Now, I am training the model without Hyperparameter Tuning.

In [None]:
# Creating embedding layers for categorical features.

from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight

# Embedding width (output dimension) used for each categorical feature.
embedding_sizes = {
    'Payment_of_Min_Amount': 2,
    'Credit_Mix': 2,
    'Payment_Behaviour': 3
}

inputs = []      # one Input per categorical feature, followed later by the numerical Input
embeddings = []  # flattened embedding output of each categorical feature

for cat in cat_features:
    # Each categorical feature enters the network as a single integer ID per row.
    cat_input = Input(shape = (1, ), name = cat + '_in')
    inputs.append(cat_input)

    # The embedding table has one row per distinct training category, plus one extra row.
    n_categories = X_train_scaled[cat].nunique()
    embedded = Embedding(input_dim = n_categories + 1, output_dim = embedding_sizes[cat])(cat_input)

    # Flatten drops the length-1 sequence axis so the vector can be concatenated.
    embeddings.append(Flatten()(embedded))


# All numerical columns enter together through one input.
num_in = Input(shape = (len(numericalCols), ), name = 'numerical_in')
inputs.append(num_in)


# Join the embedding vectors and the numerical features into one feature vector.
x = Concatenate()(embeddings + [num_in])


# Hidden stack: a ReLU Dense layer followed by 20% dropout, for each width in turn.
for units in (512, 256, 256, 128, 64):
    x = Dense(units, activation = 'relu', kernel_initializer = 'he_uniform')(x)
    x = Dropout(0.2)(x)


# Output layer: softmax over the three classes.
output = Dense(3, activation = 'softmax', kernel_initializer = 'glorot_uniform')(x)


# The model maps the list of inputs to the softmax output.
model = Model(inputs = inputs, outputs = output)
model.compile(
    optimizer = 'adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],  # accuracy is reported for every epoch
)


# ----------------------------------------------
# Preparing the training and validation arrays, in the same order as the model's
# `inputs` list: one array per categorical feature, then the numerical block.
# ----------------------------------------------

# Integer IDs of every categorical column
train_cat_inputs, val_cat_inputs = [], []
for cat in cat_features:
    train_cat_inputs.append(X_train_scaled[cat].values)
    val_cat_inputs.append(X_val_scaled[cat].values)

# Values of all numerical columns as one 2-D array per split
train_num_input = X_train_scaled[numericalCols].values
val_num_input = X_val_scaled[numericalCols].values


# Categorical arrays first, the numerical array last
train_model_inputs = [*train_cat_inputs, train_num_input]
val_model_inputs = [*val_cat_inputs, val_num_input]

# Early stopping: end training once the validation loss has not improved for
# 10 consecutive epochs, and restore the weights of the best epoch.
es = EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = True,
    verbose = 1
)


# Reduce the learning rate when the validation loss plateaus: after 8 epochs without
# improvement the learning rate is multiplied by `factor` (i.e. halved), but never
# lowered below `min_lr`.
reduceLR = ReduceLROnPlateau(
    monitor = 'val_loss',
    patience = 8,
    factor = 0.5,
    verbose = 1,   # was misspelled `vebose`, so the reductions were never logged
    min_lr = 1e-6  # lower bound of the learning rate (0.000001)
)


# Balanced class weights, computed from the encoded training labels.
balanced_weights = compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(y_train_enc),
    y = y_train_enc
)

# Keras expects a dict: key = class index, value = weight of that class.
class_weights = {class_index: weight for class_index, weight in enumerate(balanced_weights)}

# Fit the model with validation monitoring, both callbacks and the class weights.
model.fit(
    train_model_inputs,
    y_train_enc,
    batch_size = 64,
    epochs = 200,
    validation_data = (val_model_inputs, y_val_enc),
    callbacks = [es, reduceLR],
    class_weight = class_weights,
)



In [None]:
y_train.head(n=20)  # first 20 raw training labels

In [None]:
# First 20 integer-encoded training labels.
y_train_enc[:20]

# Hyperparameter Tuning using Bayesian Optimization

In [None]:
# We can train hyperparameters like: layer, neurons, LR, embedding dimensions, etc. But, here, tunned only LR since tunning other parameters worsen the accuracy as compared to without tuning.

from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow import keras
import keras_tuner as kt
from keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from functools import partial
import numpy as np


# ---------------------------------
# Defining model builder using Bayesian Optimization
# ---------------------------------
def model_builder(hp, cat_features):
    """Build and compile the embedding-based ANN for one Keras Tuner trial.

    Only the Adam learning rate is tuned; the architecture is fixed. The function
    reads the notebook globals ``X_train_scaled`` (category counts),
    ``numericalCols`` (width of the numerical input) and ``y_train_enc``
    (number of classes).

    Parameters
    ----------
    hp
        Keras Tuner hyperparameter object; used to sample ``learning_rate``.
    cat_features
        Names of the categorical columns; each gets its own integer input and
        embedding layer.

    Returns
    -------
    A compiled Keras ``Model`` whose inputs are the categorical inputs (in the order
    of ``cat_features``) followed by the numerical input.
    """
    # --------------------------------
    # Preparing categorical embedding and numerical inputs.
    # --------------------------------
    categorical_inputs = []  # one Input per categorical feature
    embedding_layers = []    # flattened embedding output of each categorical feature

    for cat in cat_features:
        # Each categorical feature enters the network as a single integer ID per row.
        inp = Input(shape = (1, ), name = cat + '_in')
        categorical_inputs.append(inp)

        # Embedding layer: the table has one row per distinct training category plus
        # one extra row; the width follows the rule of thumb min(50, (n + 1) // 2).
        input_dim = X_train_scaled[cat].nunique()
        out_dim = min(50, (input_dim + 1) // 2)
        # `input_length` is not passed: the Input shape above already fixes the
        # sequence length, and the argument is deprecated in current Keras.
        emb = Embedding(input_dim = input_dim + 1, output_dim = out_dim)(inp)
        emb = Flatten()(emb)  # drop the length-1 sequence axis
        embedding_layers.append(emb)

    # Numerical Input
    numerical_input = Input(shape = (len(numericalCols), ), name = 'numerical_in')

    x = Concatenate()(embedding_layers + [numerical_input])

    # Creating dense layers
    x = Dense(128, activation = 'relu', kernel_initializer = 'he_uniform')(x)
    x = Dropout(0.2)(x)

    x = Dense(64, activation = 'relu', kernel_initializer = 'he_uniform')(x)
    x = Dropout(0.2)(x)

    # Output Layer: one softmax unit per distinct training label
    num_classes = len(np.unique(y_train_enc))
    output = Dense(num_classes, activation = 'softmax')(x)

    model = Model(inputs = categorical_inputs + [numerical_input], outputs = output)

    # Tuning learning rate: sampled on a logarithmic scale between 1e-4 and 1e-2.
    lr = hp.Float("learning_rate", 0.0001, 0.01, sampling = 'log')

    # Compiling model
    model.compile(optimizer = keras.optimizers.Adam(learning_rate = lr), loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

    return model

# ------------
# Balancing classes
# ------------
# Balanced weights computed from the encoded training labels.
per_class_weights = compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(y_train_enc),
    y = y_train_enc
)

# Dict form expected by Keras: key = class index, value = weight of that class.
class_weights = {class_index: weight for class_index, weight in enumerate(per_class_weights)}

# ---------------------
# Bayesian Optimization
# ---------------------
# partial() pre-binds `cat_features`, leaving a builder that takes only `hp`.
build_model = partial(model_builder, cat_features = cat_features)

tuner = kt.BayesianOptimization(
    build_model,
    objective = 'val_accuracy',
    max_trials = 20,  # number of hyperparameter combinations to try
    directory = 'bayes_tuner',
    project_name = 'multiclass_ann'
)


# Stop a trial once val_loss has not improved by at least min_delta for 8 epochs.
# restore_best_weights brings back the weights of the best monitored epoch
# (here: lowest val_loss) instead of keeping those of the last epoch.
es = EarlyStopping(
    monitor = 'val_loss',
    patience = 8,
    min_delta = 1e-4,
    restore_best_weights = True,
    verbose = 2
)


                   
# ----------
# Running Search
# ----------
# Inputs in model order: one array per categorical feature, then the numerical block.
search_train_inputs = [X_train_scaled[col].values for col in cat_features] + [X_train_scaled[numericalCols].values]
search_val_inputs = [X_val_scaled[col].values for col in cat_features] + [X_val_scaled[numericalCols].values]

tuner.search(
    search_train_inputs,
    y_train_enc,
    epochs = 10,
    validation_data = (search_val_inputs, y_val_enc),
    batch_size = 32,
    verbose = 2,  # one summary line (loss and accuracy) per epoch
    class_weight = class_weights,
    callbacks = [es]
)


In [None]:
# -----------
# Getting Best Model
# -----------
# get_best_hyperparameters returns a list ordered best-first; keep the top entry.
best_hyps = tuner.get_best_hyperparameters(num_trials = 1)[0]

# Rebuild a fresh, untrained model using the best hyperparameters.
model = tuner.hypermodel.build(best_hyps)


# Inputs in model order: one array per categorical feature, then the numerical block.
final_train_inputs = [X_train_scaled[cat].values for cat in cat_features] + [X_train_scaled[numericalCols].values]
final_val_inputs = [X_val_scaled[cat].values for cat in cat_features] + [X_val_scaled[numericalCols].values]

# Train the final model for the full 30 epochs (no callbacks are used here).
history = model.fit(
    final_train_inputs,
    y_train_enc,
    epochs = 30,
    batch_size = 32,
    validation_data = (final_val_inputs, y_val_enc),
    class_weight = class_weights,
)
                    
