<a href="https://colab.research.google.com/github/stav1236/google-machine-learning-course/blob/main/foundational-courses/2.machine-learning-crash-course/ml-models/classification/classification-credit-card-score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import required libraries
import io
import keras
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
import plotly.express as px
import dataclasses

# Load the credit-score CSV straight from the course repository on GitHub.
dataset_url = (
    "https://raw.githubusercontent.com/stav1236/google-machine-learning-course/main/"
    "foundational-courses/2.machine-learning-crash-course/ml-models/classification/"
    "datasets/Score.csv"
)
# NOTE: the variable is spelled `dateset` (sic); the normalization code below
# reads it under that name, so the spelling is kept.
dateset = pd.read_csv(dataset_url)

@dataclasses.dataclass
class ExperimentSettings:
  """Hyperparameters and feature selection for one training run."""

  # Step size handed to the optimizer.
  learning_rate: float
  # Number of passes over the training data.
  number_epochs: int
  # Number of examples per batch (used for both fit and evaluate).
  batch_size: int
  # Probability cut-off used by the thresholded metrics.
  classification_threshold: float
  # Names of the dataset columns fed to the model as inputs.
  input_features: list[str]

@dataclasses.dataclass()
class Experiment:
  """Bundle of everything produced by one training run.

  The annotations mirror what `train_model` actually stores: the epoch
  indices come from `History.epoch` (a plain list) and the per-epoch metrics
  are wrapped in a `pd.DataFrame`, not kept as a raw `History` object.
  """

  # Human-readable label for the run.
  name: str
  # Hyperparameters and input features the model was trained with.
  settings: ExperimentSettings
  # The trained model.
  model: keras.Model
  # Epoch indices recorded during training.
  epochs: list[int]
  # One row per epoch, one column per logged metric (including the loss).
  metrics_history: pd.DataFrame

def create_model(settings: ExperimentSettings, metrics: list[keras.metrics.Metric]) -> keras.Model:
  """Build and compile a single-unit sigmoid classifier.

  One scalar `keras.Input` is created per name in `settings.input_features`;
  the inputs are concatenated and fed to a single sigmoid `Dense` unit.

  Args:
    settings: Supplies the input feature names and the learning rate.
    metrics: Metrics handed to `model.compile`.

  Returns:
    The compiled model (RMSprop optimizer, binary cross-entropy loss).
  """
  model_inputs = [keras.Input(name=feature, shape=(1,)) for feature in settings.input_features]
  concatenated_inputs = keras.layers.Concatenate()(model_inputs)
  # No `input_shape` argument: the layer is called directly on the
  # concatenated tensor, so its input shape is already known. Passing
  # `input_shape` here only makes Keras emit a warning.
  dense = keras.layers.Dense(units=1, activation=keras.activations.sigmoid)
  model_output = dense(concatenated_inputs)
  model = keras.Model(inputs=model_inputs, outputs=model_output)
  model.compile(
      optimizer=keras.optimizers.RMSprop(settings.learning_rate),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=metrics,
  )
  return model

def train_model(experiment_name: str, model: keras.Model, dataset: pd.DataFrame, labels: np.ndarray, settings: ExperimentSettings) -> Experiment:
  """Fit `model` on the selected feature columns and package the run.

  Args:
    experiment_name: Label stored on the returned Experiment.
    model: Compiled model to train (it is trained in place).
    dataset: Frame containing every column named in `settings.input_features`.
    labels: Target values, aligned row-for-row with `dataset`.
    settings: Supplies the feature names, batch size and epoch count.

  Returns:
    An Experiment holding the model, its settings, the epoch indices and the
    per-epoch metric history as a DataFrame.
  """
  # The model has one named input per feature, so feed a name -> array mapping.
  feature_arrays = {}
  for column in settings.input_features:
    feature_arrays[column] = np.array(dataset[column])

  fit_history = model.fit(
      x=feature_arrays,
      y=labels,
      batch_size=settings.batch_size,
      epochs=settings.number_epochs,
  )
  return Experiment(
      name=experiment_name,
      settings=settings,
      model=model,
      epochs=fit_history.epoch,
      metrics_history=pd.DataFrame(fit_history.history),
  )

# Evaluation
def evaluate_experiment(experiment: Experiment, test_dataset: pd.DataFrame, test_labels: np.ndarray) -> dict[str, float]:
  """Evaluate a trained experiment's model on held-out data.

  Args:
    experiment: Provides the model plus the feature names and batch size
      (via `experiment.settings`).
    test_dataset: Frame containing every column named in the experiment's
      `input_features`.
    test_labels: Target values, aligned row-for-row with `test_dataset`.

  Returns:
    The result of `model.evaluate(..., return_dict=True)`: a mapping from
    metric name to its value on the test data.
  """
  # The model has one named input per feature, so feed a name -> array mapping.
  features = {feature_name: np.array(test_dataset[feature_name]) for feature_name in experiment.settings.input_features}
  return experiment.model.evaluate(x=features, y=test_labels, batch_size=experiment.settings.batch_size, verbose=0, return_dict=True)

# Hyperparameters and input columns shared by every model trained below.
settings = ExperimentSettings(
    learning_rate=0.001,
    number_epochs=60,
    batch_size=100,
    classification_threshold=0.35,
    input_features=[
        'Delay_from_due_date',
        'Num_of_Delayed_Payment',
        'Num_Credit_Inquiries',
        'Credit_Utilization_Ratio',
        'Credit_History_Age',
        'Amount_invested_monthly',
        'Monthly_Balance',
        'Age',
        'Annual_Income',
        'Num_Bank_Accounts',
        'Num_Credit_Card',
        'Interest_Rate',
        'Num_of_Loan',
        'Monthly_Inhand_Salary',
        'Changed_Credit_Limit',
        'Outstanding_Debt',
        'Total_EMI_per_month',
    ],
)

# Metrics reported during training and evaluation. Accuracy, precision and
# recall all use the classification threshold from `settings`; AUC is computed
# over 100 thresholds.
decision_threshold = settings.classification_threshold
metrics = [
    keras.metrics.BinaryAccuracy(name='accuracy', threshold=decision_threshold),
    keras.metrics.Precision(name='precision', thresholds=decision_threshold),
    keras.metrics.Recall(name='recall', thresholds=decision_threshold),
    keras.metrics.AUC(num_thresholds=100, name='auc'),
]

# Normalization: z-score every numeric column (subtract the column mean,
# divide by the column standard deviation).
feature_mean = dateset.mean(numeric_only=True)
feature_std = dateset.std(numeric_only=True)
numerical_features = dateset.select_dtypes('number').columns
normalized_dataset = (dateset[numerical_features] - feature_mean) / feature_std

# One boolean target column per credit-score class, for one-vs-all training.
for target_column, score_label in (
    ('Is_Poor', "Poor"),
    ('Is_Standard', "Standard"),
    ('Is_Good', "Good"),
):
  normalized_dataset[target_column] = (dateset['Credit_Score'] == score_label)

# Shuffle once with a fixed seed, then cut 80% for training / 20% for testing.
number_samples = len(normalized_dataset)
index_80th = round(number_samples * 0.8)

shuffled_dataset = normalized_dataset.sample(frac=1, random_state=100)
train_data, test_data = (
    shuffled_dataset.iloc[0:index_80th],
    shuffled_dataset.iloc[index_80th:],
)

# One-vs-all setup: one independent binary classifier per credit-score class.
# NOTE: `standart_vs_all_model` (sic) keeps its original spelling in case
# later cells refer to it.
# NOTE(review): the same metric objects are passed to all three models;
# consider building a fresh list per model — TODO confirm whether sharing is
# intended.
poor_vs_all_model = create_model(settings, metrics)
standart_vs_all_model = create_model(settings, metrics)
good_vs_all_model = create_model(settings, metrics)

# The label columns must not be visible to the models as inputs.
label_columns = ['Is_Poor', 'Is_Standard', 'Is_Good']
train_features = train_data.drop(columns=label_columns)
test_features = test_data.drop(columns=label_columns)

# Each class trains its OWN model and is evaluated through its OWN experiment.
# Previously every call trained `poor_vs_all_model` and every evaluation used
# `experiment_poor_vs_all`, so the other two models were never trained.
experiment_poor_vs_all = train_model('baseline', poor_vs_all_model, train_features, train_data['Is_Poor'].to_numpy(), settings)
test_metrics_poor_vs_all = evaluate_experiment(experiment_poor_vs_all, test_features, test_data['Is_Poor'].to_numpy())

experiment_standard_vs_all = train_model('baseline', standart_vs_all_model, train_features, train_data['Is_Standard'].to_numpy(), settings)
test_metrics_standard_vs_all = evaluate_experiment(experiment_standard_vs_all, test_features, test_data['Is_Standard'].to_numpy())

experiment_good_vs_all = train_model('baseline', good_vs_all_model, train_features, train_data['Is_Good'].to_numpy(), settings)
test_metrics_good_vs_all = evaluate_experiment(experiment_good_vs_all, test_features, test_data['Is_Good'].to_numpy())


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/60
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.6047 - auc: 0.7404 - loss: 0.6099 - precision: 0.4131 - recall: 0.8381
Epoch 2/60
800/800 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.7845 - auc: 0.8027 - loss: 0.4762 - precision: 0.6095 - recall: 0.7027
Epoch 3/60
800/800 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.7899 - auc: 0.7997 - loss: 0.4746 - precision: 0.6310 - recall: 0.6626
Epoch 4/60
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7931 - auc: 0.8037 - loss: 0.4699 - precision: 0.6342 - recall: 0.6627
Epoch 5/60
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - accuracy: 0.7899 - auc: 0.8009 - loss: 0.4726 - precision: 0.6300 - recall: 0.6535
Epoch 6/60
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7922 - auc: 0.8054 - loss: 0.4693 - precision