# Importing libraries

In [4]:
# Install TensorFlow before any cell tries to import it.
!pip install tensorflow

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder






# Reading data

In [5]:
# Load the credit-score dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='MLF_GP1_CreditScore.csv')

# Show the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,Sales/Revenues,Gross Margin,EBITDA,EBITDA Margin,Net Income Before Extras,Total Debt,Net Debt,LT Debt,ST Debt,Cash,...,Interest Coverage,Total Liquidity,Current Liquidity,Current Liabilities,EPS Before Extras,PE,ROA,ROE,InvGrd,Rating
0,-0.005496,0.030763,0.018885,0.024515,0.146849,-0.02971,-0.019296,-0.042648,0.049875,-0.133716,...,0.136748,0.392143,-0.184887,0.062781,0.148305,0.100409,0.163266,0.102521,1,A1
1,-0.005496,0.030763,0.088716,0.094733,0.146849,-0.02971,-0.019296,-0.042648,0.049875,-0.133716,...,0.214657,0.392143,-0.184887,0.062781,0.148305,-0.089598,0.163266,0.102521,1,A1
2,-0.007045,0.023159,0.088716,0.09644,0.10859,0.03941,0.034268,0.009059,0.250371,0.101315,...,0.20529,0.483257,-0.017877,0.121357,0.110656,-0.045142,0.105711,0.103378,1,A1
3,-0.009396,0.0284,0.088716,0.099046,0.146137,0.030071,0.036938,-0.016964,0.356994,-0.052606,...,0.232991,0.996955,-0.122017,0.079051,0.151639,-0.008231,0.162421,0.132295,1,A1
4,-0.009009,0.027714,0.088716,0.098611,0.1235,0.024224,0.034445,-0.034132,0.461894,-0.090869,...,0.172906,1.711426,-0.161561,0.084319,0.130435,0.015528,0.156427,0.225144,1,A1


# Pre-Processing data

In [6]:
# Encode the two target columns into integer labels.
# The encoded values are stored in new Series instead of overwriting `df`, so
# re-running this cell always starts from the raw columns. Re-encoding an
# already-encoded Rating column would refit the encoder on integers and the
# later `label_encoder.inverse_transform(...)` call would no longer return
# rating names.
grade_encoder = LabelEncoder()
y_grade = pd.Series(
    grade_encoder.fit_transform(df['InvGrd']),
    index=df.index,
    name='InvGrd',
)

# `label_encoder` is dedicated to Rating: the rating-network cell uses it to
# map predicted class indices back to rating names.
label_encoder = LabelEncoder()
y_rating = pd.Series(
    label_encoder.fit_transform(df['Rating']),
    index=df.index,
    name='Rating',
)

# Standardise numerical variables: every column except the two trailing
# target columns (InvGrd, Rating).
scaler = StandardScaler()
X = scaler.fit_transform(df.iloc[:, :-2])

# Splitting data in ratio 80:20

In [7]:
# Hold out 20% of the rows as a test set. The feature matrix and both targets
# are split in one call so their rows stay aligned; random_state fixes the split.
# NOTE(review): X was standardised on the full dataset before this split, so the
# scaler has seen the test rows - consider fitting the scaler on X_train only.
(
    X_train, X_test,
    y_train_rating, y_test_rating,
    y_train_grade, y_test_grade,
) = train_test_split(
    X,
    y_rating,
    y_grade,
    test_size=0.2,
    random_state=0,
)


# Linear regression using Ridge

The features in X_train and the corresponding targets in y_train_grade are used to fit a ridge regression model, which applies L2 regularization, to predict investment grade. The regularization strength is controlled by the alpha parameter, set here to 0.5. Higher alpha values give stronger regularization, while lower alpha values give weaker regularization. The goal is to find the alpha value that balances the bias-variance tradeoff and produces the best results on the test set. After training, the model makes predictions on the test set; because ridge regression outputs continuous values, the predictions are rounded to 0/1 class labels before being scored. Model performance is then evaluated with several metrics: the accuracy score, the confusion matrix, and the classification report. The accuracy score is the proportion of correct predictions, while the confusion matrix shows the number of true positives, true negatives, false positives, and false negatives. For each class, the classification report adds precision, recall, and F1 score. In summary, the code implements a ridge regression model with L2 regularization to predict investment grade, with the parameter chosen as a compromise between the complexity of the model and its performance on the test set.
After implementing the model, we get an accuracy score of 0.7705882352941177 on the test set.

- Reference https://www.geeksforgeeks.org/implementation-of-ridge-regression-from-scratch-using-python/


In [8]:
# Ridge regression (L2 penalty, strength alpha=0.5) fitted on the 0/1 investment-grade target.
# The `reg_l1` / `y_pred_l1` names are historical (Ridge is an L2 method) and are
# kept so any other cell that refers to them keeps working.
reg_l1 = Ridge(alpha=0.5)
reg_l1.fit(X_train, y_train_grade)

# Continuous regression output, not yet class labels.
y_pred_l1 = reg_l1.predict(X_test)

# Convert to class labels once, with a 0.5 cut-off. Unlike np.round, this can
# never emit -1 or 2 when a prediction falls outside [0, 1], so the metrics
# below always see exactly the two real classes.
y_pred_ridge_grade = (y_pred_l1 > 0.5).astype(int)

print("Accuracy Score (Ridge):", accuracy_score(y_test_grade, y_pred_ridge_grade))
print("Confusion Matrix (Ridge):")
print(confusion_matrix(y_test_grade, y_pred_ridge_grade))
print("Classification Report (Ridge):")
print(classification_report(y_test_grade, y_pred_ridge_grade))

Accuracy Score (L1): 0.7705882352941177
Confusion Matrix (L1):
[[  1  74]
 [  4 261]]
Classification Report (L1):
              precision    recall  f1-score   support

           0       0.20      0.01      0.03        75
           1       0.78      0.98      0.87       265

    accuracy                           0.77       340
   macro avg       0.49      0.50      0.45       340
weighted avg       0.65      0.77      0.68       340



# Linear regression using Lasso

The code uses the features in X_train and the corresponding targets in y_train_grade to fit a Lasso regression model, which applies L1 regularization, to predict the investment grade. The regularization strength is controlled by the alpha parameter, set here to 0.5. Higher alpha values give stronger regularization, while lower alpha values give weaker regularization. The goal is to find the alpha value that balances the bias-variance tradeoff and performs best on the test set. After the model is trained, predictions are made on the test set and rounded to 0/1 class labels, and model performance is evaluated with several metrics: the accuracy score, the confusion matrix, and the classification report. The accuracy score is the proportion of predictions the model got right, while the confusion matrix gives the number of correct and incorrect predictions for each class. For each class, the classification report adds precision, recall, and F1 score. In summary, the code implements a lasso regression model with L1 regularization to predict investment grade, with the parameter chosen as a trade-off between model complexity and test-set performance. Note that Lasso uses an L1 penalty rather than an L2 penalty, and that the penalty applies to the feature coefficients, not to the intercept term.
After implementing the model, we get an accuracy score of 0.7794117647058824. The confusion matrix shows that the model predicts class 1 for every test sample, so this score is simply the share of class 1 in the test set (265 of 340).

- Reference https://www.geeksforgeeks.org/implementation-of-lasso-regression-from-scratch-using-python/


In [9]:
# Lasso regression (L1 penalty, strength alpha=0.5) fitted on the 0/1 investment-grade target.
# The `reg_l2` / `y_pred_l2` names are historical (Lasso is an L1 method) and are
# kept so any other cell that refers to them keeps working.
# NOTE(review): the recorded run of this cell classified every test row as 1;
# alpha=0.5 may be too strong a penalty for a 0/1 target - TODO tune alpha.
reg_l2 = Lasso(alpha=0.5)
reg_l2.fit(X_train, y_train_grade)

# Continuous regression output, not yet class labels.
y_pred_l2 = reg_l2.predict(X_test)

# Convert to class labels once, with a 0.5 cut-off. Unlike np.round, this can
# never emit -1 or 2 when a prediction falls outside [0, 1].
y_pred_lasso_grade = (y_pred_l2 > 0.5).astype(int)

print("Accuracy Score (Lasso):", accuracy_score(y_test_grade, y_pred_lasso_grade))
print("Confusion Matrix (Lasso):")
print(confusion_matrix(y_test_grade, y_pred_lasso_grade))
print("Classification Report (Lasso):")
# zero_division=0 reports 0.00 for a class that is never predicted, without
# emitting an undefined-metric warning for each affected average.
print(classification_report(y_test_grade, y_pred_lasso_grade, zero_division=0))

Accuracy Score (L2): 0.7794117647058824
Confusion Matrix (L2):
[[  0  75]
 [  0 265]]
Classification Report (L2):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        75
           1       0.78      1.00      0.88       265

    accuracy                           0.78       340
   macro avg       0.39      0.50      0.44       340
weighted avg       0.61      0.78      0.68       340



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Logistic regression using Lasso

The code uses the features in X_train and the associated targets in y_train_grade to fit a logistic regression model with L1 regularization to predict investment grade. The model applies L1 regularization because the penalty parameter is set to "l1". The solver option is set to saga, which supports L1 regularization and is suitable for large datasets. To ensure that the results can be reproduced, the random_state option is set to 0. After training the model, predictions are made on the test set, and the model's performance is evaluated with several metrics: the accuracy score, the confusion matrix, and the classification report. The accuracy score is the proportion of predictions the model got right, while the confusion matrix gives the number of correct and incorrect predictions for each class. For each class, the classification report adds precision, recall, and F1 score. The parameters are chosen according to their suitability for the task, and the code as a whole implements a logistic regression model with L1 regularization to predict investment grade. The choice of the solver and random_state parameters gives efficient and reproducible results, while L1 regularization helps reduce the number of features used by the model and improves its interpretability.
After implementing the model, we get an accuracy score of 0.7676470588235295 on the test set.

- Reference https://stackoverflow.com/questions/41639557/how-to-perform-logistic-lasso-in-python

In [10]:
# L1-penalised logistic regression on the investment-grade target.
# The 'saga' solver is used together with the l1 penalty; random_state pins the solver's randomness.
log_reg_l1 = LogisticRegression(penalty='l1', solver='saga', random_state=0).fit(
    X_train, y_train_grade
)

# Hard class predictions for the held-out rows.
y_pred_log_l1 = log_reg_l1.predict(X_test)

# Report accuracy, then the confusion matrix and per-class report under their headings.
print("Accuracy Score (Logistic L1):", accuracy_score(y_test_grade, y_pred_log_l1))
print(
    "Confusion Matrix (Logistic L1):",
    confusion_matrix(y_test_grade, y_pred_log_l1),
    sep="\n",
)
print(
    "Classification Report (Logistic L1):",
    classification_report(y_test_grade, y_pred_log_l1),
    sep="\n",
)

Accuracy Score (Logistic L1): 0.7676470588235295
Confusion Matrix (Logistic L1):
[[  1  74]
 [  5 260]]
Classification Report (Logistic L1):
              precision    recall  f1-score   support

           0       0.17      0.01      0.02        75
           1       0.78      0.98      0.87       265

    accuracy                           0.77       340
   macro avg       0.47      0.50      0.45       340
weighted avg       0.64      0.77      0.68       340





# Logistic regression using Ridge

The code uses the features in X_train and the associated targets in y_train_grade to fit a logistic regression model with L2 regularization to predict investment grade. The model uses L2 regularization because the penalty parameter is set to "l2". The solver option is set to liblinear, a solver that supports L2 regularization and is well suited to small and medium-sized datasets. The parameter C, the inverse of the regularization strength, is set to 0.1; lower values of C produce stronger regularization. After training the model, predictions are generated on the test set, and model performance is evaluated with several metrics: the accuracy score, the confusion matrix, and the classification report. The confusion matrix presents the counts of true positives, true negatives, false positives, and false negatives, while the accuracy score gives the fraction of predictions that are correct. For each class, the classification report adds precision, recall, and F1 score. The parameters are chosen based on their suitability for the task, and the code as a whole implements a logistic regression model with L2 regularization to predict investment grade. The choice of solver and C provides effective and efficient regularization, and L2 regularization helps prevent overfitting and improves the generalizability of the model.
The accuracy score is the proportion of correct predictions, while the confusion matrix shows the number of true positives, true negatives, false positives, and false negatives.
After implementing the model, we get an accuracy score of 0.7676470588235295 for the L2-regularized logistic regression model.

- Reference https://machinelearningmastery.com/ridge-regression-with-python/

In [11]:
# L2-penalised logistic regression on the investment-grade target.
# C is the inverse of the regularization strength, so C=0.1 regularizes more
# strongly than scikit-learn's default of 1.0.
logreg = LogisticRegression(penalty='l2', solver='liblinear', C=0.1)

logreg.fit(X_train, y_train_grade)

# Hard class predictions for the held-out rows.
y_pred_log_l2 = logreg.predict(X_test)

# Headings say "Logistic L2": this cell evaluates the L2 model, not the L1 one.
print("Accuracy Score (Logistic L2):", accuracy_score(y_test_grade, y_pred_log_l2))
print("Confusion Matrix (Logistic L2):")
print(confusion_matrix(y_test_grade, y_pred_log_l2))
print("Classification Report (Logistic L2):")
print(classification_report(y_test_grade, y_pred_log_l2))

Accuracy Score (Logistic L1): 0.7676470588235295
Confusion Matrix (Logistic L1):
[[  1  74]
 [  5 260]]
Classification Report (Logistic L1):
              precision    recall  f1-score   support

           0       0.17      0.01      0.02        75
           1       0.78      0.98      0.87       265

    accuracy                           0.77       340
   macro avg       0.47      0.50      0.45       340
weighted avg       0.64      0.77      0.68       340



# Neural network for rating

The code uses the Keras package to build a neural network model for credit rating classification. The architecture consists of three dense layers: the first layer has 64 nodes with "relu" activation, the second layer has 32 nodes with "relu" activation, and the output layer has 16 nodes with "softmax" activation, which produces a probability for each rating class. To reduce overfitting, a dropout layer with a rate of 0.2 is placed after the first layer and after the second hidden layer. The model is compiled with the "adam" optimizer and the "sparse_categorical_crossentropy" loss function. The loss function measures the difference between predicted and actual values, and the optimizer minimizes it during training. To monitor model performance during training, the metrics option is set to accuracy. The model is then trained on the training data for 1000 epochs with a batch size of 32. The test set is passed as validation data so that performance on data not used for fitting can be monitored after each epoch. After the model is trained, its accuracy is measured on the test set. The predict function then produces class probabilities for the test set, and the most probable class index for each sample is converted back to its credit rating using the label encoder. In summary, the code uses the Keras library to build a neural network model that classifies credit ratings, with the model architecture, optimizer, loss function, and metrics chosen to suit the credit classification problem.
After implementing the model, we get the Accuracy for credit rating classification: 41.18%.

- Reference https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65


In [13]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and batch
# shuffling repeat across "Restart & Run All".
tf.keras.utils.set_random_seed(0)

# One softmax unit per rating class known to the Rating encoder, instead of a
# hard-coded class count that must be kept in sync with the data by hand.
n_rating_classes = len(label_encoder.classes_)

# define the model architecture: two hidden ReLU layers, each followed by dropout
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(n_rating_classes, activation='softmax'))

# compile the model; the sparse loss takes integer class labels directly
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train the model; the test split is passed as validation data, so it is also
# scored after every epoch
model.fit(X_train, y_train_rating, epochs=1000, batch_size=32, validation_data=(X_test, y_test_rating))

# evaluate the model for credit rating classification
_, accuracy = model.evaluate(X_test, y_test_rating)
print('Accuracy for credit rating classification: {:.2f}%'.format(accuracy*100))

# predict the credit ratings for the test set: class probabilities first, then
# the most probable class index decoded back to its rating label
rating_probabilities = model.predict(X_test)
y_pred_rating = label_encoder.inverse_transform(rating_probabilities.argmax(axis=1))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

# Neural network for grading

The code defines and trains a neural network model to classify whether a company's credit rating is investment grade. The architecture consists of three dense layers. The first layer has 64 neurons with a ReLU activation function and accepts input with the same number of features as the training data. The second layer has 32 neurons with a ReLU activation function, and a dropout layer with a rate of 0.2 follows each of the first two layers. The third layer has a single neuron with a sigmoid activation function. The model is compiled with the Adam optimizer and the binary cross-entropy loss function, with accuracy as the evaluation metric. The model is trained on the training data for 1000 epochs with a batch size of 32, using the test data as validation data, and is then evaluated on the test data; the resulting accuracy is printed. Finally, class-membership probabilities are predicted for the test set and thresholded at a decision limit of 0.5 to produce class predictions. Overall, this code demonstrates how to use the Keras library and a neural network model for binary classification of investment-grade credit ratings. The parameters used are the activation functions, optimizer, loss function, batch size, number of epochs, and number of neurons in each layer.
After implementing the model, we get the Accuracy for investment grade classification: 83.53%

- Reference  https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65


In [12]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and batch
# shuffling repeat across "Restart & Run All".
tf.keras.utils.set_random_seed(0)

# define the model architecture: two hidden ReLU layers, each followed by
# dropout, and a single sigmoid unit that outputs one probability per row
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train the model; the test split is passed as validation data, so it is also
# scored after every epoch
model.fit(
    X_train,
    y_train_grade,
    epochs=1000,
    batch_size=32,
    validation_data=(X_test, y_test_grade),
)

# evaluate the model for investment grade classification
_, accuracy = model.evaluate(X_test, y_test_grade)
print('Accuracy for investment grade classification: {:.2f}%'.format(accuracy*100))

# predict the investment grade for the test set: probabilities first, then a
# 0.5 cut-off to obtain 0/1 labels
grade_probabilities = model.predict(X_test)
y_pred_grade = (grade_probabilities > 0.5).astype(int)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E