In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # matplotlib is for drawing graphs
import matplotlib.colors as colors
from sklearn.utils import resample # downsample the dataset
from sklearn.model_selection import train_test_split # split data into training and testing sets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC # this will make a support vector machine for classificaiton
from sklearn.model_selection import GridSearchCV #this will do cross validation
from sklearn.metrics import confusion_matrix # this creates a confusion matrix
from sklearn.decomposition import PCA # to perform PCA to plot the data
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc

In [2]:
df = pd.read_csv("default of credit card clients.txt",sep='\t',header=1)
df.drop('ID',axis=1,inplace=True)
df.rename(columns={'default payment next month': 'DEFAULT'}, inplace=True)
df_no_missing = df.loc[(df['MARRIAGE']!=0) & (df['EDUCATION']!=0)]
df_no_missing.head()
df_no_default = df_no_missing[df_no_missing['DEFAULT']==0]
df_default = df_no_missing[df_no_missing['DEFAULT']==1]
df_no_default_downsampled = resample(df_no_default,replace=False,n_samples=1000,random_state=42)
df_default_downsampled = resample(df_default,replace=False,n_samples=1000,random_state=42)
df_downsampled = pd.concat([df_no_default_downsampled,df_default_downsampled])
X = df_downsampled.drop(['DEFAULT','SEX','EDUCATION','MARRIAGE','PAY_0','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6'],axis=1).copy()
y = df_downsampled['DEFAULT'].copy()
#X_encoded = pd.get_dummies(X,columns=['SEX','EDUCATION','MARRIAGE','PAY_0','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']).astype(int)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
print("pre-proccessing");

pre-proccessing


In [47]:
print(f"Number of columns: {X_train_scaled.shape[1]}")
input_nodes_num = X_train_scaled.shape[1]
hidden_nodes_num = 8
output_nodes_num = 1
np.random.seed(42)


equation1 = r'$\frac{\partial \text{Ssr}}{\partial b} = \frac{\partial \text{Ssr}}{\partial \text{pred}} \cdot \frac{\partial \text{pred}}{\partial b}$'
equation2 = r'$\frac{\partial \text{Ssr}}{\partial \text{pred}} = \sum\frac{\partial (pred - real)^{2}}{\partial \text{pred}} = \sum2(pred - real)$'
equation3 = r'$\frac{\partial \text{pred}}{\partial \text{b}} = \frac{\partial sigmoid(out)}{\partial \text{b}} = \frac{d}{db}sigmoid(out)*1$'    
plotEquations(equation1,equation2,equation3)
equation1 = r'$\frac{\partial \text{Ssr}}{\partial hiddenOutWeights} = \frac{\partial \text{Ssr}}{\partial \text{pred}} \cdot \frac{\partial \text{pred}}{\partial hiddenOutWeights}$' 
equation2 = ''
equation3 = r'$\frac{\partial \text{pred}}{\partial \text{hiddenOutWeights}} =  \frac{d}{dhiddenOutWeights} \cdot sigmoid(out)\cdot hiddenNodes$'    
plotEquations(equation1,equation2,equation3)


Number of columns: 14


In [48]:
learning_rate = 0.01
#hidden-input layer wieghts and biases
input_hidden_weights = np.random.randn(input_nodes_num,hidden_nodes_num)
input_hidden_baises = np.random.randn(hidden_nodes_num)

#hidden-output layer wieghts and biases
hidden_out_weights = np.random.randn(hidden_nodes_num,output_nodes_num)
hidden_out_baises = 0

for epoch in range(10):
    i =0
    predicated_values = []
    hidden_layer_outputs =[]
    outputs_values =[]
    for i in range(0,len(X_train_scaled)):
        input_nodes = X_train_scaled[i]
        pred,hidden_nodes,output_node = forward(input_nodes,input_hidden_weights,hidden_out_weights,input_hidden_baises,hidden_out_baises)
        real = y_train.iloc[i]
        outputs_values.append(output_node)
        predicated_values.append(pred)
        hidden_layer_outputs.append(hidden_nodes)

    outputs_values = np.array(outputs_values)
    predicated_values = np.array(predicated_values)
    hidden_layer_outputs = np.array(hidden_layer_outputs)
    observed_values =  y_train.to_numpy()
    
    ssr = np.sum((predicated_values-observed_values)**2)

    dSSR_dpred = np.sum(2 * (predicated_values - observed_values))
    sigmoid_derivs = outputs_values*(1-outputs_values)
    
    d_output_weights = hidden_layer_outputs.T@(dSSR_dpred*sigmoid_derivs)
    d_output_biases = np.sum(dSSR_dpred * sigmoid_derivs, axis=0) 
    
    d_hidden_output_weights = np.array(X_train_scaled).T @ ( dSSR_dpred * sigmoid_derivs @ hidden_out_weights.T * (hidden_layer_outputs > 0))
    d_hidden_biases = np.sum(np.dot(dSSR_dpred * sigmoid_derivs, hidden_out_weights.T) * (hidden_layer_outputs > 0))

    input_hidden_weights -= learning_rate * d_hidden_output_weights
    input_hidden_baises -= learning_rate * d_hidden_biases
    
    hidden_out_baises -= learning_rate*d_output_biases
    hidden_out_weights -= learning_rate*d_output_weights
    
    print(f"hidden_out_baises : {hidden_out_baises}" )
    print(f"hidden_out_weights : {hidden_out_weights}" )
    print(f"ssr : {ssr}")
    #print(d_output_weights)
# print(hidden_out_baises)
# print(hidden_out_weights)

hidden_out_baises : [-7.87843366e+08]
hidden_out_weights : [[-2.55679387e+09]
 [-9.62873115e+09]
 [-1.49687039e+09]
 [-6.48673113e+09]
 [-8.20494602e+08]
 [-1.94840163e+08]
 [-2.98559777e+09]
 [-3.44067738e+09]]
ssr : 930261.8794070856
hidden_out_baises : [-4.44011009e+47]
hidden_out_weights : [[-1.53421409e+55]
 [-1.36331066e+58]
 [-6.26740872e+55]
 [-2.07041462e+58]
 [-8.51436668e+55]
 [-8.59086383e+56]
 [-1.26364517e+58]
 [-5.89230405e+57]]
ssr : 1135500.0
hidden_out_baises : [-1.46904197e+242]
hidden_out_weights : [[-1.51807501e+300]
 [-4.36108979e+301]
 [-1.89155093e+300]
 [-2.98688760e+301]
 [-1.72286154e+300]
 [-2.10692142e+300]
 [-1.45554224e+301]
 [-1.65399443e+301]]
ssr : 1135500.0
hidden_out_baises : [-inf]
hidden_out_weights : [[-inf]
 [ nan]
 [-inf]
 [ nan]
 [-inf]
 [-inf]
 [ nan]
 [ nan]]
ssr : 1135500.0


  return 1 / (1 + np.exp(-x))
  d_output_weights = hidden_layer_outputs.T@(dSSR_dpred*sigmoid_derivs)
  (np.dot(dSSR_dpred * sigmoid_derivs, hidden_out_weights.T) * (hidden_layer_outputs > 0)))
  d_hidden_biases = np.sum(np.dot(dSSR_dpred * sigmoid_derivs, hidden_out_weights.T) * (hidden_layer_outputs > 0))


hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan
hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan
hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan
hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan
hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan
hidden_out_baises : [nan]
hidden_out_weights : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
ssr : nan


In [5]:
def forward(input_nodes1,input_hidden_weights1,hidden_out_weights1,input_hidden_baises1,hidden_out_baises1):
    #initialize hidden nodes
    hidden_nodes1 = np.dot(input_nodes1,input_hidden_weights1)
    hidden_nodes1 = np.add(hidden_nodes1,input_hidden_baises1)
    hidden_nodes1 = relu(hidden_nodes1)
    #initialize out node
    output_node1 = np.dot(hidden_nodes1,hidden_out_weights1)
    output_node1 = np.add(output_node1,hidden_out_baises1)
    predictions1 = sigmoid(output_node1)
    return predictions1,hidden_nodes1,output_node1
    
def plotEquations(q1,q2,q3):
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()

    ax.axis('off')
    
    ax.text(0.5, 0.7, q1, fontsize=20, ha='center', va='center')
    
    ax.text(0.5, 0.5, q2, fontsize=20, ha='center', va='center')
    
    ax.text(0.5, 0.3, q3, fontsize=20, ha='center', va='center')
    
    plt.savefig('equations.png', bbox_inches='tight', pad_inches=0.1, dpi=300)
    plt.show()
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def relu(x):
    return np.maximum(0, x) 
def sigmoid_derivative(x):
    sigmoid_x = sigmoid(x)
    return sigmoid_x * (1 - sigmoid_x)