In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from imblearn.over_sampling import RandomOverSampler
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline



In [None]:
def clean_data(data):
  '''
    This function will remove unwanted features which are not required during modelling
  '''
  data.drop("employ", inplace=True, axis=1)
  data.drop("ed", inplace=True, axis=1)
  data.drop("age", inplace=True, axis=1)
  data.drop("address", inplace=True, axis=1)
  data.drop("othdebt", inplace=True, axis=1)

  return data


In [None]:
def missing_values(data):

  '''
    THis function will fill the missing values of data in the dataset
    Categorical type values will be filled using mode
    Numerical type values will be filled using median
  '''
  data["income"]=data["income"].fillna(data["income"].median())
  data["debtinc"]=data["debtinc"].fillna(data["debtinc"].median())
  data["creddebt"]=data["creddebt"].fillna(data["creddebt"].median())
  data["savings"]=data["savings"].fillna(data["savings"].median())
  data["default"]=data["default"].fillna(data["default"].mode()[0])

  return data

In [None]:
def feature_scaling(X_train):
  ''' 
    Nomralisation:the data is scaled to a fixed range - usually 0 to 1
    Standardization: the features will be rescaled so that they’ll have the properties of a standard normal distribution with 
                      μ=0 and σ=1
    This function will be used for normalising the values so that one value does not have any greater effect on the model training.

    X_train: 2-D numpy array, size=(length of dataset, number of features)
   
  '''
  min_max_scaler = preprocessing.MinMaxScaler()
  X_train_minmax = min_max_scaler.fit_transform(X_train)

  return X_train_minmax





In [None]:
def preprocessing_data(data):
  '''
    X: features
    y: label
  '''
  data_cleaned=clean_data(data)
  # data_filled=missing_values(data_cleaned)

  X=data_cleaned.iloc[:700,:4]
  y= data_cleaned.iloc[:700,4]

  #dealing with imbalanced data
  oversample = RandomOverSampler(random_state=0)
  X, y = oversample.fit_resample(X, y)

  #dividing the data into training and test data
  X_train, X_test, y_train, y_test = train_test_split(X, y)

  
  #feature scaling
  # X_trainr=feature_scaling(X_train)
  # X_test=feature_scaling(X_test)

  return X_train, X_test, y_train, y_test

In [None]:
def wrapper():
  ''' 
    This function will implement all other functions 
  '''
  df=pd.read_csv("/content/drive/MyDrive/Amex Codestreet/bankloans.csv")

  print(df["default"].value_counts(normalize=True))
  X_train, X_test, y_train, y_test= preprocessing_data(df)
  


  return X_train, X_test, y_train, y_test

## **Modelling and Training**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,roc_auc_score,cohen_kappa_score,confusion_matrix,roc_curve,balanced_accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

In [None]:
def logistic_regression(X_train, X_test, y_train, y_test):
  pipe = Pipeline([('scaler', preprocessing.MinMaxScaler()), ('lr',LogisticRegression())])
  # clf = LogisticRegression(random_state=0).fit(X_train, y_train)
  # print("Training Accuracy:", clf.score(X_train, y_train))
  # print("Validation Accuracy:", accuracy_score(y_test, clf.predict(X_test)))

  # parameters = {'C':[0.0001, 0.001, 0.01, 0.1, 1], 'solver':["newton-cg", "lbfgs", "liblinear", "sag", "saga"],'penalty':["l1", "l2", "elasticnet", "none"]}
  parameters = {'lr__C':[0.0000001,0.0001, 0.001, 0.01, 0.1, 1],'lr__solver':["newton-cg", "lbfgs"],'lr__max_iter':[10000]}
  grid_search = GridSearchCV(pipe, parameters, scoring='accuracy')
  grid_fit = grid_search.fit(X_train, y_train)

  best_par = grid_search.best_params_
  print('Best params:', best_par) 
  
  best_dt = grid_fit.best_estimator_

  print(classification_report(y_test, best_dt.predict(X_test)))

  



## Getting training and testing dataset

In [None]:
#calling wrapper function
X_train, X_test, y_train, y_test= wrapper()
print(X_train, X_test, y_train
      , y_test)

''' PIEPLINE'''
# pipe = make_pipeline(MinMaxScaler(), LogisticRegression(C=8,random_state=7,fit_intercept=True))

0.0    0.738571
1.0    0.261429
Name: default, dtype: float64
[[6.700000e+01 1.120000e+01 5.245296e+00 3.000000e+05]
 [7.000000e+01 1.200000e+01 4.334400e+00 3.000000e+04]
 [2.500000e+01 1.380000e+01 1.976850e+00 5.000000e+04]
 ...
 [2.400000e+01 1.560000e+01 1.636128e+00 3.700000e+05]
 [3.900000e+01 1.610000e+01 1.701609e+00 3.600000e+05]
 [9.100000e+01 2.520000e+01 2.316132e+00 3.000000e+04]] [[2.200000e+01 1.500000e+01 1.970100e+00 2.200000e+05]
 [4.300000e+01 1.320000e+01 3.042336e+00 2.000000e+05]
 [1.130000e+02 1.200000e+01 3.376440e+00 3.000000e+05]
 ...
 [6.700000e+01 1.320000e+01 3.741012e+00 6.000000e+04]
 [2.700000e+01 1.050000e+01 2.472120e+00 4.700000e+05]
 [1.130000e+02 2.600000e+00 9.871680e-01 1.400000e+05]] [0. 0. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 1. 0.
 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0.
 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1



' PIEPLINE'

### **LOGISTIC REGRESSION CODE**

In [None]:
logistic_regression(X_train, X_test, y_train, y_test)

Best params: {'lr__C': 1, 'lr__max_iter': 10000, 'lr__solver': 'newton-cg'}
              precision    recall  f1-score   support

         0.0       0.76      0.69      0.72       139
         1.0       0.68      0.75      0.71       120

    accuracy                           0.72       259
   macro avg       0.72      0.72      0.72       259
weighted avg       0.72      0.72      0.72       259



### SVM

In [None]:
from sklearn.svm import SVC
def support_vector(X_train, X_test, y_train, y_test):
  pipe = Pipeline([('scaler', preprocessing.MinMaxScaler()), ('svc', SVC())])
  # clf = SVC().fit(X_train, y_train)
  # print("Training Accuracy:", clf.score(X_train, y_train))
  # print("Validation Accuracy:", accuracy_score(y_test, clf.predict(X_test)))

  # parameters = {'C':[0.0001, 0.001, 0.01, 0.1, 1], 'solver':["newton-cg", "lbfgs", "liblinear", "sag", "saga"],'penalty':["l1", "l2", "elasticnet", "none"]}
  parameters={'svc__C': [0.001,0.01,0.1, 1, 10, 100], 'svc__gamma':['scale', 'auto'], 'svc__kernel': ['linear','rbf','poly', 'sigmoid'], 'svc__degree':[1,2,3,4]}
  grid_search = GridSearchCV(pipe, parameters, scoring='accuracy')
  grid_fit = grid_search.fit(X_train, y_train)

  best_par = grid_search.best_params_
  print('Best params:', best_par) 
  
  best_dt = grid_fit.best_estimator_

  print(classification_report(y_test, best_dt.predict(X_test)))


In [None]:
support_vector(X_train, X_test, y_train, y_test)

Best params: {'svc__C': 10, 'svc__degree': 1, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}
              precision    recall  f1-score   support

         0.0       0.75      0.68      0.72       139
         1.0       0.67      0.74      0.70       120

    accuracy                           0.71       259
   macro avg       0.71      0.71      0.71       259
weighted avg       0.71      0.71      0.71       259



###**MLP**

In [None]:
from sklearn.neural_network import MLPClassifier
def multi_layer_perceptron(X_train, X_test, y_train, y_test):
  pipe = Pipeline([('scaler', preprocessing.MinMaxScaler()), ('mlp', MLPClassifier())])
  # clf = MLPClassifier().fit(X_train, y_train)
  # print("Training Accuracy:", clf.score(X_train, y_train))
  # print("Validation Accuracy:", accuracy_score(y_test, clf.predict(X_test)))

  
  parameters={'mlp__max_iter':[5000,1000,2000,3000], 'mlp__activation':['identity', 'logistic', 'tanh', 'relu'], 'mlp__hidden_layer_sizes':[(400,20)]}
  grid_search = GridSearchCV(pipe, parameters, scoring='accuracy')
  grid_fit = grid_search.fit(X_train, y_train)

  best_par = grid_search.best_params_
  print('Best params:', best_par) 
  
  best_dt = grid_fit.best_estimator_

  print(classification_report(y_test, best_dt.predict(X_test)))

In [None]:
multi_layer_perceptron(X_train, X_test, y_train, y_test)

Best params: {'mlp__activation': 'relu', 'mlp__hidden_layer_sizes': (400, 20), 'mlp__max_iter': 3000}
              precision    recall  f1-score   support

         0.0       0.78      0.65      0.71       139
         1.0       0.66      0.79      0.72       120

    accuracy                           0.71       259
   macro avg       0.72      0.72      0.71       259
weighted avg       0.73      0.71      0.71       259



In [None]:
pipe = Pipeline([('scaler', preprocessing.MinMaxScaler()), ('mlp', MLPClassifier(activation= 'relu', hidden_layer_sizes= (400, 20), max_iter= 2000))])


In [None]:
pipe.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('mlp',
                 MLPClassifier(activation='relu', alpha=0.0001,
                               batch_size='auto', beta_1=0.9, beta_2=0.999,
                               early_stopping=False, epsilon=1e-08,
                               hidden_layer_sizes=(400, 20),
                               learning_rate='constant',
                               learning_rate_init=0.001, max_fun=15000,
                               max_iter=2000, momentum=0.9, n_iter_no_change=10,
                               nesterovs_momentum=True, power_t=0.5,
                               random_state=None, shuffle=True, solver='adam',
                               tol=0.0001, validation_fraction=0.1,
                               verbose=False, warm_start=False))],
         verbose=False)

In [None]:
pipe.score(X_test, y_test)

0.7297297297297297

In [None]:
x=pipe.predict_proba([[21,12.9,1.3156,200000]])


TypeError: ignored

In [None]:
import pickle 
filename = 'final_model.sav'
pickle.dump(pipe, open(filename, 'wb')) 

### **Optimizing values and calculating financial plans** 

for savings to spending ratio , normalised savings, debt to income ratio

###Using CVXPY Framework

Links to refer:

https://towardsdatascience.com/optimization-with-python-how-to-make-the-most-amount-of-money-with-the-least-amount-of-risk-1ebebf5b2f29


https://www.cvxpy.org/examples/applications/max_entropy.html

https://colab.research.google.com/github/cvxgrp/cvx_short_course/blob/master/applications/worst_case_analysis.ipynb



Steps to follow:
1. Define constraints
2. Define Minimizing equation
3. Use framework

Constraints:

spendings < income

savings > spending

savings>=income



savings > debt



In [None]:
import cvxpy as cp


In [None]:
import mosek

In [None]:

def optimize_values(saving,debt,spending,income,minimum):
  #please include minimum_amount as function parameter
  x1=cp.Variable(pos=True)
  x2=cp.Variable(pos=True)
  
  
  constraint1=[saving*x1>=0,
               saving*x1<=income,
                saving*x1>=spending,
              
                x1>=0
                
  ]
  constraint2=[
               spending*x2+ +saving*x1<=income,
               x2>=0.5,
               spending*x1>=minimum_amount
               x2<=1
  ]

 
  objective_fn1=x1
  objective_fn2=x2
  

  problem3 = cp.Problem(cp.Maximize(objective_fn1),constraint1)
  problem3.solve(verbose=True)

  problem4 = cp.Problem(cp.Minimize(objective_fn2),constraint2)
  problem4.solve()
  assert problem3.is_dqcp()
  assert problem4.is_dqcp()
 
  
  return x1.value, x2.value


In [None]:
print(cp.installed_solvers())
saving=152474
income=2100000
debt=12.9*income
spending=saving/10.316
print("current spending", spending)
x,y=optimize_values(saving,debt,spending,income)  

['CVXOPT', 'ECOS', 'ECOS_BB', 'GLPK', 'GLPK_MI', 'MOSEK', 'OSQP', 'SCS']
current spending 14780.341217526171

ECOS 2.0.7 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  -4.472e+00  -5.349e+03  +8e+03  2e-06  4e-01  1e+00  1e+03    ---    ---    1  1  - |  -  - 
 1  -4.482e+00  -6.366e+01  +9e+01  2e-08  2e-02  1e-01  1e+01  0.9890  1e-04   0  0  0 |  0  0
 2  -5.288e+00  -9.344e+00  +6e+00  1e-09  1e-03  2e-02  9e-01  0.9353  2e-04   0  0  0 |  0  0
 3  -1.363e+01  -1.393e+01  +5e-01  1e-10  2e-04  8e-02  9e-02  0.9853  4e-02   0  0  0 |  0  0
 4  -1.377e+01  -1.377e+01  +6e-03  1e-12  3e-06  1e-03  1e-03  0.9890  1e-04   0  0  0 |  0  0
 5  -1.377e+01  -1.377e+01  +6e-05  2e-14  3e-08  1e-05  1e-05  0.9890  1e-04   0  0  0 |  0  0
 6  -1.377e+01  -1.377e+01  +7e-07  2e-16  3e-10  1e-07  1e-07  0.9890  1e-04   0  0  0 |  0  0
 7  -1.377e+01  -1.377e+01  +8e-0

In [None]:
print("Future Saving Required",saving*x)
print("Future Spending Required",spending*y)

Future Saving Required 271714.9591750198
Future Spending Required 7390.170609119673
