# Machine Learning Algorithms

## Pre-processing

### Import Data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support as score

df = pd.read_csv('Personal_Loans.csv')

### Pre-process Data

In [2]:
# Regression labels
y_r = df['Loan Size']

# Classification labels
y_c = df['Personal Loan']

# Features
X = df.drop(['Loan Size', 'Personal Loan'], axis=1)

In [3]:
# Transform data
X_transformed = pd.get_dummies(X, drop_first=True)

In [4]:
# Standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_transformed)
X_standardize = pd.DataFrame(X_scaled,columns=X_transformed.columns)

In [5]:
# Regression train/test split
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_standardize, y_r, test_size=0.3, random_state=101)

# Classification train/test split
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_standardize, y_c, test_size=0.3, random_state=101)

### Intercept

In [6]:
def lin_reg_intercept(X_train, y_train):
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    return  round(float(lr.intercept_), 3)

In [7]:
lin_reg_intercept(X_train_r, y_train_r)

3.113

### Coefficients greater than zero LASSO

In [8]:
def lasso_predictors(X_train, y_train, alpha):
    las = Lasso(alpha = alpha)
    las.fit(X_train, y_train)
    return len(las.coef_ > 0)

In [9]:
lasso_predictors(X_train_r, y_train_r, 0.001)

49

### Mean squared error KNN

In [10]:
def knn_mse(X_train, y_train, X_test, y_test, k):
    knr = KNeighborsRegressor(k)
    knr.fit(X_train, y_train)
    predictions = knr.predict(X_test)
    return round(mean_squared_error(y_test, predictions), 3)

In [11]:
knn_mse(X_train_r, y_train_r, X_test_r, y_test_r, 3)

85.286

### Mean squared error

In [12]:
def forest_mse(X_train, y_train, X_test, y_test, random_state, no_trees, max_depth):
    rf = RandomForestRegressor(n_estimators = no_trees, max_depth = max_depth, random_state = random_state)
    rf.fit(X_train, y_train)
    predictions = rf.predict(X_test)
    return round(mean_squared_error(y_test, predictions), 3)

In [13]:
forest_mse(X_train_r, y_train_r, X_test_r, y_test_r, 23, 100, 3)

12.854

### Mean squared error of specific model

In [14]:
def choose_model(X_train, y_train, X_test, y_test, model):
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return round(mean_squared_error(y_test, predictions), 3)

In [15]:
choose_model(X_train_r, y_train_r, X_test_r, y_test_r, RandomForestRegressor())

7.688

### False negatives Logistic Regression

In [16]:
def log_reg_fn(X_train, y_train, X_test, y_test):
    lor = LogisticRegression()
    lor.fit(X_train, y_train)
    predictions = lor.predict(X_test)
    return confusion_matrix(y_test, predictions)[1][0]

In [17]:
log_reg_fn(X_train_c, y_train_c, X_test_c, y_test_c)

53

### True positives DTC

In [18]:
def decision_tree_tp(X_train, y_train, X_test, y_test, max_depth, random_state):
    dt = DecisionTreeClassifier(max_depth = max_depth, random_state = random_state)
    dt.fit(X_train, y_train)
    predictions = dt.predict(X_test)
    return confusion_matrix(y_test, predictions)[1][1]

In [19]:
decision_tree_tp(X_train_c, y_train_c, X_test_c, y_test_c, 3, 100)

135

### Weighted average precision

In [20]:
def svm_precision(X_train, y_train, X_test, y_test, kernel, C, gamma, random_state):
    svc = SVC(kernel = kernel, gamma = gamma, random_state = random_state)
    svc.fit(X_train, y_train)
    predictions = svc.predict(X_test)
    return round(score(y_test, predictions, average = 'weighted')[0], 3)

In [21]:
svm_precision(X_train_c, y_train_c, X_test_c, y_test_c, 'rbf', 1, 0.1, 23)

0.942

### Weighted average f1-score

In [22]:
def model_f1_score(X_train, y_train, X_test, y_test, model):
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return round(score(y_test, predictions, average = 'weighted')[2], 3)

In [23]:
model_f1_score(X_train_c, y_train_c, X_test_c, y_test_c, SVC())

0.944

### Weighted average of specific metric 

In [24]:
def model_metric_output(X_train, y_train, X_test, y_test, model, metric):
    metric_dic = {'Precision': 0, 'Recall': 1, 'F1_score': 2}
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return round(score(y_test, predictions, average = 'weighted')[metric_dic[metric]], 3)

In [25]:
model_metric_output(X_train_c, y_train_c, X_test_c, y_test_c, SVC(), 'F1_score')

0.944