# Neural networks - Bank marketing campaign

In [None]:
# Reads as one update step of the form w - eta * 2 * w with w = 0.64 and eta = 0.1
# (the same update rule the loop in the following cell applies repeatedly).
0.64-0.1*2*0.64

0.512

A learning rate that is too high can make gradient descent diverge instead of converge.

In [None]:
# Repeatedly apply the update w <- w - eta * 2 * w (2 * w is the gradient of w**2).
# With eta = 1.1 every step multiplies w by (1 - 2.2) = -1.2, so |w| keeps growing.
w = 1          # starting point
eta = 1.1      # learning rate
n_iter = 100   # number of update steps

for step in range(n_iter):
  w -= eta * 2 * w

print(w)




82817974.5220158


In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Load the bank marketing campaign data (semicolon-separated); 'y' is the target column.
df = pd.read_csv('/content/bank_mark_campaign.csv', sep=';')

# Treat the literal string 'unknown' as a missing value.
df = df.replace('unknown', np.nan)

# Column groups, each routed to its own preprocessing branch below.
col_nan = df.columns[df.isna().any(axis=0)].to_list()  # columns with at least one missing value
col_num = df.describe().columns.to_list()              # columns summarised by describe()
col_cat = df.columns.difference(col_nan + col_num + ['y']).to_list()  # everything else except the target

# Columns with missing values: fill with the most frequent value, then one-hot encode.
na_treat = Pipeline([
    ('imp', SimpleImputer(strategy='most_frequent')),
    ('oneh', OneHotEncoder(drop='first'))])

preprocessor = ColumnTransformer([
    ('na_tr', na_treat, col_nan),
    ('cat_tr', OneHotEncoder(drop='first'), col_cat),
    ('scale_tr', StandardScaler(), col_num)],
    remainder='passthrough')

# Two hidden layers with 5 and 3 neurons, respectively.
# random_state pins the weight initialisation and shuffling so the printed metrics are reproducible.
pipe = Pipeline([
    ('pre', preprocessor),
    ('nn', MLPClassifier(hidden_layer_sizes=(5, 3), random_state=45))])

X = df.drop('y', axis=1)
y = df['y']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

pipe.fit(X_train, y_train)

# Report the same metrics first on the training split, then on the test split.
# 'yes' is the positive class; the confusion matrix lists 'yes' first.
for X_split, y_split in ((X_train, y_train), (X_test, y_test)):
  y_pred = pipe.predict(X_split)

  acur = accuracy_score(y_split, y_pred)
  print(f'Accuracy= {acur}')
  cm = confusion_matrix(y_split, y_pred, labels=['yes', 'no'])
  print(cm)
  precision = precision_score(y_split, y_pred, pos_label='yes')
  print(f'Precision= {precision}')
  recall = recall_score(y_split, y_pred, pos_label='yes')
  print(f'Recall= {recall}')


Accuracy= 0.9177541729893779
[[ 1856  1844]
 [  866 28384]]
Precision= 0.6818515797207936
Recall= 0.5016216216216216
Accuracy= 0.9113862588006798
[[ 451  489]
 [ 241 7057]]
Precision= 0.6517341040462428
Recall= 0.4797872340425532




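*   Since the classes are heavily imbalanced (the `yes` class is strongly under-represented), we need to compensate for that, e.g. by oversampling the minority class in the training set.
*   Neural networks tend to overfit, so we need regularization to limit it; here we tune `alpha`, the strength of the L2 (ridge-style) penalty of `MLPClassifier`.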



In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier


# Load the bank marketing campaign data (semicolon-separated); 'y' is the target column.
df = pd.read_csv('/content/bank_mark_campaign.csv', sep=';')

# Treat the literal string 'unknown' as a missing value.
df = df.replace('unknown', np.nan)

# Column groups, each routed to its own preprocessing branch below.
col_nan = df.columns[df.isna().any(axis=0)].to_list()  # columns with at least one missing value
col_num = df.describe().columns.to_list()              # columns summarised by describe()
col_cat = df.columns.difference(col_nan + col_num + ['y']).to_list()  # everything else except the target

# Columns with missing values: fill with the most frequent value, then one-hot encode.
na_treat = Pipeline([
    ('imp', SimpleImputer(strategy='most_frequent')),
    ('oneh', OneHotEncoder(drop='first'))])

preprocessor = ColumnTransformer([
    ('na_tr', na_treat, col_nan),
    ('cat_tr', OneHotEncoder(drop='first'), col_cat),
    ('scale_tr', StandardScaler(), col_num)],
    remainder='passthrough')

# Candidate regularization strengths for the network.
hyper = {
    'alpha': [0.0001, 0.01, 0.2]
}

# The final pipeline step is a 5-fold grid search over alpha, scored by ROC AUC.
# random_state pins the network's initialisation so the search and the metrics are reproducible.
pipe = Pipeline([
    ('pre', preprocessor),
    ('grid', GridSearchCV(MLPClassifier(hidden_layer_sizes=(5, 3), random_state=45), hyper, cv=5, scoring='roc_auc'))])

X = df.drop('y', axis=1)
y = df['y']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

pipe.fit(X_train, y_train)

# Report the same metrics first on the training split, then on the test split.
# 'yes' is the positive class; the confusion matrix lists 'yes' first.
for X_split, y_split in ((X_train, y_train), (X_test, y_test)):
  y_pred = pipe.predict(X_split)

  acur = accuracy_score(y_split, y_pred)
  print(f'Accuracy= {acur}')
  cm = confusion_matrix(y_split, y_pred, labels=['yes', 'no'])
  print(cm)
  precision = precision_score(y_split, y_pred, pos_label='yes')
  print(f'Precision= {precision}')
  recall = recall_score(y_split, y_pred, pos_label='yes')
  print(f'Recall= {recall}')


Accuracy= 0.919453717754173
[[ 1993  1707]
 [  947 28303]]
Precision= 0.677891156462585
Recall= 0.5386486486486487
Accuracy= 0.9136926438455936
[[ 480  460]
 [ 251 7047]]
Precision= 0.6566347469220246
Recall= 0.5106382978723404


In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import RandomOverSampler


# Load the bank marketing campaign data (semicolon-separated); 'y' is the target column.
df = pd.read_csv('/content/bank_mark_campaign.csv', sep=';')

# Treat the literal string 'unknown' as a missing value.
df = df.replace('unknown', np.nan)

# Column groups, each routed to its own preprocessing branch below.
col_nan = df.columns[df.isna().any(axis=0)].to_list()  # columns with at least one missing value
col_num = df.describe().columns.to_list()              # columns summarised by describe()
col_cat = df.columns.difference(col_nan + col_num + ['y']).to_list()  # everything else except the target

# Columns with missing values: fill with the most frequent value, then one-hot encode.
na_treat = Pipeline([
    ('imp', SimpleImputer(strategy='most_frequent')),
    ('oneh', OneHotEncoder(drop='first'))])

preprocessor = ColumnTransformer([
    ('na_tr', na_treat, col_nan),
    ('cat_tr', OneHotEncoder(drop='first'), col_cat),
    ('scale_tr', StandardScaler(), col_num)],
    remainder='passthrough')

# Candidate regularization strengths for the network.
hyper = {
    'alpha': [0.0001, 0.01, 0.2]
}

# Preprocessing is applied outside this pipeline (see below) so that the oversampler
# can work on the already-transformed training matrix.
# random_state pins the network's initialisation so the metrics are reproducible.
pipe = Pipeline([
    ('grid', GridSearchCV(MLPClassifier(hidden_layer_sizes=(5, 3), random_state=45), hyper, cv=5, scoring='roc_auc'))])

X = df.drop('y', axis=1)
y = df['y']

# Split BEFORE fitting the preprocessor: fitting it on the full data would let the test rows
# influence the imputed values, the encoder categories and the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

X_train = preprocessor.fit_transform(X_train_raw)  # learn the transformation on the training rows only
X_test = preprocessor.transform(X_test_raw)        # reuse the fitted transformation on the test rows

ros = RandomOverSampler(random_state=45)  # this creates the sampler

# Oversample the training set only; the test set keeps its original class balance.
X_train_res, y_train_res = ros.fit_resample(X_train, y_train)

# NOTE(review): the grid search cross-validates on the resampled data, so repeated minority rows
# can land in both the fitting and validation folds; its CV scores are likely optimistic.
pipe.fit(X_train_res, y_train_res)  # this fits the model on the resampled training set

# Report the same metrics first on the (non-resampled) training split, then on the test split.
# 'yes' is the positive class; the confusion matrix lists 'yes' first.
for X_split, y_split in ((X_train, y_train), (X_test, y_test)):
  y_pred = pipe.predict(X_split)

  acur = accuracy_score(y_split, y_pred)
  print(f'Accuracy= {acur}')
  cm = confusion_matrix(y_split, y_pred, labels=['yes', 'no'])
  print(cm)
  precision = precision_score(y_split, y_pred, pos_label='yes')
  print(f'Precision= {precision}')
  recall = recall_score(y_split, y_pred, pos_label='yes')
  print(f'Recall= {recall}')


Accuracy= 0.8612443095599392
[[ 3440   260]
 [ 4312 24938]]
Precision= 0.4437564499484004
Recall= 0.9297297297297298
Accuracy= 0.8550619082301529
[[ 852   88]
 [1106 6192]]
Precision= 0.4351378958120531
Recall= 0.9063829787234042


In [None]:
# Manual check of the test-set precision using the test confusion matrix printed above:
# 852 rows are both true and predicted 'yes'; 1106 rows are true 'no' but predicted 'yes'.
852 / (852+1106)

0.4351378958120531

# Neural networks - Data car radios

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

import datetime

# Load the car-radio dataset from Excel. The cells below use the columns
# 'bdate', 'datep', 'team', 'prized', 'prizeq' and the target 'perc_defec'.
df = pd.read_excel('/content/data_carradios.xlsx')

def get_ages(col, ref_date=None):
  """Convert date values into ages expressed in whole years.

  Parameters
  ----------
  col : pandas.DataFrame
      Frame of datetime values (used below on the single 'bdate' column).
  ref_date : datetime-like, optional
      Date the ages are measured against. Defaults to the current time, in
      which case the result changes as time passes; pass a fixed date for
      reproducible features.

  Returns
  -------
  pandas.DataFrame
      Same shape, index and column names as ``col``, holding float ages.
  """
  if ref_date is None:
    ref_date = datetime.datetime.now()
  elapsed = ref_date - col
  # Divide by one average Gregorian year (365.2425 days, the length numpy uses for its 'Y'
  # unit) instead of casting with astype('<m8[Y]'): pandas >= 2.0 only allows casting
  # timedeltas to s/ms/us/ns. Flooring keeps the ages as whole years.
  years = np.floor(elapsed / pd.Timedelta(days=365.2425))
  return pd.DataFrame(years)

# Age branch: derive ages with get_ages, then standardise them.
age_steps = [
    ('ages', FunctionTransformer(get_ages, feature_names_out='one-to-one')),
    ('scale', StandardScaler()),
]
ager = Pipeline(steps=age_steps)

def get_weekdays(col):
  """Return the weekday number (Monday=0 ... Sunday=6) of the first column of ``col``.

  ``col`` is a DataFrame whose first column holds datetime values; the result
  is a one-column DataFrame with the same index and column name.
  """
  first_column = col.iloc[:, 0]
  return first_column.dt.weekday.to_frame()

# Weekday branch: extract the weekday number of the date, then one-hot encode it.
weeker = Pipeline([
    ('weekd', FunctionTransformer(get_weekdays, feature_names_out='one-to-one')),
    ('oneh', OneHotEncoder(drop='first'))
])

# Route each group of columns to its own transformation; any other column is passed through unchanged.
preprocessor = ColumnTransformer([
    ('ages_tr', ager, ['bdate']),
    ('weekd_tr', weeker, ['datep']),
    ('team_tr', OneHotEncoder(drop='first'), ['team']),
    ('scaler', StandardScaler(), ['prized', 'prizeq'])],
    remainder='passthrough')

X = df.drop('perc_defec', axis=1)
y = df['perc_defec']

# (A stray '^' at the end of this statement made the whole cell a SyntaxError.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

# NOTE(review): unfinished placeholder for a hyper-parameter search; it is not used below and
# the empty 'alpha' list must be filled in before it can be handed to a grid search.
hyper = {
    'alpha': [],
    'hidden_layer_sizes': [(5, 3), ]
}

# random_state pins the network's weight initialisation so the printed metrics are reproducible.
pipe = Pipeline([
    ('pre', preprocessor),
    ('nn', MLPRegressor(max_iter=10000, random_state=45))])

pipe.fit(X_train, y_train)

# Report the same metrics first on the training split, then on the test split.
for X_split, y_split in ((X_train, y_train), (X_test, y_test)):
  y_pred = pipe.predict(X_split)

  mae = mean_absolute_error(y_split, y_pred)
  # Root of the MSE taken explicitly: the `squared=False` shortcut was deprecated in
  # scikit-learn 1.4 and removed in 1.6, while this form works on every version.
  rsme = np.sqrt(mean_squared_error(y_split, y_pred))
  r2 = r2_score(y_split, y_pred)

  print(f'MAE= {mae}')
  print(f'RSME= {rsme}')
  print(f'R2= {r2}')

MAE= 2.9050785626515365
RSME= 3.858558520768176
R2= 0.9345277886097233
MAE= 3.7638325037406144
RSME= 5.078833638588232
R2= 0.8904468028945378
