In [14]:
# same step to LVQ/KNN
#scikit-learn, matplotlib
#pip install sklvq

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Columns kept from the raw file; 'y' is the target.
SELECTED_COLS = ['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'y']
# Columns that are one-hot encoded (everything selected except 'age' and 'y').
CATEGORICAL_COLS = ['job', 'marital', 'education', 'default', 'housing', 'loan']
# Fixed seed so the train/test split is the same on every Restart & Run All.
SPLIT_SEED = 42

raw_df = pd.read_csv('bank-additional-full.csv', sep=';')

# Keep only the selected columns and drop rows with missing values.
# 'yes' -> 0 and 'no' -> 1 is applied to every column (same mapping as before),
# so the target 'y' becomes numeric.
df = raw_df[SELECTED_COLS].dropna().replace({'yes': 0, 'no': 1})

# 60% of the rows are held out for testing (test_size=0.6).
train, test = train_test_split(df, test_size=0.6, shuffle=True, random_state=SPLIT_SEED)

# Separate the target before encoding the features.
y_train = train["y"]
y_test = test["y"]

x_train = pd.get_dummies(train.drop("y", axis=1), prefix=CATEGORICAL_COLS)
x_test = pd.get_dummies(test.drop("y", axis=1), prefix=CATEGORICAL_COLS)

# The two splits are encoded independently, so a category that is absent from
# one split would yield a different set of columns. Align the test columns to
# the training layout; categories never seen in training are dropped and
# categories missing from the test split are filled with 0.
x_test = x_test.reindex(columns=x_train.columns, fill_value=0)

# Normalization to [0, 1]. The scaler is fitted on the training split only and
# then re-used for the test split, so both splits share the same scaling and no
# test-set statistics leak into the preprocessing.
scaler = MinMaxScaler(feature_range=(0, 1))

x_train_scaled = scaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train_scaled)

x_test_scaled = scaler.transform(x_test)
x_test = pd.DataFrame(x_test_scaled)


In [15]:
from sklearn.preprocessing import StandardScaler
from sklvq import GLVQ

# Sklearn's standardscaler to perform z-transform
#scaler = StandardScaler()

# Compute (fit) and apply (transform) z-transform
#df1 = scaler.fit_transform(df)

# Settings for the GLVQ classifier, gathered in one place so they are easy to tune.
glvq_params = {
    # Distance between samples and prototypes.
    "distance_type": "squared-euclidean",
    # Swish activation with beta = 2.
    "activation_type": "swish",
    "activation_params": {"beta": 2},
    # Steepest gradient descent: 20 runs with a step size of 0.1.
    "solver_type": "steepest-gradient-descent",
    "solver_params": {"max_runs": 20, "step_size": 0.1},
}

# Model object that is fitted to the data in a later cell.
model = GLVQ(**glvq_params)

In [16]:
from sklearn.metrics import classification_report

# Fit the GLVQ model on the scaled training split.
model.fit(x_train, y_train)

# Label the held-out test split with the fitted model.
predicted_labels = model.predict(x_test)

# Per-class precision / recall / F1 measured on the test split
# (y_test vs. the predictions above), not on the training data.
report = classification_report(y_true=y_test, y_pred=predicted_labels)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2760
           1       0.89      1.00      0.94     21953

    accuracy                           0.89     24713
   macro avg       0.44      0.50      0.47     24713
weighted avg       0.79      0.89      0.84     24713



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
# Implementação antiga:
#LVQ - código em https://towardsdatascience.com/learning-vector-quantization-ed825f8c807d
import numpy as np

def train_lvq(data, labels, num_epochs, learning_rate, validation_data=None, validation_labels=None):
  """Train an LVQ classifier with one prototype per class.

  Each prototype is initialised with the mean of its class. For every sample
  in every epoch, the nearest prototype (squared Euclidean distance) is moved
  towards the sample when their labels match and away from it otherwise.

  Args:
    data: 2-D collection of feature vectors, one row per sample (DataFrame,
      ndarray or nested list). It is converted to a float array, so rows are
      iterated regardless of the container type.
    labels: class label of each row of `data`; cast to int.
    num_epochs: number of passes over `data`.
    learning_rate: scale applied to each prototype update.
    validation_data: optional 2-D collection used to report the error rate
      after each epoch.
    validation_labels: labels for `validation_data`; cast to int.

  Returns:
    Tuple `(prototypes, proto_labels)`: an array of shape
    (number of classes, number of features) and the list of class labels, where
    `proto_labels[k]` is the class of `prototypes[k]`.

  Raises:
    ValueError: if the number of rows and the number of labels differ.
  """
  # Convert up front: iterating a DataFrame directly yields column labels, not rows.
  samples = np.asarray(data, dtype=float)
  targets = np.asarray(labels).astype(int)
  if len(samples) != len(targets):
    raise ValueError("data and labels must have the same number of rows")

  # Sorted unique class labels as plain Python ints.
  unique_labels = np.unique(targets).tolist()
  num_dims = samples.shape[1]

  prototypes = np.empty((len(unique_labels), num_dims))
  proto_labels = []

  # Initialise each prototype with its class mean. The prototype row is the
  # position of the label in `unique_labels`, not the label value itself, so
  # labels do not have to be 0..k-1.
  for row, class_label in enumerate(unique_labels):
    mean = samples[targets == class_label].mean(axis=0)
    print(class_label, mean)

    prototypes[row] = mean
    proto_labels.append(class_label)

  # Validation is only possible when both the samples and their labels are given.
  has_validation = validation_data is not None and validation_labels is not None
  if has_validation:
    val_samples = np.asarray(validation_data, dtype=float)
    val_targets = np.asarray(validation_labels).astype(int)
    if len(val_samples) != len(val_targets):
      raise ValueError("validation_data and validation_labels must have the same number of rows")
    # Nothing to evaluate on an empty validation set (avoids dividing by zero).
    has_validation = len(val_targets) > 0

  for epoch in range(num_epochs):
    for fvec, lbl in zip(samples, targets):
      # Squared distance from this sample to every prototype; the closest one wins.
      distances = np.sum((prototypes - fvec) ** 2, axis=1)
      winner_index = int(np.argmin(distances))

      # Attract the winner when its label matches, repel it otherwise.
      sign = 1 if proto_labels[winner_index] == lbl else -1
      prototypes[winner_index] += (fvec - prototypes[winner_index]) * learning_rate * sign

    if has_validation:
      # Fraction of validation samples whose nearest prototype has the wrong label.
      mistakes = 0
      for fvec, lbl in zip(val_samples, val_targets):
        nearest = int(np.argmin(np.sum((prototypes - fvec) ** 2, axis=1)))
        if proto_labels[nearest] != lbl:
          mistakes += 1

      val_err = mistakes / len(val_targets)
      print("Epoch" + str(epoch) + ". Validation error: " + str(val_err))
    else:
      print("Epoch" + str(epoch))

  return (prototypes, proto_labels)

In [18]:
#y_train_num = y_train.replace({'yes': 0, 'no': 1}, inplace=False)
#y_test_num = y_test.replace({'yes': 0, 'no': 1}, inplace=False)
# Run the hand-written LVQ trainer for 30 epochs with a learning rate of 0.04,
# reporting the error on the test split after every epoch.
# a receives the returned prototypes, b the returned prototype labels.
a, b = train_lvq(
    x_train,
    y_train,
    num_epochs=30,
    learning_rate=0.04,
    validation_data=x_test,
    validation_labels=y_test,
)

0 [3.15276596e-01 2.98404255e-01 1.35106383e-01 2.44680851e-02
 2.12765957e-02 6.96808511e-02 9.04255319e-02 3.08510638e-02
 7.44680851e-02 6.43617021e-02 1.52127660e-01 3.03191489e-02
 8.51063830e-03 9.73404255e-02 5.35106383e-01 3.65425532e-01
 2.12765957e-03 8.67021277e-02 4.41489362e-02 1.06382979e-01
 2.27127660e-01 5.31914894e-04 1.26595745e-01 3.55851064e-01
 5.26595745e-02 0.00000000e+00 9.06382979e-01 9.36170213e-02
 5.33510638e-01 4.42021277e-01 2.44680851e-02 1.43085106e-01
 8.32446809e-01 2.44680851e-02]
1 [3.03875757e-01 2.48441247e-01 2.32476876e-01 3.90544707e-02
 2.63103803e-02 7.20794793e-02 3.25453923e-02 3.50119904e-02
 9.68139774e-02 1.62384378e-02 1.67317575e-01 2.66529633e-02
 7.05721137e-03 1.14628297e-01 6.07399794e-01 2.76533059e-01
 1.43884892e-03 1.03117506e-01 5.70058239e-02 1.53545735e-01
 2.29599178e-01 4.11099692e-04 1.30044536e-01 2.85919836e-01
 4.03562864e-02 6.85166153e-05 7.76909901e-01 2.23021583e-01
 5.24494690e-01 4.51935594e-01 2.35697157e-02 1.4

In [56]:

# Scratch check of the class-mean computation used to initialise LVQ prototypes.
# y_train is used directly: it is already numeric, and no live cell defines a
# separate y_train_num, so the previous name failed on a fresh kernel.
# NOTE(review): label 2 is kept from the original cell, but the classification
# report earlier in the notebook only shows classes 0 and 1 -- confirm which
# class is meant here.
target_label = 2
opposites = [row for row, lbl in zip(x_train.values, y_train) if lbl == target_label]

# axis=0 averages across the selected samples (one value per feature);
# axis=1 would average the features within each sample instead.
# With no matching sample there is nothing to average, so mean is left as None.
mean = np.mean(opposites, axis=0) if opposites else None