In [None]:
import numpy as np, pandas as pd 

import json
import matplotlib.pyplot as plt 
import seaborn as sns 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

from google.colab import drive

In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def piecewise_norm(vec,val,n):
    """Piecewise-linear normalisation of ``vec`` around the breakpoint ``val``.

    Elements below ``val`` are mapped linearly from ``[min(vec), val)`` onto
    ``[0, n)``; elements at or above ``val`` are mapped linearly from
    ``[val, max(vec)]`` onto ``[n, 1]``.

    Parameters
    ----------
    vec : array-like of numbers
        Values to normalise.
    val : number
        Breakpoint; must be strictly greater than ``min(vec)``.
    n : number
        Output value assigned to the breakpoint.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``vec``.

    Raises
    ------
    AssertionError
        If ``val`` is not strictly greater than ``min(vec)``.
    """
    vec = np.asarray(vec, dtype=float)
    # Compute the extremes once, vectorised, instead of repeated Python min/max.
    lo, hi = vec.min(), vec.max()

    # Raised explicitly (rather than via ``assert``) so the check survives
    # ``python -O``; the exception type is kept for backward compatibility.
    if not val > lo:
        raise AssertionError("val must be strictly greater than min(vec)")

    lower = n*(vec - lo)/(val - lo)
    if hi > val:
        upper = (1-n)*(vec - val)/(hi - val) + n
    else:
        # No element lies above the breakpoint, so the upper segment is
        # degenerate: anything equal to ``val`` maps to ``n`` instead of the
        # NaN that a 0/0 division would produce.
        upper = np.full_like(vec, n)

    return np.where(vec < val, lower, upper)


def transform_data(dataset,dataset_columns,normalize=True):
    """Build a model-ready DataFrame from ``dataset``.

    Columns of ``dataset`` are visited in order and handled according to the
    kind declared for them in ``dataset_columns``:

    * ``'categorical'`` - label-encoded, then one-hot encoded into integer
      columns named ``f'{name}_{class}'`` (one per distinct class).
    * ``'numeric'`` - min-max scaled to ``[0, 1]`` when ``normalize`` is true,
      otherwise copied unchanged.
    * ``'original'`` / ``'target'`` - copied unchanged.

    Columns that are missing from ``dataset_columns``, or whose declared kind
    is none of the above, are left out of the result.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Source data.
    dataset_columns : dict
        Maps column name to one of the kinds listed above.
    normalize : bool, default True
        Whether numeric columns are min-max scaled.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh default index; ``dataset`` is not modified.
    """
    # Collect the columns first and build the frame once at the end, rather
    # than growing a DataFrame one column at a time.
    columns = {}
    for name in dataset.columns:
        kind = dataset_columns.get(name)
        if kind is None:
            continue

        raw_data = dataset[name].values

        if kind == 'categorical':
            d_encoder = LabelEncoder()
            d_encoded = d_encoder.fit_transform(raw_data)
            dummy_y = to_categorical(d_encoded)

            for (j,k) in enumerate(d_encoder.classes_):
                columns[f'{name}_{k}'] = dummy_y[:,j].astype('int')

        elif kind == 'numeric':
            if normalize:
                values = np.asarray(raw_data, dtype=float)
                if values.size == 0:
                    columns[name] = values
                    continue
                # NaN-aware extremes: missing values stay NaN in the output
                # without corrupting the scale of the remaining entries.
                lo, hi = np.nanmin(values), np.nanmax(values)
                span = hi - lo
                if span == 0:
                    # Constant column: avoid 0/0 and map every entry to 0.
                    columns[name] = np.zeros_like(values)
                else:
                    columns[name] = (values - lo)/span
            else:
                columns[name] = raw_data

        elif kind in ('original', 'target'):
            columns[name] = raw_data

    return pd.DataFrame(columns)

In [None]:
class NeuralNetwork():
  """Small wrapper around a Keras ``Sequential`` fully connected classifier.

  The network is a stack of ``Dense`` hidden layers followed by one ``Dense``
  output layer, compiled with the Adam optimizer and an accuracy metric.
  """

  def __init__(self,hidden_neurons = [4], hidden_activation = ['relu'], output_activation='softmax', lr = 0.05, n_input = 1, n_output = 1, loss='binary_crossentropy'):
    """Build and compile the model.

    Parameters
    ----------
    hidden_neurons : sequence of int
        Number of units of each hidden layer, in order.
    hidden_activation : str or sequence of str
        Activation of each hidden layer. When fewer activations than layers
        are given, the last one is reused for the remaining layers.
    output_activation : str
        Activation of the output layer.
    lr : float
        Learning rate passed to Adam.
    n_input : int
        Number of input features.
    n_output : int
        Number of output units.
    loss : str
        Loss passed to ``compile``; defaults to the previously hard-coded
        ``'binary_crossentropy'``.
    """
    if isinstance(hidden_activation, str):
      hidden_activation = [hidden_activation]
    # Copy the arguments so the (mutable) defaults are never shared or altered.
    activations = list(hidden_activation) or ['relu']
    layer_sizes = list(hidden_neurons)

    # create model
    self.model = Sequential()
    for i, units in enumerate(layer_sizes):
      activation = activations[min(i, len(activations) - 1)]
      if i == 0:
        # Only the first layer needs the input size; later layers infer it.
        self.model.add(Dense(units, input_dim=n_input, activation=activation))
      else:
        self.model.add(Dense(units, activation=activation))

    if layer_sizes:
      self.model.add(Dense(n_output, activation=output_activation))
    else:
      # No hidden layers: the output layer is the first layer of the model.
      self.model.add(Dense(n_output, input_dim=n_input, activation=output_activation))

    # Compile model
    # ``learning_rate`` replaces the deprecated ``lr`` keyword of Adam.
    opt = Adam(learning_rate=lr)
    self.model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

  def train(self, X, y, epochs=10, verbose = 0):
    """Fit the model on ``X``/``y`` for ``epochs`` epochs.

    The object returned by Keras ``fit`` is kept in ``self.history``.
    """
    self.history = self.model.fit(x=X,y=y,epochs=epochs, verbose=verbose)

  def predict(self, X, y):
    """Predict on ``X`` and print a confusion matrix and classification report.

    Raw model outputs are thresholded at 0.5 to obtain 0/1 labels, which are
    then compared with the true labels ``y``. Nothing is returned.
    """
    # NOTE(review): the 0.5 threshold presumes a single sigmoid-style output
    # per sample - confirm before using with multi-class outputs.
    raw_Y_pred = self.model.predict(X)
    y_pred = np.where(raw_Y_pred > 0.5, 1, 0)
    print('Confusion Matrix')
    print(confusion_matrix(y, y_pred))
    print('Classification Report')
    print(classification_report(y, y_pred))

## Carregando a base de dados

## Codificação de variáveis categóricas

## Normalização de variáveis numéricas

In [None]:
# Architecture: one hidden layer of 15 units with ReLU activation.
hidden_neurons = [15]
activation_layers = ['relu']

# Activation of the output layer.
output_activation = 'sigmoid'

# Optimisation settings.
lr = 0.05
epochs = 50

# `activation_layers` is now forwarded; it was previously defined but never used.
# NOTE(review): n_input / n_output are not defined in any visible cell -
# confirm the data-preparation cells that set them run before this one.
model = NeuralNetwork(hidden_neurons=hidden_neurons,hidden_activation=activation_layers,output_activation=output_activation,n_input=n_input,n_output=n_output, lr = lr)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Fit the network on the training data for `epochs` epochs.
# NOTE(review): x_train / y_train are not defined in any visible cell -
# confirm the data-preparation cells run before this one.
model.train(X=x_train,y=y_train,epochs=epochs)

In [None]:
# Evaluate on the held-out data: prints the confusion matrix and the
# classification report.
# NOTE(review): x_test / y_test are not defined in any visible cell -
# confirm the data-preparation cells run before this one.
model.predict(x_test, y_test)

Confusion Matrix
[[71 28]
 [16 39]]
Classification Report
              precision    recall  f1-score   support

           0       0.82      0.72      0.76        99
           1       0.58      0.71      0.64        55

    accuracy                           0.71       154
   macro avg       0.70      0.71      0.70       154
weighted avg       0.73      0.71      0.72       154



# Seleção de variáveis

## Usando RFE

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, SelectKBest, chi2

In [None]:
# Recursive feature elimination with a random forest as the ranking estimator.
# A fixed random_state keeps the selected features reproducible across runs.
# NOTE(review): this rebinds `model`, replacing the neural network built
# earlier; re-running the earlier train/predict cells after this one will fail.
model = RandomForestClassifier(random_state=42)

# n_features_to_select must be passed by keyword in current scikit-learn.
rfe = RFE(model, n_features_to_select=3)
fit = rfe.fit(X_train, y_train)

# zip() stops at the shorter input, so any trailing non-feature column of
# new_df (the captured output lists 'class' last) is ignored.
# NOTE(review): this presumes new_df's leading columns are in the same order
# as the columns of X_train - confirm.
names = [name for name, selected in zip(new_df.columns, fit.support_) if selected]

print(f"Num Features: {fit.n_features_}")
print(f"Name Features: {new_df.columns}")
print(f"Masked Features: {fit.support_}")
print(f"Feature Ranking: {fit.ranking_}")
print(f"Selected Features: {names}")

Num Features: 3
Name Features: Index(['preg', 'plas', 'pres', 'skin', 'insu', 'mass', 'pedi', 'age', 'class'], dtype='object')
Masked Features: [False  True False False False  True  True False]
Feature Ranking: [5 1 3 6 4 1 1 2]
Selected Features: ['plas', 'mass', 'pedi']


## Usando Feature Importance

## Usando chi2

In [None]:
# Univariate feature selection: score each feature against the target with the
# chi-squared statistic and keep the k=4 highest-scoring features.
test = SelectKBest(score_func=chi2, k=4)
fit = test.fit(X_train, y_train)

# Print the per-feature chi-squared scores with three decimals.
# np.set_printoptions changes NumPy's print formatting for the rest of the session.
np.set_printoptions(precision=3)
print(fit.scores_)

# Keep only the selected feature columns.
# NOTE(review): `X` is not defined in any visible cell (the selector was fitted
# on X_train) - confirm it is the intended feature matrix.
features = fit.transform(X)

[  77.453 1082.927   20.373   24.007 1197.141  107.766    3.542  194.164]


In [None]:
# Pairwise Pearson correlation between the numeric features.
# Non-numeric columns are excluded explicitly: older pandas dropped them
# silently (as the captured output shows for 'class'), while pandas >= 2.0
# raises when corr() meets a non-numeric column.
new_df.select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age
preg,1.0,0.129459,0.141282,-0.081672,-0.073535,0.017683,-0.033523,0.544341
plas,0.129459,1.0,0.15259,0.057328,0.331357,0.221071,0.137337,0.263514
pres,0.141282,0.15259,1.0,0.207371,0.088933,0.281805,0.041265,0.239528
skin,-0.081672,0.057328,0.207371,1.0,0.436783,0.392573,0.183928,-0.11397
insu,-0.073535,0.331357,0.088933,0.436783,1.0,0.197859,0.185071,-0.042163
mass,0.017683,0.221071,0.281805,0.392573,0.197859,1.0,0.140647,0.036242
pedi,-0.033523,0.137337,0.041265,0.183928,0.185071,0.140647,1.0,0.033561
age,0.544341,0.263514,0.239528,-0.11397,-0.042163,0.036242,0.033561,1.0
