In [1]:
import tensorflow as tf

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw survey data and take a first look at its schema and contents.
df = pd.read_excel("PurchaseBike.xlsx")
df.info()
df.head()

# The "ID" column is dropped before modelling.
cols_to_drop = ["ID"]
df = df.drop(columns=cols_to_drop)
# Preview the frame once that column is gone.
df.head()

### Dependent variable: Purchased Bike ('Yes' -> 1, anything else -> 0)
df['Purchased Bike'] = df['Purchased Bike'].eq('Yes').astype('int64')
df

# Remove the spaces from the multi-word column names in a single pass.
df = df.rename(columns={
    'Marital Status': 'MaritalStatus',
    'Home Owner': 'HomeOwner',
    'Commute Distance': 'CommuteDistance',
    'Purchased Bike': 'PurchasedBike',
})
df

# Split the frame into the target column and the model inputs.
deep_label = df["PurchasedBike"]
deep_feat = df.drop(columns=["PurchasedBike"])

# Quick look at every available input column.
list(deep_feat.columns)

# Classify each input column in one pass:
#   categorical -> exactly two distinct values, or object dtype;
#   continuous  -> int64/float64 with more than two distinct values.
categorical_columns, continuous_columns = [], []
for name in deep_feat.columns:
    series = deep_feat[name]
    n_distinct = len(series.unique())
    if n_distinct == 2 or series.dtype == 'O':
        categorical_columns.append(name)
    if n_distinct > 2 and (series.dtype == 'int64' or series.dtype == 'float64'):
        continuous_columns.append(name)

print("categorical columns : ", categorical_columns)
print("continuous columns : ", continuous_columns)

### Train / test bases - 70% of the rows for training, 30% held out for evaluation
from sklearn.model_selection import train_test_split
X_T, X_t, y_T, y_t = train_test_split(deep_feat, deep_label, test_size=0.3)
# Work on explicit copies so the column assignments below write into frames
# that are independent of deep_feat (and cannot raise SettingWithCopyWarning).
X_T, X_t = X_T.copy(), X_t.copy()

# Income and Age are bucketized further down with boundaries expressed in
# their raw units, so they must stay unscaled. Filtering (instead of
# list.remove) also avoids a ValueError if either column was not detected
# as continuous.
cols_to_scale = [col for col in continuous_columns if col not in ("Income", "Age")]

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training rows ONLY and reuse those statistics for the
# test rows. Re-fitting on the test set would put train and test on different
# scales and leak test-set statistics into the evaluation.
scaler = StandardScaler()
if cols_to_scale:  # StandardScaler rejects an empty column selection
    # Whole-column replacement: the scaled values are floats, while the
    # source columns may be int64.
    X_T[cols_to_scale] = scaler.fit_transform(X_T[cols_to_scale])
    X_t[cols_to_scale] = scaler.transform(X_t[cols_to_scale])

# Object-typed categoricals: hash each value into one of 1000 buckets, then
# learn an embedding whose width equals the column's number of distinct values.
categorical_object_feat_cols = []
for cat_name in categorical_columns:
    if df[cat_name].dtype == 'O':
        hashed = tf.feature_column.categorical_column_with_hash_bucket(
            key=cat_name, hash_bucket_size=1000)
        categorical_object_feat_cols.append(
            tf.feature_column.embedding_column(
                hashed, dimension=len(df[cat_name].unique())))

# int64 categoricals: used directly as a two-bucket identity column, embedded
# the same way.
categorical_integer_feat_cols = []
for cat_name in categorical_columns:
    if df[cat_name].dtype == 'int64':
        identity = tf.feature_column.categorical_column_with_identity(
            key=cat_name, num_buckets=2)
        categorical_integer_feat_cols.append(
            tf.feature_column.embedding_column(
                identity, dimension=len(df[cat_name].unique())))

# Plain numeric inputs: every continuous column except the two bucketized below.
continuous_feat_cols = [
    tf.feature_column.numeric_column(key=num_name)
    for num_name in continuous_columns
    if num_name not in ("Age", "Income")
]

# Age is discretized at fixed boundaries
# (cf. the earlier sketch: pd.cut(df["Age"], bins=[30,40,50,60,70,100])).
age_numeric = tf.feature_column.numeric_column(key="Age")
age_bucket = tf.feature_column.bucketized_column(
    age_numeric, boundaries=[30,40,50,60,70,100])
# Income is discretized the same way
# (cf. pd.cut(df["Income"], bins=[10000,50000,90000,120000,200000])).
income_numeric = tf.feature_column.numeric_column(key="Income")
income_bucket = tf.feature_column.bucketized_column(
    income_numeric, boundaries=[10000,50000,90000,120000,200000])

# Final feature list handed to the estimator, in the same order as before.
feat_cols = [
    *categorical_object_feat_cols,
    *categorical_integer_feat_cols,
    *continuous_feat_cols,
    age_bucket,
    income_bucket,
]

# Training input: shuffled batches of 50 rows, up to 1000 passes over X_T.
input_fun = tf.compat.v1.estimator.inputs.pandas_input_fn(
    X_T, y_T, batch_size=50, num_epochs=1000, shuffle=True)
# Prediction input: the held-out rows, unshuffled so the output order can be
# compared against y_t.
pred_input_fun = tf.compat.v1.estimator.inputs.pandas_input_fn(
    X_t, batch_size=50, shuffle=False)

# Binary classifier with three hidden layers of 12 units each.
DNN_model = tf.estimator.DNNClassifier(
    hidden_units=[12,12,12],
    feature_columns=feat_cols,
    n_classes=2,
)

DNN_model.train(input_fn=input_fun, steps=5000)

predictions = DNN_model.predict(pred_input_fun)
print(predictions)

# Materialize the predictions and peek at the first record.
res_pred = list(predictions)
res_pred[0]

# Keep only the predicted class id of every record.
y_pred = [record["class_ids"][0] for record in res_pred]

from sklearn.metrics import classification_report
rep = classification_report(y_t, y_pred)

print(rep)







def deep_learning_dnn(df_dl, dep_var, classes, bucket_boundaries=None):
    """Train a DNNClassifier on ``df_dl`` and predict on those same rows.

    The whole frame is used both to fit the network and as prediction input
    (there is no hold-out split inside this function).

    Args:
        df_dl: DataFrame holding the input columns and the dependent variable.
        dep_var: Name of the dependent-variable (label) column in ``df_dl``.
        classes: Number of label classes, forwarded as ``n_classes``.
        bucket_boundaries: Optional mapping ``column name -> boundaries`` for
            columns that are discretized with ``bucketized_column`` instead of
            being standardized. Defaults to the Age and Income boundaries
            originally hard-coded here. Columns absent from ``df_dl`` are skipped.

    Returns:
        A list with one prediction dict per row of ``df_dl``, as yielded by
        ``DNNClassifier.predict``.
    """
    if bucket_boundaries is None:
        bucket_boundaries = {
            "Age": [30, 40, 50, 60, 70, 100],
            "Income": [10000, 50000, 90000, 120000, 200000],
        }

    # Separate the dependent variable from the inputs.
    deep_feat = df_dl.drop(columns=[dep_var])
    deep_label = df_dl[dep_var]

    # Classify the input columns: binary or object dtype -> categorical;
    # int64/float64 with more than two distinct values -> continuous.
    categorical_columns = [
        col for col in deep_feat.columns
        if len(deep_feat[col].unique()) == 2 or deep_feat[col].dtype == 'O'
    ]
    continuous_columns = [
        col for col in deep_feat.columns
        if len(deep_feat[col].unique()) > 2
        and (deep_feat[col].dtype == 'int64' or deep_feat[col].dtype == 'float64')
    ]

    # Bucketized columns must keep their raw units: the boundaries are stated
    # in those units, so standardizing first would push every value into the
    # same bucket. They are therefore excluded from scaling and from the
    # plain numeric feature list.
    bucketized_columns = [col for col in bucket_boundaries if col in deep_feat.columns]
    cols_to_scale = [col for col in continuous_columns if col not in bucketized_columns]

    # Standardize once; the same frame is used for training and prediction.
    features = deep_feat
    if cols_to_scale:  # StandardScaler rejects an empty column selection
        # Whole-column replacement, since the scaled values are floats.
        features[cols_to_scale] = StandardScaler().fit_transform(features[cols_to_scale])

    # Object-typed categoricals: hash into 1000 buckets, then embed.
    categorical_object_feat_cols = [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(key=col, hash_bucket_size=1000),
            dimension=len(df_dl[col].unique()))
        for col in categorical_columns if df_dl[col].dtype == 'O'
    ]
    # int64 categoricals: two-bucket identity column, then embed.
    # The dtype is read from df_dl (the argument), not from a notebook global.
    categorical_integer_feat_cols = [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(key=col, num_buckets=2),
            dimension=len(df_dl[col].unique()))
        for col in categorical_columns if df_dl[col].dtype == 'int64'
    ]
    # Standardized continuous inputs.
    continuous_feat_cols = [tf.feature_column.numeric_column(key=col) for col in cols_to_scale]
    # Discretized inputs, fed in raw units.
    bucket_feat_cols = [
        tf.feature_column.bucketized_column(
            tf.feature_column.numeric_column(key=col),
            boundaries=list(bucket_boundaries[col]))
        for col in bucketized_columns
    ]

    # All feature columns handed to the model.
    feat_cols = (categorical_object_feat_cols
                 + categorical_integer_feat_cols
                 + continuous_feat_cols
                 + bucket_feat_cols)

    # DNN (Deep Neural Network) routine.
    input_fun = tf.compat.v1.estimator.inputs.pandas_input_fn(
        features, deep_label, batch_size=50, num_epochs=1000, shuffle=True)
    # Unshuffled so the predictions line up with the rows of df_dl.
    pred_input_fun = tf.compat.v1.estimator.inputs.pandas_input_fn(
        features, batch_size=50, shuffle=False)
    DNN_model = tf.estimator.DNNClassifier(
        hidden_units=[10, 10, 10], feature_columns=feat_cols, n_classes=classes)
    DNN_model.train(input_fn=input_fun, steps=5000)

    # Collect the DNN results.
    return list(DNN_model.predict(pred_input_fun))

# Fit the DNN on the full frame and collect its prediction records.
res_pred = deep_learning_dnn(df, 'PurchasedBike', 2)

# Class predicted by the DNN
y_pred_classe = [record["class_ids"][0] for record in res_pred]
# Probability that PurchasedBike is 0
y_pred_prob0 = [record["probabilities"][0] for record in res_pred]
# Probability that PurchasedBike is 1
y_pred_prob1 = [record["probabilities"][1] for record in res_pred]

# Attach the predictions to the frame as new columns.
df['dl_predict_class'] = y_pred_classe
df['prob_0'] = y_pred_prob0
df['prob_1'] = y_pred_prob1
df

ModuleNotFoundError: No module named 'tensorflow'