In [24]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

In [38]:
# Read the three CSV inputs: the main `online_shoppers_intention.csv` table,
# followed by the separately stored train and test files.
df, df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "online_shoppers_intention.csv",
        "df_train.csv",
        "df_test.csv",
    )
)

In [40]:
# Encode the `Weekend` flag as 0/1 integers in every frame.
# An explicit integer cast replaces `Series.replace({True: 1, False: 0})`, which
# depends on pandas inferring an integer dtype for the replaced values (implicit
# downcasting that recent pandas releases deprecate) and was copy-pasted once
# per frame. Item assignment is used instead of attribute assignment so the
# column write is unambiguous.
# NOTE(review): assumes `Weekend` has no missing values — confirm for the CSVs.
for frame in (df, df_train, df_test):
  frame["Weekend"] = frame["Weekend"].astype("int64")

In [21]:
# Show the column labels of the training frame (the features plus `Revenue`).
df_train.columns

Index(['Administrative', 'Administrative_Duration', 'Informational',
       'Informational_Duration', 'ProductRelated', 'ProductRelated_Duration',
       'BounceRates', 'ExitRates', 'PageValues', 'SpecialDay', 'Month',
       'OperatingSystems', 'Browser', 'Region', 'TrafficType', 'VisitorType',
       'Weekend', 'Revenue'],
      dtype='object')

In [32]:
# Input variables grouped by how the later cells encode them:
# `numeric_vars` become numeric feature columns, `categorical_vars` become
# vocabulary-based indicator columns.
numeric_vars = [
    "Administrative",
    "Administrative_Duration",
    "Informational",
    "Informational_Duration",
    "ProductRelated",
    "ProductRelated_Duration",
    "BounceRates",
    "ExitRates",
    "PageValues",
    "SpecialDay",
]
categorical_vars = [
    "Month",
    "OperatingSystems",
    "Browser",
    "Region",
    "TrafficType",
    "VisitorType",
    "Weekend",
]

In [22]:
# https://www.tensorflow.org/tutorials/structured_data/feature_columns
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_col="Revenue"):
  """Build a batched `tf.data.Dataset` of (features, labels) from a DataFrame.

  Args:
    dataframe: pandas DataFrame holding the feature columns and the label
      column. It is copied first, so the caller's frame is left untouched.
    shuffle: when true, shuffle with a buffer that spans every row.
    batch_size: number of rows per emitted batch.
    label_col: name of the column to split off as the label. Defaults to
      "Revenue", the column this notebook predicts.

  Returns:
    A dataset whose elements are `(features, labels)` pairs, where `features`
    is a dict keyed by column name.

  Raises:
    KeyError: if `label_col` is not a column of `dataframe`.
  """
  features = dataframe.copy()
  labels = features.pop(label_col)
  ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  # An empty frame would request a zero-sized shuffle buffer, which tf.data
  # rejects; there is nothing to shuffle in that case anyway.
  if shuffle and len(features) > 0:
    ds = ds.shuffle(buffer_size=len(features))
  return ds.batch(batch_size)

In [62]:
# Start from an empty list; the following cells add one feature column per
# input variable.
feature_columns = list()

In [63]:
# Register one numeric feature column per entry in `numeric_vars`.
feature_columns.extend(
    feature_column.numeric_column(col_name) for col_name in numeric_vars
)

In [46]:
# Register one indicator (one-hot) column per entry in `categorical_vars`.
# Each vocabulary is the set of distinct values found in `df` for that column.
# NOTE(review): the saved execution counts (this cell: 46, the
# `feature_columns = []` reset: 62) suggest this cell last ran before the list
# was reset, so the feature layer built afterwards may not have contained
# these columns — re-run the notebook top to bottom to confirm.
for col_name in categorical_vars:
  vocabulary = df[col_name].unique()
  one_hot = feature_column.indicator_column(
      feature_column.categorical_column_with_vocabulary_list(col_name, vocabulary))
  feature_columns.append(one_hot)

In [64]:
# Input layer built from the collected feature columns; it is used as the
# first layer of the model defined further down.
feature_layer = layers.DenseFeatures(feature_columns)

In [65]:
# Wrap both frames as batched datasets. Training rows are shuffled (the
# default of df_to_dataset, spelled out here); test rows keep their order.
batch_size = 32
tf_train = df_to_dataset(dataframe=df_train, shuffle=True, batch_size=batch_size)
tf_test = df_to_dataset(dataframe=df_test, shuffle=False, batch_size=batch_size)

In [53]:
# Display the training dataset's repr to check per-column shapes and dtypes.
tf_train

<BatchDataset shapes: ({Administrative: (None,), Administrative_Duration: (None,), Informational: (None,), Informational_Duration: (None,), ProductRelated: (None,), ProductRelated_Duration: (None,), BounceRates: (None,), ExitRates: (None,), PageValues: (None,), SpecialDay: (None,), Month: (None,), OperatingSystems: (None,), Browser: (None,), Region: (None,), TrafficType: (None,), VisitorType: (None,), Weekend: (None,)}, (None,)), types: ({Administrative: tf.int64, Administrative_Duration: tf.float64, Informational: tf.int64, Informational_Duration: tf.float64, ProductRelated: tf.int64, ProductRelated_Duration: tf.float64, BounceRates: tf.float64, ExitRates: tf.float64, PageValues: tf.float64, SpecialDay: tf.float64, Month: tf.string, OperatingSystems: tf.int64, Browser: tf.int64, Region: tf.int64, TrafficType: tf.int64, VisitorType: tf.string, Weekend: tf.int64}, tf.int64)>

In [60]:
# Discard any model left over from an earlier run so the next cell starts clean.
# A bare `del(model)` raises NameError on a fresh kernel, which breaks
# "Restart & Run All"; tolerate the name being absent instead.
try:
  del model
except NameError:
  pass

In [67]:
# Feed-forward binary classifier on top of the feature layer. The final
# Dense(1) has no activation, so the model outputs raw logits.
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(100, activation='relu'),
  layers.Dense(100, activation='relu'),
  layers.Dense(100, activation='relu'),
  layers.Dense(50, activation='relu'),
  layers.Dropout(.5),
  layers.Dense(1)
])

# The loss is told about the logits via from_logits=True, but the 'accuracy'
# string resolves to binary accuracy with its default 0.5 cut-off, which is
# meant for probabilities. On logits the p = 0.5 decision boundary sits at 0,
# so use an explicit threshold (the metric keeps the name 'accuracy' so the
# training logs and history keys are unchanged).
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

# NOTE(review): validation metrics are computed on `tf_test`; confirm that
# using the test split for validation is intended.
model.fit(tf_train,
          validation_data=tf_test,
          epochs=10)