## This dataset was gathered from Auidobook app, covering inputs such as:
    - id
    - book length in minutes overall
    - book length in minutes average
    - price overall
    - price average
    - review (did the customer left one or not) - values of 0 or 1
    - review 10/10 (satisfaction of a review with the book) - here we will propagate avarage values because some values are missing (this feature is related to 'review' - if  revies wasn't left, no satisfaction rate can be presented, thus making missing values)
    - minutes listened
    - completion (did the book was listened to the end, etc.)
    - support requests
    - last visited minus purchased date
    - targets (will customer return again or not) - our target Y - values of 0 or 1

### All needed imports

In [28]:
import numpy as np
from sklearn import preprocessing

import tensorflow as tf

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


## Data

In [3]:
data = np.loadtxt('Audiobooks_data.csv', delimiter = ',')

# excluding id column and target column
inputs = data[:, 1:-1] 

In [4]:
inputs

array([[2.160e+03, 2.160e+03, 1.013e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.404e+03, 2.808e+03, 6.660e+00, ..., 0.000e+00, 0.000e+00,
        1.820e+02],
       [3.240e+02, 3.240e+02, 1.013e+01, ..., 0.000e+00, 1.000e+00,
        3.340e+02],
       ...,
       [1.080e+03, 1.080e+03, 6.550e+00, ..., 0.000e+00, 0.000e+00,
        2.900e+01],
       [2.160e+03, 2.160e+03, 6.140e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.620e+03, 1.620e+03, 5.330e+00, ..., 0.000e+00, 0.000e+00,
        9.000e+01]])

In [5]:
inputs.shape

(14084, 10)

#### Retrieving targets

In [7]:
targets = data[:, -1]
targets.shape

(14084,)

### Balancing the dataset
Counting the number of targets that are 1 and keeping as many zeros as there ones

In [8]:
number_ofones_targets = int(np.sum(targets))
number_ofones_targets

2237

#### Keeping only 2237 zero targets to balance the dataset

In [9]:
zero_targets_counter = 0
indices_to_remove = []

In [10]:
for i in range (targets.shape[0]):
    if targets[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > number_ofones_targets:
            indices_to_remove.append(i) # this thingy will contain all indices we don't need

##### delete method 'deletes' an object along an axis 

In [11]:
inputs_balanced = np.delete(inputs, indices_to_remove, axis = 0)
inputs_balanced

array([[2.160e+03, 2.160e+03, 1.013e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.404e+03, 2.808e+03, 6.660e+00, ..., 0.000e+00, 0.000e+00,
        1.820e+02],
       [3.240e+02, 3.240e+02, 1.013e+01, ..., 0.000e+00, 1.000e+00,
        3.340e+02],
       ...,
       [2.160e+03, 2.160e+03, 1.013e+01, ..., 2.592e+02, 0.000e+00,
        1.400e+01],
       [2.160e+03, 2.160e+03, 8.300e+00, ..., 2.592e+02, 0.000e+00,
        9.300e+01],
       [2.160e+03, 2.160e+03, 8.000e+00, ..., 2.592e+02, 0.000e+00,
        2.400e+01]])

In [12]:
inputs_balanced.shape

(4474, 10)

In [13]:
targets_bal = np.delete(targets, indices_to_remove, axis = 0)

In [14]:
targets_bal.shape

(4474,)

In [15]:
targets_bal

array([1., 1., 1., ..., 0., 0., 0.])

### Standardizing (scaling) inputs

In [16]:
inputs_bal_scaled = preprocessing.scale(targets_bal)
inputs_bal_scaled

array([ 1.,  1.,  1., ..., -1., -1., -1.])

### Shuffling data

In [17]:
shuffled_indices = np.arange(inputs_bal_scaled.shape[0])
np.random.shuffle(shuffled_indices)

In [18]:
shuffled_indices

array([3957, 1435, 1344, ..., 2804, 1420,  379])

In [19]:
inputs_shuffled = inputs_bal_scaled[shuffled_indices]
shuffled_targets = targets_bal[shuffled_indices]

In [20]:
inputs_shuffled

array([-1.,  1.,  1., ..., -1.,  1.,  1.])

### Splitting the dataset

In [21]:
samples_count = shuffled_indices.shape[0]

train_samples_count = int(0.8 * samples_count)
val_samples_count = int(0.1 * samples_count)
test_samples_count = samples_count - train_samples_count - val_samples_count

In [23]:
train_samples_count, val_samples_count, test_samples_count, samples_count

(3579, 447, 448, 4474)

In [24]:
train_inputs = inputs_shuffled[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]
train_inputs.shape, train_targets.shape

((3579,), (3579,))

In [25]:
valid_inputs = inputs_shuffled[train_samples_count:train_samples_count + val_samples_count]
valid_targets = shuffled_targets[train_samples_count:train_samples_count + val_samples_count]
valid_inputs.shape, valid_inputs.shape

((447,), (447,))

In [26]:
test_inputs = inputs_shuffled[train_samples_count + val_samples_count: ]
test_targets = shuffled_targets[train_samples_count + val_samples_count: ]
test_inputs.shape, test_inputs.shape

((448,), (448,))

### Saving the preprocessed dataset

In [27]:
np.savez('audiobook_train', inputs = train_inputs, targets = train_targets)
np.savez('audiobook_valid', inputs = valid_inputs, targets = valid_targets)
np.savez('audiobook_test', inputs = test_inputs, targets = test_targets)

### Loading data

#### Loading Training set

In [32]:
npz_train = np.load('audiobook_train.npz')
npz_train

<numpy.lib.npyio.NpzFile at 0x7f8652765510>

In [35]:
npz_train['inputs']

array([-1.,  1.,  1., ...,  1.,  1.,  1.])

In [37]:
train_inputs = npz_train['inputs'].astype(np.float)

In [41]:
train_targets = npz_train['targets'].astype(np.int)

In [42]:
train_inputs.shape, train_targets.shape

((3579,), (3579,))

In [43]:
train_targets

array([0, 1, 1, ..., 1, 1, 1])

#### Loading Validation set

In [45]:
npz_val = np.load('audiobook_valid.npz')

val_inputs = npz_val['inputs'].astype(np.float)
val_targets = npz_val['targets'].astype(np.int)

In [47]:
val_inputs.shape, val_targets

((447,),
 array([0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
        1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,
        1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
        0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1,
        1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
        0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
        0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 1, 1, 0, 0, 

#### Loading Test set

In [48]:
npz_test = np.load('audiobook_test.npz')

test_inputs = npz_test['inputs'].astype(np.float)
test_targets = npz_test['targets'].astype(np.int)

In [49]:
test_inputs.shape, test_targets

((448,),
 array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
        0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
        1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
        0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,
        1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
        0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
        1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0,
        0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
        1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
        0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1,
        1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        1, 0, 0, 0, 1, 0, 1, 

## Model

In [50]:
input_size = 10
output_size = 2
hidden_units = 50

In [54]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_units, activation = 'relu'),
    tf.keras.layers.Dense(hidden_units, activation = 'relu'),
    tf.keras.layers.Dense(output_size, activation = 'softmax')
])

In [55]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

In [56]:
model.fit(train_inputs, train_targets, batch_size = 100, epochs = 100,
         validation_data = (val_inputs, val_targets))

Train on 3579 samples, validate on 447 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f865402ac10>