# Predict Discount Based on Age

In [1]:
from random import randrange, choice
import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler


## Create Data

In [2]:
# Number of synthetic customers to generate; it is also embedded in the
# cache file name so different sample sizes never share a spreadsheet.
sample_size = 10_000
data_file = f'../data/discount_data_{sample_size}.xlsx'

# Business categories a customer can be assigned to.
business_list = [
    'bar',
    'restaurant',
    'hotel',
    'pharmacy',
    'spa',
    'airline',
]


def calculate_discount(age, sex, business_type, is_resident):
    """Return the discount rate a customer is entitled to.

    A discount applies only to residents who have reached the minimum age
    for their sex (64 for 'M', 57 for 'F'). Eligible customers get the rate
    of the business type; business types without a rate give 0.0.

    Args:
        age: Customer age in years.
        sex: 'M' or 'F'; any other value is never eligible.
        business_type: Name of the business category.
        is_resident: Truthy when the customer is a resident.

    Returns:
        The discount as a fraction (e.g. 0.25), or 0.0 when not eligible.
    """
    if sex == 'M':
        minimum_age = 64
    elif sex == 'F':
        minimum_age = 57
    else:
        return 0.0

    if not (age >= minimum_age and is_resident):
        return 0.0

    # Only these business types grant a discount; everything else is 0.0.
    rates = {'restaurant': 0.2, 'pharmacy': 0.2, 'hotel': 0.25}
    return rates.get(business_type, 0.0)
    
def build_dataframe(samples):
    """Generate a synthetic customer data set labelled with discounts.

    Every row draws a random age in [18, 94], a sex ('M' or 'F'), a business
    type from ``business_list`` and a residency flag, and is labelled with
    the value returned by ``calculate_discount`` for that combination.

    Args:
        samples: Number of rows to generate. Zero (or a negative number)
            yields an empty frame that still has all five columns.

    Returns:
        A DataFrame with the columns sex, age, business_type, discount and
        is_resident, in that order.
    """
    columns = ['sex', 'age', 'business_type', 'discount', 'is_resident']
    records = []
    for _ in range(samples):
        # The draw order is kept fixed so that a seeded random generator
        # reproduces the same rows.
        age = randrange(18, 95, 1)
        sex = choice(['M', 'F'])
        business_type = choice(business_list)
        is_resident = choice([True, False])
        records.append({
            'sex': sex,
            'age': age,
            'business_type': business_type,
            'discount': calculate_discount(age, sex, business_type, is_resident),
            'is_resident': is_resident,
        })

    # Build the frame once, after the loop: constructing it per iteration is
    # quadratic and leaves nothing to return when no rows are requested.
    return pd.DataFrame(records, columns=columns)
    


In [3]:
# Reuse the cached spreadsheet when it exists so every run works on the same
# random sample; otherwise generate the data and cache it.
if os.path.exists(data_file):
    df = pd.read_excel(data_file)
    print(f'Loaded from {data_file}')
else:
    df = build_dataframe(sample_size)
    # to_excel does not create missing folders, so make sure the target
    # directory exists before the first save.
    os.makedirs(os.path.dirname(data_file) or '.', exist_ok=True)
    df.to_excel(data_file, index=False)
    print(f'Saved to {data_file}')

Loaded from ../data/discount_data_10000.xlsx


In [4]:
# Report the frame dimensions, then preview the first rows of the raw data.
print(f'Shape: {df.shape}')
df.head()

Shape: (10000, 5)


Unnamed: 0,sex,age,business_type,discount,is_resident
0,F,78,bar,0.0,True
1,F,24,bar,0.0,False
2,F,45,bar,0.0,True
3,F,21,pharmacy,0.0,False
4,F,45,pharmacy,0.0,False


## Cleanup


### Separate dependent and independent variables

In [5]:
# Split the target (discount) from the input features. The guard keeps the
# cell safe to re-run: once the column has been removed, a second run keeps
# the existing `discounts` instead of failing with a KeyError.
if 'discount' in df.columns:
    discounts = df['discount'].copy()
    df = df.drop(columns=['discount'])

print(f'discount shape: {discounts.shape}')

df.head()

discount shape: (10000,)


Unnamed: 0,sex,age,business_type,is_resident
0,F,78,bar,True
1,F,24,bar,False
2,F,45,bar,True
3,F,21,pharmacy,False
4,F,45,pharmacy,False


### One-Hot Encode

In [6]:
# Expand the categorical columns into 0/1 indicator columns. The dtype is
# pinned because the default indicator dtype differs between pandas versions
# (boolean in recent releases), and boolean columns mixed with numeric ones
# would turn `df.values` into an object array.
df = pd.get_dummies(df, columns=['sex', 'business_type'], dtype='uint8')


In [7]:


# Convert the residency flag from True/False to 1/0. Assigning the result
# back to the column is used instead of an in-place replace on the selected
# column, which modifies a temporary object under pandas copy-on-write and
# would leave `df` unchanged.
df['is_resident'] = df['is_resident'].astype(int)

### Scaling age

In [8]:
# Rescale age with a min-max scaler; the scaler needs a 2-D input, hence
# the reshape of the single column into an (n, 1) array.
scaler = MinMaxScaler()
df['age'] = scaler.fit_transform(df['age'].to_numpy().reshape(-1, 1))


In [9]:
# Report the dimensions of the encoded feature frame and preview its first rows.
print(f'Shape: {df.shape}')
df.head()

Shape: (10000, 10)


Unnamed: 0,age,is_resident,sex_F,sex_M,business_type_airline,business_type_bar,business_type_hotel,business_type_pharmacy,business_type_restaurant,business_type_spa
0,0.789474,1,1,0,0,1,0,0,0,0
1,0.078947,0,1,0,0,1,0,0,0,0
2,0.355263,1,1,0,0,1,0,0,0,0
3,0.039474,0,1,0,0,0,0,1,0,0
4,0.355263,0,1,0,0,0,0,1,0,0


## Basic Neural Network


In [10]:
learning_rate = 0.01

# The first layer takes one input per feature column. It and the hidden
# layer use ReLU: Dense layers without an activation are purely linear, so a
# stack of them collapses into a single linear map and cannot represent the
# age/sex/residency thresholds behind the discount. The first layer is also
# given more than one unit so it does not squeeze every feature into a
# single scalar before the hidden layer sees it.
input_layer = tf.keras.layers.Dense(
    units=64, activation='relu', input_shape=[df.shape[1]]
)
hidden_layer = tf.keras.layers.Dense(units=256, activation='relu')
# One linear output unit: the predicted discount rate.
output_layer = tf.keras.layers.Dense(units=1)

In [11]:
# Stack the three layers in order: input -> hidden -> output.
model = tf.keras.models.Sequential([
    input_layer,
    hidden_layer,
    output_layer,
])


In [12]:
# The target is a continuous discount rate, so this is a regression problem:
# train on mean squared error and track mean absolute error. Classification
# accuracy is not a meaningful metric for a continuous target.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    loss='mean_squared_error',
    metrics=['mean_absolute_error']
)

In [13]:
# Pass explicit float32 arrays rather than relying on the dtype pandas
# happens to pick for a frame with mixed column types.
# NOTE(review): the saved output of this cell is a "No gradients provided"
# error whose cause is not visible in this notebook; confirm the cell runs
# cleanly on a fresh kernel.
history = model.fit(
    df.to_numpy(dtype='float32'),
    discounts.to_numpy(dtype='float32'),
    epochs=20,
)

Epoch 1/20


ValueError: in user code:

    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:855 train_function  *
        return step_function(self, iterator)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:838 run_step  **
        outputs = model.train_step(data)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:799 train_step
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:530 minimize
        return self.apply_gradients(grads_and_vars, name=name)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:630 apply_gradients
        grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
    /Users/luiscberrocal/anaconda3/envs/star_wars_analysis_env/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/utils.py:75 filter_empty_gradients
        raise ValueError("No gradients provided for any variable: %s." %

    ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_2/bias:0'].


In [None]:
# Dump the values recorded during training, as stored on the fit() result.
print(history.history)

In [None]:
# Plot the training loss per epoch.
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.plot(history.history['loss'])
# show must be called; the bare attribute only echoes the function object.
plt.show()

In [None]:
df_values, disc_value = df.values, discounts.values

# Scan the first 500 rows and print the index of the first row that
# actually received a discount.
for i in range(500):
    if not disc_value[i] > 0.0:
        continue
    print(i)
    break

In [None]:
# Show the features and the true discount of row 36 as a one-row batch.
# reshape(1, -1) infers the feature count, so the cell keeps working if the
# set of encoded columns changes.
print(df_values[36].reshape(1, -1))
print(disc_value[36])

In [None]:
# Predict the discount for row 36. The model expects a batch, so the single
# row is reshaped to (1, n_features); -1 avoids hard-coding the column count.
pred = model.predict(df_values[36].reshape(1, -1))

print(f'Estimated discount {pred}')

In [None]:
# Predict the discount for row 0. The model expects a batch, so the single
# row is reshaped to (1, n_features); -1 avoids hard-coding the column count.
pred = model.predict(df_values[0].reshape(1, -1))

print(f'Estimated discount {pred}')