In [1]:
# Neural-network regression model that predicts 'Geld_ausgegeben' (money spent) from the other columns of the Einkaufsstatistik (shopping statistics) dataset

# Importing the libraries

# Load numpy
import numpy as np

# Load pandas
import pandas as pd

# Plotting library
import seaborn as sns
import matplotlib.pyplot as plt

# sklearn library
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error 
from sklearn.preprocessing import StandardScaler

# render plots in jupyter notebook
%matplotlib inline

In [2]:
# Importing the dependencies
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
import warnings 
# Silence every warning category for the rest of the session. simplefilter()
# registers the same message-less, module-less filter entries that the
# equivalent filterwarnings() calls would.
warnings.simplefilter('ignore')
# DeprecationWarning is a subclass of Warning, so the blanket filter above
# already covers it; the explicit entry is kept so the resulting filter list
# stays the same.
warnings.simplefilter('ignore', category=DeprecationWarning)

Using TensorFlow backend.


In [3]:
# Importing the dataset
# Reads 'Preprocessed_data.csv' through a relative path (so the file must sit
# in the kernel's working directory) into the DataFrame `dataset`.
# Later cells read the columns 'ProduktID' and 'Geld_ausgegeben' from it.
dataset = pd.read_csv('Preprocessed_data.csv')

In [4]:
# Modelling

# Fixed seed so the train/test partition is identical on every
# Restart & Run All instead of changing with each execution.
RANDOM_STATE = 42

# X and y arrays
# Double brackets keep X two-dimensional (a one-column DataFrame); y is a
# one-dimensional Series.
# NOTE(review): 'ProduktID' is the only predictor used here - confirm that
# leaving out the other columns of the dataset is intended.
X = dataset[['ProduktID']]
y = dataset['Geld_ausgegeben']

# Train Test Split: 80 % of the rows for training, 20 % held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=RANDOM_STATE
)

# Standardise the feature. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so no test-set statistics enter
# the fit.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [5]:
# Creating and Training the Model

# Layer stack of the DNN, listed in forward order. Every Dense layer uses the
# 'normal' kernel initialiser.
layer_stack = [
    # First hidden layer; input_dim is the number of feature columns in X_train.
    Dense(128, kernel_initializer='normal', input_dim=X_train.shape[1], activation='relu'),
]

# Three identical 256-unit hidden layers.
layer_stack += [
    Dense(256, kernel_initializer='normal', activation='relu')
    for _ in range(3)
]

# Single linear unit producing the regression output.
layer_stack.append(Dense(1, kernel_initializer='normal', activation='linear'))

# Assemble the network from the prepared layers.
NN_model = Sequential(layer_stack)

# Compile the network: mean absolute error is both the training loss and the
# reported metric.
NN_model.compile(
    loss='mean_absolute_error',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)
NN_model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               256       
_________________________________________________________________
dense_2 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 257       
Total params: 165,121
Trainable params: 165,121
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Define a checkpoint callback
# File name template; the epoch number and that epoch's val_loss are filled
# into the placeholders when a file is written.
checkpoint_name = 'Weights_Deep_Neural_Network_All_Features-{epoch:03d}--{val_loss:.5f}.hdf5'

# Write a file only when val_loss improves on the best value so far, and
# report each decision (verbose=1).
checkpoint = ModelCheckpoint(
    filepath=checkpoint_name,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)
callbacks_list = [checkpoint]

In [None]:
# Train the model
# validation_split=0.2 holds back a fifth of the training rows; the val_loss
# computed on them is what the checkpoint callback monitors.
NN_model.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=50,
    callbacks=callbacks_list,
    validation_split=0.2,
)

Train on 320000 samples, validate on 80000 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 3915.35083, saving model to Weights_Deep_Neural_Network_All_Features-001--3915.35083.hdf5
Epoch 2/50

Epoch 00002: val_loss did not improve from 3915.35083
Epoch 3/50

Epoch 00003: val_loss improved from 3915.35083 to 3902.90434, saving model to Weights_Deep_Neural_Network_All_Features-003--3902.90434.hdf5
Epoch 4/50

Epoch 00004: val_loss improved from 3902.90434 to 3882.76853, saving model to Weights_Deep_Neural_Network_All_Features-004--3882.76853.hdf5
Epoch 5/50

Epoch 00005: val_loss improved from 3882.76853 to 3850.97656, saving model to Weights_Deep_Neural_Network_All_Features-005--3850.97656.hdf5
Epoch 6/50

Epoch 00006: val_loss did not improve from 3850.97656
Epoch 7/50

Epoch 00007: val_loss did not improve from 3850.97656
Epoch 8/50

Epoch 00008: val_loss did not improve from 3850.97656
Epoch 9/50

Epoch 00009: val_loss did not improve from 3850.97656
Epoch 10/50

Epoch

In [None]:
# Load weights file of the best model :
# The checkpoint callback only writes a file when val_loss improves, so the
# file saved for the best epoch carries the callback's best val_loss in its
# name. Matching on that value picks the best checkpoint of this training run
# rather than a hard-coded file name that goes stale after every run.
import glob

best_val_loss = checkpoint.best
weights_pattern = (
    checkpoint_name
    .replace('{epoch:03d}', '*')
    .replace('{val_loss:.5f}', '{:.5f}'.format(best_val_loss))
)
matching_weights = sorted(glob.glob(weights_pattern))

if matching_weights:
    weights_file = matching_weights[-1]  # choose the best checkpoint
    NN_model.load_weights(weights_file)  # load it
    print('Loaded best checkpoint:', weights_file)
else:
    # Nothing was saved for this callback (e.g. training was not run in this
    # session); continue with whatever weights the model currently holds.
    print('No checkpoint matches', weights_pattern, '- keeping the weights currently in memory')

# Same loss / optimizer / metric settings as the initial compile of NN_model.
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])

In [None]:
# Predictions from our Model
predictions = NN_model.predict(X_test)

# y_test is a pandas Series that still carries the shuffled row labels from
# the train/test split; plotting it directly would use those labels as x
# values, while the prediction array is drawn against 0..n-1. Converting both
# to flat arrays puts the two curves on the same positional x-axis.
y_true_values = np.asarray(y_test).ravel()
y_pred_values = np.asarray(predictions).ravel()

fig, ax = plt.subplots()
ax.plot(y_true_values, color='red', label='Real data')
ax.plot(y_pred_values, color='blue', label='Predicted data')
ax.set_title('Prediction')
ax.set_xlabel('Test sample (position in test set)')
ax.set_ylabel('Geld_ausgegeben')
ax.legend()
plt.show()

In [None]:
# sns.distplot((y_test-predictions),bins=50);

In [None]:
from sklearn import metrics

# Error summary of the predictions against the held-out targets.
# The mean squared error is evaluated once and reused for the RMSE row.
mse = metrics.mean_squared_error(y_test, predictions)
metric_rows = [
    ('MAE:', metrics.mean_absolute_error(y_test, predictions)),
    ('MSE:', mse),
    ('RMSE:', np.sqrt(mse)),
    ('R2_Score:', metrics.r2_score(y_test, predictions)),
]
for label, value in metric_rows:
    print(label, value)