In [10]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [11]:
import itertools
import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle

from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
# Short alias for the Keras layers module
layers = keras.layers

# This code was tested with TensorFlow v1.7; report the version in use
print("You have TensorFlow version {}".format(tf.__version__))

You have TensorFlow version 1.7.0


In [12]:
# Restore the trained wide-and-deep Keras model from a local HDF5 file and
# print an overview of its layers.
# (The tutorial's hosted model,
#  https://storage.googleapis.com/keras_wine/final_wine_model.h5, is not used.)
MODEL_PATH = 'WideandDeep_model_1.h5'
model = keras.models.load_model(MODEL_PATH)
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 15)           0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 170)          0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 115)          0           input_1[0][0]                    
                                                                 input_2[0][0]                    
__________

In [13]:
# Load the pickled vocabulary tokenizer from a local file.
# (The tutorial's hosted tokenizer,
#  https://storage.googleapis.com/keras_wine/word_tokenizer.p, is not used.)
# NOTE: pickle.load can execute arbitrary code -- only unpickle trusted files.
# The context manager closes the file handle as soon as loading finishes;
# a bare open() passed straight to pickle.load would leave it open.
with open('tokenizer1.obj', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)


In [14]:

# Load the pickled variety label encoder from a local file.
# (The tutorial's hosted encoder,
#  https://storage.googleapis.com/keras_wine/variety_encoder.p, is not used.)
# NOTE: pickle.load can execute arbitrary code -- only unpickle trusted files.
# The context manager closes the file handle as soon as loading finishes;
# a bare open() passed straight to pickle.load would leave it open.
with open('encoder1.p', 'rb') as encoder_file:
    encoder = pickle.load(encoder_file)

In [15]:
def _read_lines(path):
    """Return the lines of the text file at `path`, without line terminators.

    The file is opened in a `with` block so the handle is closed even if
    reading fails part-way through.
    """
    # NOTE(review): no explicit encoding is passed, so the platform default
    # is used. The descriptions printed further down contain garbled
    # characters (e.g. "grï¿½n"), which looks like an encoding mismatch.
    # Confirm the files' real encoding before changing this: the tokenizer's
    # vocabulary may depend on how the text was decoded.
    with open(path, 'r') as handle:
        return handle.read().splitlines()


# Listing descriptions, one per line
sheet = _read_lines('Sheet1.txt')
print(len(sheet))

# Actual prices, one per line
prices = _read_lines('TeganPrices.txt')
print(len(prices))

# Model (variety) names, one per line
models = _read_lines('Models.txt')
print(len(models))

# The three files are matched up by line number later on, so a differing
# line count means the rows are misaligned; fail early with a clear message.
if not (len(sheet) == len(prices) == len(models)):
    raise ValueError(
        "Input files are not line-aligned: %d descriptions, %d prices, "
        "%d models" % (len(sheet), len(prices), len(models)))

159
159
159


In [16]:
# Raw inputs for inference, taken from the three text files read earlier:
#   test_descriptions -- free-text description of each item
#   test_varieties    -- the variety (model name) for each description
#   labels            -- the known price for each description
test_descriptions, test_varieties, labels = sheet, models, prices

In [17]:
# Peek at the tokenizer's word -> index mapping (first 20 entries only)
# and at the classes known to the variety encoder
vocab_lookup = tokenizer.word_index
first_20_words = dict(itertools.islice(vocab_lookup.items(), 20))
print("Sample vocab\n", first_20_words, "\n")
print("Variety encoder\n", encoder.classes_, "\n")

Sample vocab
 {'iphone': 1, 'og': 2, 'gb': 3, 'er': 4, 'i': 5, 'på': 6, 'den': 7, 'perfekt': 8, 'har': 9, 'med': 10, 'god': 11, '64': 12, 'til': 13, '7': 14, 'det': 15, 'sort': 16, 'vores': 17, 'der': 18, 'medfølger': 19, '6': 20} 

Variety encoder
 ['5' '5C' '5S' '6' '6 Plus' '6S' '6S Plus' '7' '7 Plus' '8' '8 Plus' 'SE'
 'X' 'XS' 'XS Max'] 



In [18]:
# Wide-branch inputs: a bag-of-words matrix for the descriptions and a
# one-hot matrix for the varieties (one column per encoder class)
bow_description = tokenizer.texts_to_matrix(test_descriptions)
variety = keras.utils.to_categorical(
    encoder.transform(test_varieties), len(encoder.classes_))

# Show the first sample of each input
for title, matrix in (("Bag of words matrix", bow_description),
                      ("Variety matrix", variety)):
    print(title)
    print(matrix[0], "\n")

Bag of words matrix
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.] 

Variety matrix
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 



In [19]:
# Deep-branch input: every description as a sequence of vocabulary indices,
# padded after the tokens (padding="post") to a fixed length of 170, which
# matches the (None, 170) input shown in the model summary
token_sequences = tokenizer.texts_to_sequences(test_descriptions)
embed_description = keras.preprocessing.sequence.pad_sequences(
    token_sequences, maxlen=170, padding="post")

print(embed_description[0])

[61 36  3 11  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0]


In [20]:
# Feed the inputs in the order: bag of words, one-hot variety, padded sequences
model_inputs = [bow_description, variety, embed_description]
predictions = model.predict(model_inputs)

In [21]:

# Show each description with the model's estimate next to the known price
for idx, description in enumerate(test_descriptions):
    row = predictions[idx]
    print(description)
    print('Predicted: ', row[0], 'Actual: ', labels[idx], '\n')

Apple 5C 8 GB pink God
Predicted:  547.92566 Actual:  300kr. 

Apple 5C 8 GB grï¿½n God
Predicted:  547.92566 Actual:  300kr. 

Apple 5C 8 GB hvid God
Predicted:  624.0664 Actual:  300kr. 

Apple 5C 8 GB gule God
Predicted:  547.92566 Actual:  300kr. 

Apple 5C 8 GB blï¿½ God
Predicted:  547.92566 Actual:  300kr. 

Apple 5C 16 GB pink God
Predicted:  670.849 Actual:  500kr. 

Apple 5C 16 GB grï¿½n  God
Predicted:  670.849 Actual:  500kr. 

Apple 5C 16 GB hvid God
Predicted:  755.21094 Actual:  500kr. 

Apple 5C 16 GB gule God
Predicted:  670.849 Actual:  500kr. 

Apple 5C 16 GB blï¿½ God
Predicted:  670.849 Actual:  500kr. 

Apple 5C 32 GB pink God
Predicted:  192.22629 Actual:  500kr. 

Apple 5C 32 GB grï¿½n  God
Predicted:  192.22629 Actual:  500kr. 

Apple 5C 32 GB hvid God
Predicted:  168.08469 Actual:  500kr. 

Apple 5C 32 GB gule God
Predicted:  192.22629 Actual:  500kr. 

Apple 5C 32 GB blï¿½ God
Predicted:  192.22629 Actual:  500kr. 

Apple 5 16 GB sort God
Predicted:  896.7481

In [22]:
# Save the predictions to a text file, one row per line, each row rendered
# with "%s" (its str() form).
# NOTE(review): the file name is spelled 'preditions'; it is kept as-is here
# because other code may read this file.
with open('preditions_1.txt', 'w') as out_file:
    out_file.writelines("%s\n" % row for row in predictions)