# First, load the dataset and create the vocabulary

In [1]:
from gpt_tf import VOCAB_SIZE, EMBEDDING_DIM, MAX_LEN
from gpt_tf import TransformerBlock, TokenAndPositionEmbedding, causal_attention_mask

In [2]:
import numpy as np
import pathlib
from tensorflow.keras import layers, models

In [3]:
# Load the full dataset
import pathlib
import json

# Locate the wine-review corpus inside the local datasets folder.
# NOTE(review): this absolute path is machine-specific; adjust it when running elsewhere.
datasets_folder = pathlib.Path(r"C:\Users\amrul\programming\deep_learning\dl_projects\Generative_Deep_Learning_2nd_Edition\data")
wine_review_filepath = datasets_folder / "wine_reviews" / "winemag-data-130k-v2.json"

# JSON text is UTF-8 by specification, so the encoding is passed explicitly.
# Without it, open() falls back to the platform's locale encoding, which makes
# the decoded reviews depend on the machine the notebook runs on.
with wine_review_filepath.open(encoding="utf-8") as json_data:
    wine_data = json.load(json_data)

In [4]:
# Keep only the reviews in which country, province, variety and description are
# all present, and flatten each one into a single string of the form
# "wine review : <country> : <province> : <variety> : <description>"
filtered_data = [
    " : ".join(
        (
            "wine review",
            review["country"],
            review["province"],
            review["variety"],
            review["description"],
        )
    )
    for review in wine_data
    if all(
        review[key] is not None
        for key in ("country", "province", "variety", "description")
    )
]

In [5]:
import re
import string

# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Put a space on each side of every punctuation mark in ``s``.

    After this step, splitting the text on whitespace yields punctuation
    marks as stand-alone tokens.

    Args:
        s: The text to process.

    Returns:
        The text with every matched character surrounded by spaces and every
        run of consecutive spaces collapsed into a single space.
    """
    # The character class holds string.punctuation plus the literal characters
    # comma, space, single quote and newline. Spaces therefore match as well,
    # which is harmless because the collapse step below squeezes them again.
    # NOTE(review): inside the class, the backslash from string.punctuation
    # escapes the closing bracket that follows it, so a backslash in the text
    # is not padded.
    separator = re.compile(f"([{string.punctuation}, '\n'])")
    padded = separator.sub(r" \1 ", s)
    return re.sub(" +", " ", padded)


# Apply the punctuation padding to every filtered review
text_data = list(map(pad_punctuation, filtered_data))

In [6]:
import tensorflow as tf
from gpt_tf import BATCH_SIZE
# Wrap the padded reviews in a tf.data pipeline, one step at a time:
# one element per review string ...
text_ds = tf.data.Dataset.from_tensor_slices(text_data)
# ... grouped into batches of BATCH_SIZE reviews ...
text_ds = text_ds.batch(BATCH_SIZE)
# ... then shuffled with a buffer of 1000. Because shuffle comes after batch,
# it reorders whole batches rather than individual reviews.
text_ds = text_ds.shuffle(1000)

In [7]:
# Vectorisation layer that turns each review string into a sequence of integer token ids
vectorize_layer = layers.TextVectorization(
    max_tokens=VOCAB_SIZE,  # upper bound on the vocabulary size
    standardize="lower",  # lowercase the text
    output_mode="int",  # emit integer token indices
    # NOTE(review): presumably MAX_LEN + 1 so that input and shifted-target
    # sequences each hold MAX_LEN tokens — confirm against the training code.
    output_sequence_length=MAX_LEN + 1,
)

In [8]:
# Adapt the layer to the text dataset so that it builds its vocabulary
# NOTE(review): the vocabulary is rebuilt here rather than loaded from disk;
# presumably it must match the one used when the saved model was trained — confirm.
vectorize_layer.adapt(text_ds)
# Retrieve the vocabulary; it is handed to TextGenerator and print_probs further down
vocab = vectorize_layer.get_vocabulary()

# Next, load the trained model and generate text

In [9]:
# Location of the previously saved GPT model.
# NOTE(review): this absolute path is machine-specific; adjust it when running elsewhere.
saved_model_path=pathlib.Path(r"C:\Users\amrul\programming\deep_learning\dl_projects\Generative_Deep_Learning_2nd_Edition\notebooks\09_transformer\gpt\models\gpt")
# The path is converted to a plain string before being handed to load_model
gpt = models.load_model(str(saved_model_path))

In [10]:
# Show the loaded model's layers, output shapes and parameter counts
gpt.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 256)         2580480   
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_block (Transfo  ((None, None, 256),       658688    
 rmerBlock)                   (None, 2, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 10000)       2570000   
                                                                 
Total params: 5809168 (22.16 MB)
Trainable params: 5809168 (22.16 MB)
Non-trainable params: 0 (0.00 Byte)
_____________________

In [11]:
from gpt_tf import TextGenerator

In [13]:
# Import TextGenerator and create an instance from the vocabulary and the loaded model
from gpt_tf import TextGenerator
text_generator=TextGenerator(vocab, gpt)


In [15]:
# Generate text that continues the given prompt; the returned value is kept in
# `info` and later passed to print_probs.
# NOTE(review): the second and third positional arguments are presumably the
# maximum number of tokens and the sampling temperature — confirm against
# TextGenerator.generate.
# NOTE(review): "chardaney" looks like a misspelling of "chardonnay"; also, in
# the corpus format built above, the field after the country is the province,
# not the variety. The recorded output below was produced with this exact prompt.
info=text_generator.generate("wine review : us : chardaney",MAX_LEN,0.8)


generated text:
wine review : us : chardaney : red blend : a blend of sangiovese , cabernet sauvignon and petit verdot , this is a thick , dense and tannic wine that delivers a deep core of black cherry , spice and peppercorn that carries a hint of vanilla . smooth , firm and velvety on the palate , with a hint of fresh greens . 



In [16]:
from gpt_tf import print_probs

# Print the candidate tokens and their probabilities for the generation run
# stored in `info` (judging by the output, the five most likely tokens per step)
print_probs(info,vocab)

::   	98.34%
valley:   	0.55%
hills:   	0.47%
-:   	0.18%
new:   	0.09%
--------



chardonnay:   	20.96%
cabernet:   	17.33%
pinot:   	13.89%
red:   	10.95%
merlot:   	8.85%
--------



blend:   	99.99%
wine:   	0.0%
::   	0.0%
[UNK]:   	0.0%
bordeaux:   	0.0%
--------



::   	100.0%
-:   	0.0%
grosso:   	0.0%
of:   	0.0%
blend:   	0.0%
--------



this:   	49.13%
a:   	15.43%
the:   	7.27%
aromas:   	1.62%
made:   	1.51%
--------



blend:   	57.25%
[UNK]:   	2.5%
wine:   	2.42%
very:   	1.27%
big:   	1.08%
--------



of:   	99.89%
from:   	0.04%
that:   	0.02%
with:   	0.02%
made:   	0.01%
--------



60:   	15.13%
cabernet:   	11.84%
50:   	8.6%
55:   	7.26%
51:   	5.83%
--------



,:   	65.11%
and:   	29.59%
(:   	3.35%
from:   	1.01%
with:   	0.49%
--------



cabernet:   	33.75%
merlot:   	24.64%
syrah:   	21.5%
barbera:   	2.66%
petit:   	2.21%
--------



sauvignon:   	95.75%
franc:   	3.26%
,:   	0.53%
and:   	0.41%
-:   	0.01%
--------



,:   	52.17%
and:   	47.61%
(:   	0.11%
with:   	0.06%
from:   	0.02%
--------



merlot:   	50.59%
syrah:   	22.3%
petit:   	13.25%
cabernet:   	5.69%
malbec:   	1.1%
--------



verdot:   	98.8%
sirah:   	1.05%
[UNK]:   	0.13%
,:   	0.01%
manseng:   	0.0%
--------



,:   	96.51%
from:   	1.7%
that:   	0.47%
are:   	0.2%
is:   	0.17%
--------



this:   	89.89%
it:   	3.37%
the:   	2.64%
with:   	0.78%
and:   	0.48%
--------



is:   	45.1%
wine:   	21.57%
has:   	9.13%
blend:   	5.76%
offers:   	1.75%
--------



a:   	79.01%
an:   	4.19%
the:   	1.71%
soft:   	0.85%
one:   	0.73%
--------



blend:   	18.33%
soft:   	4.93%
big:   	4.53%
full:   	4.34%
wine:   	3.35%
--------



,:   	86.35%
and:   	8.99%
wine:   	1.75%
but:   	0.35%
blend:   	0.32%
--------



dense:   	11.03%
meaty:   	9.9%
extracted:   	9.78%
chewy:   	6.92%
concentrated:   	4.87%
--------



and:   	43.15%
,:   	25.19%
wine:   	20.82%
cabernet:   	2.77%
merlot:   	1.21%
--------



extracted:   	14.53%
tannic:   	10.78%
concentrated:   	10.43%
chewy:   	8.31%
dense:   	4.78%
--------



wine:   	83.3%
blend:   	5.61%
,:   	2.97%
cabernet:   	1.85%
expression:   	1.2%
--------



that:   	32.7%
,:   	31.37%
.:   	29.14%
with:   	5.03%
from:   	0.37%
--------



':   	21.76%
is:   	13.38%
shows:   	9.98%
has:   	9.29%
will:   	7.24%
--------



a:   	23.66%
the:   	5.22%
power:   	4.66%
plenty:   	4.55%
blackberry:   	4.21%
--------



lot:   	15.79%
wealth:   	8.08%
mix:   	6.69%
deep:   	3.64%
[UNK]:   	2.74%
--------



,:   	55.15%
core:   	25.58%
and:   	2.97%
color:   	2.36%
array:   	2.06%
--------



of:   	99.77%
and:   	0.08%
that:   	0.05%
.:   	0.03%
,:   	0.03%
--------



black:   	42.7%
blackberry:   	19.78%
dark:   	13.09%
blueberry:   	3.42%
cassis:   	2.0%
--------



cherry:   	51.42%
-:   	9.94%
olive:   	8.83%
fruit:   	8.67%
licorice:   	4.32%
--------



,:   	70.02%
and:   	22.06%
fruit:   	2.0%
flavor:   	1.45%
.:   	1.29%
--------



cassis:   	17.26%
blackberry:   	17.08%
chocolate:   	14.28%
blueberry:   	9.88%
black:   	7.01%
--------



and:   	59.38%
,:   	40.33%
cake:   	0.09%
.:   	0.06%
that:   	0.02%
--------



chocolate:   	15.17%
leather:   	14.78%
black:   	7.48%
a:   	5.96%
dark:   	5.92%
--------



.:   	86.8%
,:   	4.45%
flavors:   	3.14%
that:   	1.21%
flavor:   	0.6%
--------



are:   	20.88%
is:   	10.17%
':   	6.59%
[UNK]:   	4.47%
complement:   	3.32%
--------



through:   	68.62%
a:   	11.74%
the:   	7.61%
over:   	2.72%
its:   	1.36%
--------



hint:   	25.4%
touch:   	11.04%
long:   	7.97%
deep:   	4.1%
whiff:   	3.83%
--------



of:   	99.96%
at:   	0.01%
in:   	0.01%
to:   	0.01%
from:   	0.0%
--------



leather:   	12.76%
chocolate:   	11.79%
tobacco:   	4.51%
cedar:   	3.73%
dark:   	3.5%
--------



.:   	64.69%
on:   	8.22%
and:   	7.95%
bean:   	6.53%
,:   	3.31%
--------



:   	27.15%
the:   	26.41%
it:   	21.84%
a:   	3.54%
drink:   	2.11%
--------



and:   	65.83%
,:   	16.68%
in:   	4.47%
as:   	3.2%
on:   	2.39%
--------



supple:   	12.85%
velvety:   	9.62%
it:   	5.81%
polished:   	4.6%
rich:   	4.0%
--------



tannins:   	55.68%
and:   	35.1%
,:   	5.71%
structure:   	0.67%
but:   	0.37%
--------



tannic:   	7.26%
velvety:   	7.17%
integrated:   	5.63%
polished:   	5.55%
fine:   	5.29%
--------



tannins:   	37.02%
in:   	25.1%
on:   	13.96%
,:   	11.82%
texture:   	2.92%
--------



the:   	99.95%
its:   	0.01%
entry:   	0.01%
a:   	0.01%
both:   	0.01%
--------



palate:   	80.8%
finish:   	18.5%
midpalate:   	0.3%
tongue:   	0.16%
long:   	0.08%
--------



,:   	85.04%
.:   	11.57%
and:   	1.0%
that:   	0.42%
with:   	0.41%
--------



it:   	45.82%
the:   	30.87%
with:   	6.04%
this:   	3.3%
which:   	2.35%
--------



a:   	71.43%
fine:   	2.19%
an:   	1.98%
the:   	1.36%
more:   	1.23%
--------



long:   	15.71%
hint:   	9.55%
lingering:   	5.94%
velvety:   	3.86%
touch:   	3.58%
--------



of:   	100.0%
at:   	0.0%
and:   	0.0%
to:   	0.0%
from:   	0.0%
--------



chocolate:   	19.24%
tobacco:   	5.59%
black:   	5.48%
leather:   	4.86%
dark:   	3.17%
--------



herb:   	21.91%
acidity:   	15.42%
tobacco:   	9.81%
-:   	9.41%
herbs:   	6.61%
--------



.:   	36.91%
and:   	34.93%
,:   	16.91%
on:   	2.03%
that:   	1.57%
--------



:   	95.96%
it:   	0.95%
drink:   	0.8%
the:   	0.61%
a:   	0.47%
--------

