# 🚀 GPT

In this notebook, we'll walk through the steps required to train your own GPT model on the wine review dataset.

The code is adapted from the excellent [GPT tutorial](https://keras.io/examples/generative/text_generation_with_miniature_gpt/) created by Apoorv Nandan, available on the Keras website.

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import json
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

## 0. Parameters <a name="parameters"></a>

In [2]:
# Notebook-wide hyperparameters and switches, read by the cells below.
VOCAB_SIZE = 10000  # max_tokens of the TextVectorization layer; also the embedding input size and output softmax width
MAX_LEN = 80  # tokens per model input (the vectoriser emits MAX_LEN + 1 so input/target can be shifted by one)
EMBEDDING_DIM = 256  # width of the token and position embeddings
KEY_DIM = 256  # key_dim passed to MultiHeadAttention
N_HEADS = 2  # number of attention heads
FEED_FORWARD_DIM = 256  # units of the first Dense layer in the transformer block's feed-forward part
VALIDATION_SPLIT = 0.2  # NOTE(review): not referenced by any visible cell
SEED = 42  # NOTE(review): not referenced by any visible cell, so no RNG is actually seeded with it
LOAD_MODEL = False  # when True, the model is replaced by the one stored in ./models/gpt
BATCH_SIZE = 32  # batch size of the tf.data pipeline
EPOCHS = 5  # number of epochs passed to gpt.fit

## 1. Load the data <a name="load"></a>

In [3]:
def read_and_write_json(input_file, output_file):
    """Re-serialise a JSON document with four-space indentation.

    Args:
        input_file: Path of the UTF-8 encoded JSON file to parse.
        output_file: Path the indented JSON text is written to (UTF-8). It may
            be the same path as ``input_file``: the input is read completely
            and closed before the output is opened.
    """
    with open(input_file, 'r', encoding='utf-8') as source:
        parsed = json.load(source)

    # Serialise to a string first, then write it out in one go.
    pretty_text = json.dumps(parsed, indent=4)
    with open(output_file, 'w', encoding='utf-8') as target:
        target.write(pretty_text)


def replace_unicode_with_question_mark(input_file, output_file):
    r"""Copy a text file, replacing every ``\uXXXX`` escape sequence with ``?``.

    The replacement works on the raw file text (it is not parsed as JSON).
    A sequence is only treated as an escape when its backslash is not itself
    escaped, i.e. when it is preceded by an even number of backslashes. This
    keeps text such as ``\\u1234`` (an escaped backslash followed by the
    characters ``u1234``) intact instead of turning it into the invalid JSON
    escape ``\?``.

    Args:
        input_file: Path of the UTF-8 encoded file to read.
        output_file: Path the modified text is written to (UTF-8).
    """
    # Read the content of the file
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Group 1 captures any run of escaped backslashes (pairs) directly in front
    # of the escape so they can be put back unchanged; the lookbehind makes
    # sure the run is counted from its first backslash.
    modified_content = re.sub(
        r'(?<!\\)((?:\\\\)*)\\u[0-9a-fA-F]{4}', r'\1?', content
    )

    # Write the modified content to a new file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(modified_content)


# Raw dataset file and the cleaned copy that the next cell loads.
input_file = "../../../data/wine-reviews/winemag-data-130k-v2.json"  # Replace with your input file name
output_file = "../../../data/wine-reviews/winemag-data-130k-v2_.json"  # Replace with your desired output file name
# Optional, disabled: rewrite the input file in place with indentation.
# read_and_write_json(input_file, input_file)
# Write a copy of the dataset in which every \uXXXX escape is replaced by "?".
replace_unicode_with_question_mark(input_file, output_file)

In [4]:
# Load the full dataset
# The cleaned file is written as UTF-8 by replace_unicode_with_question_mark,
# so decode it as UTF-8 explicitly instead of relying on the platform default.
with open(
    "../../../data/wine-reviews/winemag-data-130k-v2_.json", encoding="utf-8"
) as json_data:
    wine_data = json.load(json_data)

In [5]:
wine_data[10]

{'points': '87',
 'title': 'Kirkland Signature 2011 Mountain Cuv?e Cabernet Sauvignon (Napa Valley)',
 'description': 'Soft, supple plum envelopes an oaky structure in this Cabernet, supported by 15% Merlot. Coffee and chocolate complete the picture, finishing strong at the end, resulting in a value-priced wine of attractive flavor and immediate accessibility.',
 'taster_name': 'Virginie Boone',
 'taster_twitter_handle': '@vboone',
 'price': 19,
 'designation': 'Mountain Cuv?e',
 'variety': 'Cabernet Sauvignon',
 'region_1': 'Napa Valley',
 'region_2': 'Napa',
 'province': 'California',
 'country': 'US',
 'winery': 'Kirkland Signature'}

In [6]:
# Keep only the reviews that have a country, province, variety and description,
# and flatten each one into a single " : "-separated line of text
filtered_data = [
    " : ".join(
        (
            "wine review",
            review["country"],
            review["province"],
            review["variety"],
            review["description"],
        )
    )
    for review in wine_data
    if not (
        review["country"] is None
        or review["province"] is None
        or review["variety"] is None
        or review["description"] is None
    )
]

In [7]:
# Count the wine reviews that passed the filter
# NOTE(review): the printed message says "recipes" although the entries are wine reviews
n_wines = len(filtered_data)
print(f"{n_wines} recipes loaded")

129907 recipes loaded


In [8]:
example = filtered_data[25]
print(example)

wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard-designated Pinot that hails from a high-elevation site. Small in production, it offers intense, full-bodied raspberry and blackberry steeped in smoky spice and smooth texture.


## 2. Tokenize the data <a name="tokenize"></a>

In [9]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround punctuation marks and newlines with single spaces.

    Every character of ``string.punctuation`` and every newline is wrapped in
    spaces so that a whitespace tokenizer sees it as a separate token; runs of
    spaces are then collapsed to one space.

    Args:
        s: Text to pad.

    Returns:
        The padded text.
    """
    # re.escape keeps the backslash, brackets, caret and hyphen of
    # string.punctuation literal inside the character class; unescaped, the
    # backslash is consumed as an escape for "]" and is never padded.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


text_data = [pad_punctuation(x) for x in filtered_data]

In [10]:
# Display the same example wine review after punctuation padding
example_data = text_data[25]
example_data

'wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard - designated Pinot that hails from a high - elevation site . Small in production , it offers intense , full - bodied raspberry and blackberry steeped in smoky spice and smooth texture . '

In [11]:
# Convert to a Tensorflow Dataset
# Note the order: the strings are batched first and shuffle() runs on the
# batched dataset, so its buffer of 1000 holds whole batches, not single reviews.
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .batch(BATCH_SIZE)
    .shuffle(1000)
)

In [12]:
# Create a vectorisation layer
# It lower-cases the text, keeps at most VOCAB_SIZE distinct tokens and emits
# integer ids. Sequences are fixed to MAX_LEN + 1 ids: prepare_inputs drops the
# last id for the input and the first id for the target, leaving MAX_LEN each.
vectorize_layer = layers.TextVectorization(
    standardize="lower",
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)

In [13]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

In [14]:
# Display some token:word mappings
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

0: 
1: [UNK]
2: :
3: ,
4: .
5: and
6: the
7: wine
8: a
9: of


In [15]:
# Display the same example converted to ints
example_tokenised = vectorize_layer(example_data)
print(example_tokenised.numpy())

[   7   10    2   20    2   30    2   44   64    2   56    5  245 4141
  460  642   26    9  503  506  675   17   12  143   14 2226   44   25
 2499   33    8  227   14 2217  963    4  604   17 1004    3   15   77
  238    3   66   14   83   98    5   75 2649   17  200   49    5  126
   78    4    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0]


## 3. Create the Training Set <a name="create"></a>

In [16]:
# Create the training set of wine reviews and the same text shifted by one token
def prepare_inputs(text):
    """Turn a batch of review strings into (input, target) token-id tensors.

    The strings get a trailing axis added and are passed through the
    module-level ``vectorize_layer``. The input is every token id except the
    last one; the target is every token id except the first one, i.e. the same
    sequence shifted by one position.

    Args:
        text: Batch of strings, as produced by ``text_ds``.

    Returns:
        Tuple ``(x, y)`` of token-id tensors.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, -1))
    return token_ids[:, :-1], token_ids[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [17]:
example_input_output = train_ds.take(1).get_single_element()

In [18]:
# Example Input
example_input_output[0][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([   7,   10,    2,   20,    2,   30,    2,   46,   45,    2, 1785,
          5,  439,   17,   35,    5,  424,    3,   34,   12,   13,    8,
        285,  100,  441,   47,    8,   59,    3,   66,   14,   83,   24,
          7,   11,   75,    5,  133,   16,    4,   36,   11,  575, 1226,
          3,  824,   52, 2375,  122, 1236,   11, 4541,    4,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0], dtype=int64)>

In [19]:
# Example Output (shifted by one token)
example_input_output[1][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([  10,    2,   20,    2,   30,    2,   46,   45,    2, 1785,    5,
        439,   17,   35,    5,  424,    3,   34,   12,   13,    8,  285,
        100,  441,   47,    8,   59,    3,   66,   14,   83,   24,    7,
         11,   75,    5,  133,   16,    4,   36,   11,  575, 1226,    3,
        824,   52, 2375,  122, 1236,   11, 4541,    4,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0], dtype=int64)>

## 5. Create the causal attention mask function <a name="causal"></a>

In [20]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal attention mask and repeat it for every batch element.

    Entry ``[b, i, j]`` is true (1 after the cast) when
    ``i >= j - n_src + n_dest``. For ``n_dest == n_src`` this is ``i >= j``,
    so position ``i`` may attend to itself and to earlier positions only.

    Args:
        batch_size: Number of copies of the mask to stack along axis 0.
        n_dest: Number of rows of the mask.
        n_src: Number of columns of the mask.
        dtype: Type the boolean mask is cast to.

    Returns:
        Tensor of shape ``[batch_size, n_dest, n_src]``.
    """
    dest_idx = tf.range(n_dest)[:, None]
    src_idx = tf.range(n_src)
    allowed = dest_idx >= src_idx - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile multiples [batch_size, 1, 1]: repeat along the batch axis only.
    tile_multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, tile_multiples)


np.transpose(causal_attention_mask(1, 10, 10, dtype=tf.int32)[0])

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

## 6. Create a Transformer Block layer <a name="transformer"></a>

In [21]:
class TransformerBlock(layers.Layer):
    """Transformer block with causally masked self-attention.

    The block applies multi-head self-attention (masked with
    ``causal_attention_mask``), dropout, a residual connection and layer
    normalisation, followed by a two-layer feed-forward network with the same
    dropout / residual / layer-normalisation pattern.

    Args:
        num_heads: Number of attention heads.
        key_dim: ``key_dim`` passed to ``MultiHeadAttention``.
        embed_dim: Output width of the attention layer and of the second
            feed-forward layer.
        ff_dim: Units of the first (ReLU) feed-forward layer.
        dropout_rate: Rate used by both dropout layers.
        **kwargs: Standard layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``. Accepting them is what lets the
            dictionary returned by ``get_config`` be passed back to the
            constructor when the layer is re-created from its config.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.1, **kwargs
    ):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block.

        Args:
            inputs: Tensor whose first two axes are batch and sequence.

        Returns:
            Tuple ``(output, attention_scores)``: the block output and the
            attention scores returned by the attention layer.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Square mask: query and key are the same sequence.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

## 7. Create the Token and Position Embedding <a name="embedder"></a>

In [22]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        max_len: Size of the position-embedding table.
        vocab_size: Size of the token-embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Standard layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``. Accepting them is what lets the
            dictionary returned by ``get_config`` be passed back to the
            constructor when the layer is re-created from its config.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids and add the embedding of each position.

        Args:
            x: Integer token ids; the last axis is the sequence axis.

        Returns:
            Token embeddings plus position embeddings.
        """
        # Positions 0 .. seq_len - 1 are looked up in a table of max_len rows;
        # presumably the sequence must not be longer than max_len — TODO confirm.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

## 8. Build the Transformer model <a name="transformer_decoder"></a>

In [23]:
# Assemble the model: token ids -> token + position embedding -> one
# transformer block -> softmax over the vocabulary at every position.
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# The attention scores are exposed as a second model output so they can be
# inspected at generation time.
gpt = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
# One loss entry per output: cross-entropy for the token probabilities, None
# (no loss) for the attention scores.
gpt.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])

In [24]:
gpt.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddin  (None, None, 256)        2580480   
 g (TokenAndPositionEmbeddin                                     
 g)                                                              
                                                                 
 transformer_block (Transfor  ((None, None, 256),      658688    
 merBlock)                    (None, 2, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 10000)       2570000   
                                                                 
Total params: 5,809,168
Trainable params: 5,809,168
Non-trainable params: 0
___________________________________________________

In [25]:
# Optionally replace the freshly built model with the one saved in ./models/gpt.
if LOAD_MODEL:
    # Alternative kept for reference: restore weights only.
    # model.load_weights('./models/model')
    # NOTE(review): the model contains custom layers; confirm that load_model
    # restores them without custom_objects in the Keras version in use.
    gpt = models.load_model("./models/gpt", compile=True)

## 9. Train the Transformer <a name="train"></a>

In [26]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    """Callback that prints a sampled text at the end of every epoch.

    Args:
        index_to_word: Sequence mapping a token id to its word (the
            vocabulary of the vectorisation layer).
        top_k: Accepted for backward compatibility; it is stored but not used
            when sampling.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialise the base callback so the attributes Keras expects on
        # every callback exist.
        super().__init__()
        self.index_to_word = index_to_word
        self.top_k = top_k
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample one token id from a temperature-scaled distribution.

        Args:
            probs: 1-D array of token probabilities.
            temperature: Positive number. Each probability is raised to
                ``1 / temperature`` and the result is renormalised, so values
                below 1 sharpen the distribution.

        Returns:
            Tuple ``(token_id, scaled_probs)``.
        """
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend a prompt token by token and print the generated text.

        Sampling stops when the sequence holds ``max_tokens`` tokens or when
        token id 0 is sampled.

        Args:
            start_prompt: Whitespace-separated prompt. Words missing from the
                vocabulary are mapped to id 1.
            max_tokens: Upper bound on the sequence length, prompt included.
            temperature: Sampling temperature, see ``sample_from``.

        Returns:
            List with one dict per generated token, holding the prompt before
            that token (``"prompt"``), the scaled probabilities it was sampled
            from (``"word_probs"``) and the attention scores of the last
            position (``"atts"``).

        Raises:
            ValueError: If ``start_prompt`` contains no words.
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        if not start_tokens:
            # Without at least one token there is no "last position" whose
            # prediction could be sampled.
            raise ValueError("start_prompt must contain at least one word")
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the prediction for the last position is used.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample generated from the prompt "wine review"."""
        self.generate("wine review", max_tokens=80, temperature=1.0)

In [27]:
# Create a model save checkpoint (weights only, written once per epoch)
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write TensorBoard logs to ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Create the text-generation callback from the vectoriser's vocabulary
text_generator = TextGenerator(vocab)

In [28]:
gpt.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5
generated text:
wine review : france : bordeaux : bordeaux - style red blend : 92 - 94 a blend of malbec , this wine is soft and rounded . cherry , full and ripe , it has forward fruit flavors on hints of spice and wood on the finish . it ' s attractive . 

Epoch 2/5
generated text:
wine review : us : oregon : chenin blanc : awkward in tannic , this bright and fruity wine , with an emphasis on fruit flavors that include citrus rind and some chalk . not bordering on smoke , as much as fruit or some bitter bean . 

Epoch 3/5
generated text:
wine review : argentina : mendoza province : malbec : opaque ruby colored oak , with herbal , mint and herb aromas . this feels wide and grabby , with points red plum and cherry flavors . a short finish with a bit oak twist and buttery oak [UNK] add down . 

Epoch 4/5
generated text:
wine review : spain : catalonia : grenache : there ' s an at this high - elevation malbec and verdejo is tight with sharp acidity . dusty tannins and a slight b

<keras.callbacks.History at 0x2dca19b9490>

In [29]:
# Save the final model
gpt.save("./models/gpt")



INFO:tensorflow:Assets written to: ./models/gpt\assets


INFO:tensorflow:Assets written to: ./models/gpt\assets


## 10. Generate text using the Transformer

In [30]:
def print_probs(info, vocab, top_k=5):
    """Show, for every generation step, the attention and the likeliest tokens.

    For each entry of ``info`` the prompt is rendered as HTML with each word
    shaded by its attention score (averaged over the first axis of ``"atts"``
    and scaled by the largest averaged score), followed by the ``top_k`` most
    probable next tokens and their probabilities in percent.

    Args:
        info: List of dicts as returned by ``TextGenerator.generate`` (keys
            ``"prompt"``, ``"word_probs"`` and ``"atts"``).
        vocab: Sequence mapping a token id to its word.
        top_k: Number of candidate tokens printed per step.
    """
    # Local import: html is only needed here and is not imported by the notebook.
    import html

    for step in info:
        # Average once per step instead of once per word.
        mean_att = np.mean(step["atts"], axis=0)
        att_max = max(mean_att)
        highlighted_words = []
        for word, att_score in zip(step["prompt"].split(), mean_att):
            highlighted_words.append(
                '<span style="background-color:rgba(135,206,250,'
                + str(att_score / att_max)
                + ');">'
                # Tokens such as "<", ">" or "&" must not be read as markup.
                + html.escape(word)
                + "</span>"
            )
        display(HTML(" ".join(highlighted_words)))

        word_probs = step["word_probs"]
        # Token ids ordered by decreasing probability.
        top_ids = np.argsort(word_probs)[::-1][:top_k]
        for token_id in top_ids:
            prob = word_probs[token_id]
            print(f"{vocab[token_id]}:   \t{np.round(100*prob,2)}%")
        print("--------\n")

In [31]:
info = text_generator.generate(
    "wine review : us", max_tokens=80, temperature=1.0
)


generated text:
wine review : us : california : rh ? ne - style red blend : [UNK] [UNK] ii is comprised of sangiovese forest floor , blue plum and currant , and dried plum skins aromas and flavors that finish baked into a very solid tannic backbone . drink now . 



In [32]:
info = text_generator.generate(
    "wine review : italy", max_tokens=80, temperature=0.5
)


generated text:
wine review : italy : tuscany : sangiovese grosso : there ' s a touch of spice and dried fruit here . the wine has a tight , compact feel in the mouth with a bright , streamlined feel and bright acidity . 



In [33]:
info = text_generator.generate(
    "wine review : germany", max_tokens=80, temperature=0.5
)
print_probs(info, vocab)


generated text:
wine review : germany : mosel : riesling : a touch of smoke lends complexity to this dry riesling . it ' s a delicately framed riesling , with a delicate hint of petrol . it ' s a waxy , racy wine , with a delicate , ethereal expression of the mosel riesling that ' s penetrating on the palate , with a long , lingering finish . 



::   	100.0%
zealand:   	0.0%
grosso:   	0.0%
-:   	0.0%
africa:   	0.0%
--------



mosel:   	87.41%
rheingau:   	5.61%
rheinhessen:   	5.44%
pfalz:   	0.97%
nahe:   	0.55%
--------



::   	98.12%
-:   	1.88%
in:   	0.0%
and:   	0.0%
.:   	0.0%
--------



riesling:   	100.0%
pinot:   	0.0%
white:   	0.0%
ros:   	0.0%
gr:   	0.0%
--------



::   	100.0%
-:   	0.0%
grosso:   	0.0%
blanc:   	0.0%
?:   	0.0%
--------



while:   	38.26%
a:   	12.63%
this:   	9.39%
dusty:   	5.21%
fresh:   	4.92%
--------



whiff:   	28.26%
bit:   	16.01%
hint:   	13.51%
crush:   	10.97%
wisp:   	10.34%
--------



of:   	100.0%
[UNK]:   	0.0%
on:   	0.0%
off:   	0.0%
weightier:   	0.0%
--------



smoke:   	42.42%
honey:   	25.23%
sweet:   	10.93%
saffron:   	6.32%
petrol:   	1.65%
--------



lends:   	56.96%
and:   	40.13%
in:   	0.95%
,:   	0.77%
on:   	0.36%
--------



a:   	36.13%
nuance:   	29.6%
complexity:   	25.49%
elegance:   	1.63%
freshness:   	1.35%
--------



to:   	99.87%
and:   	0.07%
on:   	0.04%
in:   	0.01%
,:   	0.0%
--------



this:   	98.48%
the:   	0.84%
a:   	0.5%
fresh:   	0.07%
an:   	0.02%
--------



dry:   	34.77%
off:   	26.23%
gorgeously:   	6.5%
sweet:   	6.39%
elegant:   	2.94%
--------



riesling:   	64.34%
,:   	35.2%
wine:   	0.29%
white:   	0.08%
and:   	0.05%
--------



.:   	62.29%
,:   	35.7%
full:   	0.82%
that:   	0.72%
':   	0.12%
--------



it:   	89.73%
the:   	6.38%
fresh:   	0.91%
sweet:   	0.47%
this:   	0.32%
--------



':   	99.99%
is:   	0.01%
has:   	0.0%
balances:   	0.0%
starts:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
11:   	0.0%
d:   	0.0%
--------



a:   	57.2%
full:   	9.53%
straightforward:   	4.09%
light:   	2.4%
dry:   	2.39%
--------



bit:   	32.4%
straightforward:   	29.52%
sweet:   	4.55%
bright:   	3.15%
light:   	2.99%
--------



sweet:   	52.45%
textured:   	16.55%
structured:   	10.67%
perfumed:   	6.66%
concentrated:   	5.42%
--------



riesling:   	64.06%
kabinett:   	25.6%
,:   	3.44%
wine:   	3.41%
auslese:   	2.44%
--------



,:   	64.98%
with:   	21.85%
that:   	11.46%
.:   	0.91%
to:   	0.2%
--------



but:   	50.27%
with:   	48.32%
yet:   	1.09%
and:   	0.1%
showing:   	0.04%
--------



a:   	71.62%
sweet:   	3.78%
fresh:   	2.58%
notes:   	1.92%
bright:   	1.88%
--------



steely:   	13.0%
brisk:   	7.7%
bracing:   	7.19%
delicate:   	6.58%
hint:   	5.4%
--------



,:   	30.68%
mouthfeel:   	20.85%
floral:   	7.65%
hint:   	5.68%
mineral:   	5.52%
--------



of:   	100.0%
suggesting:   	0.0%
at:   	0.0%
to:   	0.0%
that:   	0.0%
--------



honey:   	87.41%
petrol:   	5.96%
saffron:   	1.51%
sweetness:   	0.71%
waxy:   	0.45%
--------



.:   	66.45%
,:   	8.08%
on:   	7.27%
and:   	7.08%
in:   	5.83%
--------



it:   	70.7%
the:   	12.84%
lime:   	3.13%
fresh:   	2.15%
off:   	1.86%
--------



':   	99.98%
is:   	0.01%
finishes:   	0.01%
has:   	0.0%
drinks:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
11:   	0.0%
riesling:   	0.0%
--------



a:   	67.34%
dry:   	4.95%
straightforward:   	3.94%
brisk:   	3.15%
off:   	2.93%
--------



straightforward:   	43.39%
bit:   	18.97%
deeply:   	4.29%
sweet:   	3.79%
brisk:   	2.91%
--------



,:   	65.67%
lemon:   	14.26%
lanolin:   	5.31%
slick:   	2.86%
white:   	2.14%
--------



waxy:   	61.24%
slightly:   	4.7%
floral:   	3.6%
lemon:   	3.59%
yet:   	2.89%
--------



wine:   	59.1%
riesling:   	30.5%
palate:   	3.16%
mouthfeel:   	1.49%
style:   	1.14%
--------



,:   	52.8%
with:   	37.53%
that:   	8.92%
to:   	0.35%
but:   	0.22%
--------



with:   	76.01%
but:   	22.01%
yet:   	1.59%
and:   	0.07%
finishing:   	0.06%
--------



a:   	94.71%
lime:   	0.84%
an:   	0.51%
bracing:   	0.42%
fresh:   	0.41%
--------



long:   	34.71%
brisk:   	18.03%
bracing:   	6.56%
steely:   	5.04%
lingering:   	4.8%
--------



,:   	37.26%
mouthfeel:   	13.78%
mineral:   	6.84%
touch:   	3.85%
hint:   	3.63%
--------



dancing:   	16.58%
lacy:   	15.81%
mineral:   	13.33%
perfumed:   	7.25%
elegant:   	5.77%
--------



flair:   	31.26%
character:   	21.89%
mouthfeel:   	21.35%
palate:   	4.11%
texture:   	3.36%
--------



of:   	99.98%
that:   	0.01%
,:   	0.0%
from:   	0.0%
with:   	0.0%
--------



riesling:   	84.15%
the:   	6.36%
lime:   	3.74%
its:   	0.77%
lemon:   	0.75%
--------



palate:   	46.09%
mosel:   	28.85%
riesling:   	7.66%
variety:   	4.04%
midpalate:   	3.13%
--------



riesling:   	85.5%
kabinett:   	11.96%
rieslings:   	1.3%
trocken:   	0.47%
.:   	0.17%
--------



,:   	27.2%
that:   	21.88%
.:   	15.34%
':   	12.85%
[UNK]:   	4.92%
--------



':   	84.63%
is:   	3.41%
should:   	3.31%
extends:   	2.64%
finishes:   	1.55%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
riesling:   	0.0%
persists:   	0.0%
--------



a:   	15.05%
perfect:   	14.49%
refreshing:   	9.01%
feather:   	5.52%
accentuated:   	3.39%
--------



and:   	73.02%
,:   	8.83%
in:   	8.76%
with:   	4.29%
but:   	2.5%
--------



the:   	99.68%
its:   	0.29%
a:   	0.02%
fruit:   	0.0%
elegance:   	0.0%
--------



palate:   	99.06%
finish:   	0.82%
tongue:   	0.1%
sip:   	0.01%
midpalate:   	0.01%
--------



,:   	85.31%
.:   	10.24%
with:   	2.39%
and:   	0.86%
yet:   	0.69%
--------



with:   	72.38%
but:   	22.4%
yet:   	3.81%
finishing:   	0.5%
where:   	0.28%
--------



a:   	83.53%
lime:   	4.72%
loads:   	1.28%
sweet:   	1.24%
bracing:   	0.94%
--------



long:   	63.92%
lingering:   	19.5%
kiss:   	3.77%
slick:   	1.36%
steely:   	1.09%
--------



,:   	77.56%
finish:   	16.08%
time:   	3.84%
life:   	0.9%
-:   	0.62%
--------



lingering:   	46.79%
mouthwatering:   	11.45%
mineral:   	7.5%
steely:   	4.43%
complex:   	2.41%
--------



finish:   	99.67%
kiss:   	0.28%
minerality:   	0.01%
long:   	0.01%
persistence:   	0.0%
--------



.:   	99.95%
that:   	0.02%
marked:   	0.01%
of:   	0.01%
,:   	0.0%
--------



:   	96.04%
drink:   	3.87%
it:   	0.03%
a:   	0.01%
drinks:   	0.01%
--------

