<a href="https://colab.research.google.com/github/radwaahmed20112000/QA-Chatbot/blob/main/Chatbot_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Preprocessing**

## Data Preparation

### Imports


In [None]:
import numpy as np
import pandas as pd 
import os
import io
import gzip
from google.colab import drive
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

In [None]:
from keras import backend as K
# Show the GPUs visible to the Keras backend.
# NOTE(review): _get_available_gpus is an underscore-prefixed (private) helper;
# consider the public tf.config.list_physical_devices('GPU') instead — confirm
# against the installed Keras/TensorFlow version.
K._get_available_gpus()

### Global Variables

In [None]:
# Folder on the mounted Drive that holds the raw dataset files.
drive_root_path = '/content/drive/My Drive/Colab Notebooks/chatbot project/Chatbot/'
# Fraction of each dataset held out by data_split(); that part is then halved
# into the dev and test sets.
test_dev_ratio = 0.1
# Maximum number of rows kept from each Amazon category file.
gener_examples = 20000
# Accumulators extended by data_split(); all three names start out bound to the
# same empty frame.
train_set = dev_set = test_set = pd.DataFrame(columns=['question','answer'])

### Dataset parsing


In [None]:
def parse(path):
  """Yield one record per line of a gzip-compressed file.

  Args:
    path: Location of the ``.gz`` file to read.

  Yields:
    The Python object obtained by evaluating each line of the file.
  """
  # The context manager closes the file once the generator is exhausted or
  # closed, instead of leaving the handle open.
  with gzip.open(path, 'rb') as g:
    for l in g:
      # SECURITY NOTE(review): eval() executes whatever expression the line
      # contains; only use this on trusted dataset files.
      yield eval(l)

In [None]:
def getDF(path):
  """Load every record produced by ``parse(path)`` into a DataFrame.

  Records are keyed by their position in the file (0, 1, 2, ...), and that
  position becomes the row index of the returned frame.
  """
  records_by_row = dict(enumerate(parse(path)))
  return pd.DataFrame.from_dict(records_by_row, orient='index')

## Clean Data

### Imports


In [None]:
import string
import re
import nltk
from nltk.tokenize import word_tokenize

### Punctuation Removal

In [None]:
def remove_punctuation(text):
  """Return ``text`` with every character of ``string.punctuation`` replaced by a space."""
  punctuation_to_space = dict.fromkeys(string.punctuation, ' ')
  return text.translate(str.maketrans(punctuation_to_space))

### Contraction Expansion


In [None]:
def decontracted(phrase):
    """Expand common English contractions in ``phrase`` and return the result.

    The substitutions are applied one after another in the order listed, so the
    two irregular forms ("won't", "can't") are handled before the generic
    "n't" rule.
    """
    substitutions = (
        (r"won't", "will not"),
        (r"can\'t", "can not"),
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    for pattern, expansion in substitutions:
        phrase = re.sub(pattern, expansion, phrase)
    return phrase

### Clean

In [None]:
def clean_question(x):
  """Lower-case a question, expand its contractions and blank out punctuation."""
  return remove_punctuation(decontracted(x.lower()))


def clean_answer(x):
  """Clean an answer like a question, then wrap it in the START_ / _END markers.

  The markers are added after punctuation removal, so their underscores survive.
  """
  return 'START_ ' + remove_punctuation(decontracted(x.lower())) + ' _END'

## Data load and Split

### Split

In [None]:
def data_split(dataset):
  """Split ``dataset`` into train/dev/test parts and append them to the global sets.

  A ``test_dev_ratio`` share of the rows is held out, and that held-out part is
  divided evenly between the test and dev sets. Both splits use
  ``random_state=0``.
  """
  global train_set, dev_set, test_set

  train_part, held_out = train_test_split(
      dataset, test_size=test_dev_ratio, random_state=0)
  test_part, dev_part = train_test_split(
      held_out, test_size=0.5, random_state=0)

  # Extend each global accumulator with the matching part of this dataset.
  train_set, dev_set, test_set = (
      pd.concat([train_set, train_part]),
      pd.concat([dev_set, dev_part]),
      pd.concat([test_set, test_part]),
  )

### Load

In [None]:
def load_split_amazon_dataset():
  """Load, filter, clean and split each Amazon QA category file.

  For every category file under ``drive_root_path`` this keeps the rows whose
  answer has at most 200 whitespace-separated words, truncates the result to
  the first ``gener_examples`` rows, cleans the question and answer text, and
  passes the two text columns to ``data_split``.
  """
  genre_files = ['qa_Clothing_Shoes_and_Jewelry.json.gz',
                 'qa_Health_and_Personal_Care.json.gz',
                 'qa_Sports_and_Outdoors.json.gz']

  for genre_file in genre_files:

    df = getDF(drive_root_path + genre_file)

    is_short_answer = df['answer'].apply(lambda x: len(x.split()) <= 200)
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of the original frame (SettingWithCopyWarning).
    df = df[is_short_answer].head(gener_examples).copy()

    df['question'] = df['question'].apply(clean_question)
    df['answer']   = df['answer'].apply(clean_answer)

    data_split(df[['question', 'answer']])

In [None]:
def load_split_kaggle_dataset():
  """Read the tab-separated Kaggle dialogue file, clean it and split it.

  The file has no header row; its two columns are named ``question`` and
  ``answer`` before cleaning. The cleaned frame is handed to ``data_split``.
  """
  dataset_path = drive_root_path + 'chatbot dataset.txt'
  data = pd.read_csv(dataset_path, sep="\t", header=None)
  data.columns = ["question", "answer"]

  for column, cleaner in (('question', clean_question), ('answer', clean_answer)):
    data[column] = data[column].apply(cleaner)

  data_split(data)

### Shuffle

In [None]:
def shuffle_dataset():
  """Shuffle the rows of the global train/dev/test sets and renumber their indices."""
  global train_set, dev_set, test_set

  def reshuffled(frame):
    # frac=1 samples every row; the fixed seed keeps the order reproducible.
    return frame.sample(frac=1, random_state=1).reset_index(drop=True)

  train_set, dev_set, test_set = map(reshuffled, (train_set, dev_set, test_set))

## Generate Data after processing


In [None]:
# Fill the global train/dev/test sets: Kaggle dialogue pairs first, then the
# Amazon QA categories, and finally shuffle each split.
load_split_kaggle_dataset()
load_split_amazon_dataset()
shuffle_dataset()

In [None]:
# Separate each split into model inputs (questions) and targets (answers).
x_train, y_train = train_set['question'], train_set['answer']
x_dev, y_dev     = dev_set['question'], dev_set['answer']
x_test, y_test   = test_set['question'], test_set['answer']

# **Words Vectorization**


### Imports

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.utils import to_categorical
from tensorflow.io import write_file, read_file
import gc
import pickle
import os

### Constants

In [None]:
# Drive folder under which processed arrays, checkpoints and the pickled
# vectorizer are stored.
SAVE_DIR = '/content/drive/My Drive/Colab Notebooks/chatbot project/'
# Initial vocabulary size; the Vectorization cell overwrites it with
# vectorize_layer.vocabulary_size() when that cell is run.
VOCAB_SIZE = 38140
# VOCAB_SIZE = 45924
# Number of rows written per training batch file.
BATCH_SIZE = 64

### Vectorization

In [None]:
# standardize=None: the layer applies no text normalisation of its own; the
# text was already cleaned by clean_question / clean_answer.
vectorize_layer = tf.keras.layers.TextVectorization(standardize=None)

# Build the vocabulary from the training questions and answers only.
vectorize_layer.adapt(pd.concat([x_train, y_train]))

# Replace the constant with the size of the vocabulary that was just built.
VOCAB_SIZE = vectorize_layer.vocabulary_size()

gc.collect()

#### Training Data

In [None]:
# Encoder input: vectorized questions. Decoder input: vectorized answers.
enc_input_data = vectorize_layer(x_train)
dec_input_data = vectorize_layer(y_train)
# Decoder target: the decoder input with its first column dropped ...
dec_output_data = dec_input_data[:, 1:]
# ... and a column of zeros appended so it keeps the decoder input's width.
dec_output_data = tf.concat([dec_output_data, tf.zeros((dec_output_data.shape[0], 1), dtype=tf.int64)], 1)
gc.collect()

#### Validation Data

In [None]:
# Same construction as the training tensors, applied to the dev split.
val_enc_input_data = vectorize_layer(x_dev)
val_dec_input_data = vectorize_layer(y_dev)
# Target = decoder input with its first column dropped, zero-filled on the right.
val_dec_output_data = val_dec_input_data[:, 1:]
val_dec_output_data = tf.concat([val_dec_output_data, tf.zeros((val_dec_output_data.shape[0], 1), dtype=tf.int64)], 1)

#### Testing Data

In [None]:
# Same construction as the training tensors, applied to the test split.
test_enc_input_data = vectorize_layer(x_test)
test_dec_input_data = vectorize_layer(y_test)
# Target = decoder input with its first column dropped, zero-filled on the right.
test_dec_output_data = test_dec_input_data[:, 1:]
test_dec_output_data = tf.concat([test_dec_output_data, tf.zeros((test_dec_output_data.shape[0], 1), dtype=tf.int64)], 1)

### Save Processed Data

In [None]:
def save_as_batches(foldername, np_array, batch_size):
  """Write ``np_array`` to ``SAVE_DIR/<foldername>/`` as consecutive row batches.

  Batch ``i`` holds rows ``[i*batch_size, (i+1)*batch_size)`` and is stored as
  ``<i>.npy``. A final, shorter batch is written when the number of rows is
  not a multiple of ``batch_size``.

  Args:
    foldername: Sub-folder of ``SAVE_DIR`` that receives the files.
    np_array: Two-dimensional array-like that exposes ``.shape`` and slicing.
    batch_size: Number of rows per file.
  """
  # Named target_dir rather than `dir` so the builtin is not shadowed.
  target_dir = SAVE_DIR + foldername + '/'
  # np.save does not create missing folders, so make sure the target exists.
  os.makedirs(target_dir, exist_ok=True)

  num_rows = np_array.shape[0]
  for batch_index, start in enumerate(range(0, num_rows, batch_size)):
    stop = min(start + batch_size, num_rows)
    np.save(target_dir + str(batch_index) + '.npy', np_array[start:stop, :])

In [None]:
def save(foldername, np_array):
  """Store ``np_array`` as the single file ``SAVE_DIR/<foldername>/0.npy``.

  Args:
    foldername: Sub-folder of ``SAVE_DIR`` that receives the file.
    np_array: Array-like accepted by ``np.save``.
  """
  folder = SAVE_DIR + foldername
  # np.save does not create missing folders, so make sure the target exists.
  os.makedirs(folder, exist_ok=True)
  np.save(folder + '/0.npy', np_array)

In [None]:
# Write the training tensors to Drive as BATCH_SIZE-row files.
# NOTE(review): these folder names carry an "_h" suffix, while the DataGenerator
# created before model.fit reads 'enc_input_data', 'dec_input_data' and
# 'dec_output_data' — confirm which folders are intended.
save_as_batches('dec_output_data_h', dec_output_data, BATCH_SIZE)
save_as_batches('dec_input_data_h', dec_input_data, BATCH_SIZE)
save_as_batches('enc_input_data_h', enc_input_data, BATCH_SIZE)

In [None]:
# Write each validation tensor to Drive as a single file.
# NOTE(review): saved under "*_h" folder names, but the "Load Processed Data"
# cells read 'val_dec_output_data', 'val_dec_input_data' and
# 'val_enc_input_data' — confirm which folders are intended.
save('val_dec_output_data_h', val_dec_output_data)
save('val_dec_input_data_h', val_dec_input_data)
save('val_enc_input_data_h', val_enc_input_data)

In [None]:
# Write each test tensor to Drive as a single file.
save('test_enc_input_data', test_enc_input_data)
save('test_dec_input_data', test_dec_input_data)
save('test_dec_output_data', test_dec_output_data)

### Load Processed Data


In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves pre-saved ``.npy`` batches from ``SAVE_DIR``.

    Batch ``index`` is assembled from the file ``<index>.npy`` found in each of
    the three folders given at construction time.
    """

    def __init__(self, input_folder1, input_folder2, output_folder, batch_size=64):
        self.input_folder1, self.input_folder2 = input_folder1, input_folder2
        self.output_folder = output_folder
        # Stored only; none of the methods of this class read it.
        self.batch_size = batch_size

    def __len__(self):
        # Assumes the folders contain nothing but the preprocessed batches and
        # that all three folders hold the same number of files.
        batch_files = os.listdir(SAVE_DIR + self.output_folder)
        return len(batch_files)

    def __getitem__(self, index):
        filename = '/' + str(index) + '.npy'
        # Load order: encoder input, decoder input, decoder target.
        enc_input, dec_input, dec_output = (
            np.load(SAVE_DIR + folder + filename)
            for folder in (self.input_folder1, self.input_folder2, self.output_folder)
        )
        return [enc_input, dec_input], dec_output

In [None]:
def load(foldername):
  """Return the array stored in ``SAVE_DIR/<foldername>/0.npy``."""
  return np.load(SAVE_DIR + foldername + '/0.npy')

In [None]:
# Reload the validation arrays from Drive.
# NOTE(review): the "Save Processed Data" cells write the validation arrays to
# folders named '*_h'; these reads use the names without that suffix — confirm
# the folders exist and hold the intended data.
val_dec_output_data = load('val_dec_output_data')
val_dec_input_data = load('val_dec_input_data')
val_enc_input_data = load('val_enc_input_data')

In [None]:
# Reload the test arrays from Drive.
test_dec_output_data = load('test_dec_output_data')
test_dec_input_data = load('test_dec_input_data')
test_enc_input_data = load('test_enc_input_data')

# **Training Model**

### Imports

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input, Dense, Embedding, Dropout
from tensorflow.keras.layers import GRU, LSTM, Bidirectional, Concatenate
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.losses import CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from nltk.translate.bleu_score import sentence_bleu

### Custom Evaluation Functions

In [None]:
def bleu_score(y_true, y_pred):
  """Sentence-level BLEU of ``y_pred`` against the single reference ``y_true``.

  Args:
    y_true: Reference sequence; must expose ``.numpy()``.
    y_pred: Candidate sequence; must expose ``.numpy()``.

  Returns:
    The value returned by ``sentence_bleu`` with smoothing ``method1``.
  """
  # The notebook's import cell only brings in sentence_bleu, so
  # SmoothingFunction is imported here to avoid a NameError.
  from nltk.translate.bleu_score import SmoothingFunction

  reference = y_true.numpy().tolist()
  hypothesis = y_pred.numpy().tolist()
  return sentence_bleu([reference], hypothesis,
                       smoothing_function=SmoothingFunction().method1)

### Constants

In [None]:
# Output dimension of the trainable Embedding layers.
EMBEDDING_SIZE = 50
# Units per recurrent layer (the bidirectional model's decoder uses UNITS*2).
UNITS = 100

## Embedding Layer

### Downloading Glove Embedding

In [None]:
# Download the GloVe 42B/300d archive and extract it into the working directory.
!wget http://nlp.stanford.edu/data/glove.42B.300d.zip
!apt install unzip
!unzip "glove.42B.300d.zip"

--2022-09-10 12:25:11--  http://nlp.stanford.edu/data/glove.42B.300d.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.42B.300d.zip [following]
--2022-09-10 12:25:11--  https://nlp.stanford.edu/data/glove.42B.300d.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.42B.300d.zip [following]
--2022-09-10 12:25:11--  https://downloads.cs.stanford.edu/nlp/data/glove.42B.300d.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1877800501 (1.7G) [application/zip]


In [None]:
import os
# List the working directory to check that the archive was extracted.
os.listdir()

['.config', 'glove.42B.300d.txt', 'drive', 'glove.42B.300d.zip', 'sample_data']

### Convert Glove Embedding to Word2Vec Embedding

In [None]:
from gensim.scripts.glove2word2vec import glove2word2vec
# Rewrite the GloVe text file in word2vec text format so KeyedVectors can load it.
glove2word2vec(glove_input_file="glove.42B.300d.txt", word2vec_output_file="gensim_glove_vectors.txt")

from gensim.models.keyedvectors import KeyedVectors
# Load the converted vectors (text format, hence binary=False).
glove_model = KeyedVectors.load_word2vec_format("gensim_glove_vectors.txt", binary=False)

### GloVe Layer

In [None]:
# Build the encoder and decoder embedding layers from the GloVe vectors.
# NOTE(review): the "Normal Embedding Layer" cell assigns the same two names, so
# whichever of the two cells runs last decides which embeddings the models use.
# NOTE(review): get_keras_embedding may not exist in every gensim release —
# confirm that the installed version provides it.
enc_embedding_layer = glove_model.get_keras_embedding()
dec_embedding_layer = glove_model.get_keras_embedding()

### Normal Embedding Layer

In [None]:
# Embeddings learned from scratch; mask_zero=True treats index 0 as padding to
# be masked in downstream layers.
enc_embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_SIZE, mask_zero=True)
dec_embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_SIZE, mask_zero=True)


## GRU Model

### Encoder

In [None]:
# GRU encoder: variable-length sequence of token ids -> embeddings -> GRU.
enc_input = Input(shape=(None,))
enc_embedding = enc_embedding_layer(enc_input)
# return_state=True additionally returns the final state, which seeds the decoder.
enc_output, enc_state = GRU(UNITS, return_state=True, 
                            kernel_regularizer='l2')(enc_embedding)

###  Decoder

In [None]:
# GRU decoder: starts from the encoder's final state and emits one output per
# decoder time step (return_sequences=True).
dec_input = Input(shape=(None,))
dec_embedding = dec_embedding_layer(dec_input)
dec_outputs, _ = GRU(UNITS, return_state=True, return_sequences=True, 
                     kernel_regularizer='l2')(dec_embedding, initial_state=enc_state)
dropout = Dropout(0.3)(dec_outputs)
# Softmax over the whole vocabulary at every time step.
output = Dense(VOCAB_SIZE, activation='softmax')(dropout)

## Bidirectional Model



### Encoder

In [None]:
# Bidirectional LSTM encoder.
enc_input = Input(shape=(None,))
enc_embedding = enc_embedding_layer(enc_input)

encoder = Bidirectional(LSTM(UNITS, return_state=True))

# With return_state=True the wrapper returns the output followed by the h and c
# states of the forward and the backward LSTM.
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(enc_embedding)

# Join the two directions so each state has 2*UNITS features, matching the
# decoder LSTM built with UNITS*2.
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])

enc_state = [state_h, state_c]

###  Decoder

In [None]:
# Decoder for the bidirectional encoder: an LSTM with UNITS*2 units so that it
# can be initialised with the concatenated encoder states.
dec_input = Input(shape=(None,))
dec_embedding = dec_embedding_layer(dec_input)
dec_outputs, _, _ = LSTM(UNITS*2, return_state=True, 
                         return_sequences=True)(dec_embedding, initial_state=enc_state)
dropout = Dropout(0.3)(dec_outputs)
# Softmax over the whole vocabulary at every time step.
output = Dense(VOCAB_SIZE, activation='softmax')(dropout)

## LSTM Model

### Encoder

In [None]:
# LSTM encoder: token ids -> embeddings -> LSTM.
enc_input = Input(shape=(None,))

enc_embedding = enc_embedding_layer(enc_input)

# return_state=True additionally returns the final hidden (h) and cell (c) states.
enc_outputs, state_h, state_c = LSTM(UNITS, return_state=True)(enc_embedding)
# Note: this variant stores the states as `enc_states`; the GRU and
# bidirectional cells use the name `enc_state`.
enc_states = [state_h, state_c]

###  Decoder

In [None]:
# LSTM decoder initialised with the encoder's final [h, c] states.
dec_input = Input(shape=(None,))

dec_embedding = dec_embedding_layer(dec_input)
dec_outputs, _, _  = LSTM(UNITS, return_state=True, 
                          return_sequences=True)(dec_embedding, initial_state=enc_states)
# Softmax over the whole vocabulary at every time step (no dropout in this variant).
output = Dense(VOCAB_SIZE, activation='softmax')(dec_outputs)

## Training Model

In [None]:
# Assemble the training model from whichever encoder/decoder cells were run last.
model = Model([enc_input, dec_input], output, name='model_79')
# Learning rate starts at 0.01 and decays exponentially by a factor of 0.9 per
# 10000 optimizer steps.
lr_schedule = ExponentialDecay(
    0.01,
    decay_steps=10000,
    decay_rate=0.9)
opt = Adam(learning_rate=lr_schedule)
# Sparse loss: targets are integer token ids rather than one-hot vectors.
model.compile(optimizer=opt, loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])
model.summary()

Model: "model_79"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 300)    575248200   ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, None, 300)    575248200   ['input_2[0][0]']                
                                                                                           

In [None]:
# Restore a previously saved model from Drive.
# This rebinds `model`, replacing any model built in the preceding cell.
path = SAVE_DIR + 'checkpoints/model_2'
model = load_model(path)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 200)    7628000     ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, None, 200)    7628000     ['input_2[0][0]']                
                                                                                              

In [None]:
# Stream the training batches from Drive, one .npy file per step.
data_generator = DataGenerator('enc_input_data', 'dec_input_data', 
                               'dec_output_data')
# With default arguments the checkpoint callback saves the model to this path
# after every epoch.
save_callback = ModelCheckpoint(SAVE_DIR + 'checkpoints/model_79')
history = model.fit(x=data_generator, epochs=30, validation_data=([val_enc_input_data, val_dec_input_data], val_dec_output_data), callbacks=[save_callback])

In [None]:
# Display the object returned by model.fit.
history

# **Testing Model**

In [None]:
# Loss and accuracy on the validation split.
model.evaluate([val_enc_input_data, val_dec_input_data], val_dec_output_data, batch_size=BATCH_SIZE)



[0.7589655518531799, 0.8779336810112]

In [None]:
# Loss and accuracy on the test split.
model.evaluate([test_enc_input_data, test_dec_input_data], test_dec_output_data, batch_size=BATCH_SIZE)



[0.7789523005485535, 0.874611496925354]

# **Inference Model**

## Load Vectorization Layer


In [None]:
def load_vectorizer():
  """Rebuild the TextVectorization layer stored in ``SAVE_DIR/vectorizer.pkl``.

  The pickle must contain a dict with the layer's ``'config'`` and ``'weights'``.

  Returns:
    A TextVectorization layer created from the stored config, with the stored
    weights applied.
  """
  path = SAVE_DIR +'vectorizer.pkl'
  # The context manager closes the file handle once the pickle has been read.
  # SECURITY NOTE(review): pickle.load can execute arbitrary code; only load
  # files from a trusted source.
  with open(path, "rb") as f:
    from_disk = pickle.load(f)
  vectorizer = TextVectorization.from_config(from_disk['config'])
  vectorizer.set_weights(from_disk['weights'])
  return vectorizer

In [None]:
# Restore the vectorizer that the answer() functions use to turn text into token ids.
vectorizer = load_vectorizer()

## LSTM/GRU Layers

In [None]:
# Pull the trained pieces out of `model` so they can be rewired for
# step-by-step decoding.
# NOTE(review): the indices assume the layer order of the single-direction
# LSTM/GRU model — verify them against model.summary() for the loaded model.
enc_input = model.input[0]
dec_input = model.input[1]
enc_embedding_layer = model.get_layer(index=2)
dec_embedding_layer = model.get_layer(index=3)
enc_layer = model.get_layer(index=4)
dec_layer = model.get_layer(index=5)
dec_dense_layer = model.get_layer(index=-1)

In [None]:
# Re-derive the sizes from the loaded layers; this overwrites the constants
# defined in the training section.
EMBEDDING_SIZE = dec_embedding_layer.output.shape[-1]
UNITS = dec_layer.output[0].shape[-1]

### LSTM 

#### Encoder

In [None]:
# Inference encoder for the LSTM model: maps a question to the final [h, c] states.
_ , enc_state_h, enc_state_c = enc_layer.output
enc_model = Model(enc_input, [enc_state_h, enc_state_c], 
                  name='encoder_inference_model')
enc_model.summary()

Model: "encoder_inference_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 200)         7628000   
                                                                 
 lstm (LSTM)                 [(None, 200),             320800    
                              (None, 200),                       
                              (None, 200)]                       
                                                                 
Total params: 7,948,800
Trainable params: 7,948,800
Non-trainable params: 0
_________________________________________________________________


#### Decoder

In [None]:
# Inference decoder for the LSTM model: reuses the trained layers but takes the
# recurrent states as explicit inputs and returns the updated states, so it can
# be called repeatedly while decoding.
dec_embedding = dec_embedding_layer.output 
dec_state_input_h = Input(shape=(UNITS,), name='decoder_input_state_h')
dec_state_input_c = Input(shape=(UNITS,), name='decoder_input_state_c')
dec_outputs, dec_state_output_h, dec_state_output_c = dec_layer(dec_embedding, 
                                                                     initial_state=[dec_state_input_h, dec_state_input_c])
output = dec_dense_layer(dec_outputs)
dec_model = Model([dec_input, dec_state_input_h, dec_state_input_c], 
                  [output, dec_state_output_h, dec_state_output_c], 
                  name='decoder_inference_model')
dec_model.summary()

Model: "decoder_inference_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_1 (Embedding)        (None, None, 200)    7628000     ['input_2[0][0]']                
                                                                                                  
 decoder_input_state_h (InputLa  [(None, 200)]       0           []                               
 yer)                                                                                             
                                                                                                  
 decoder_input_state_c (InputLa  [(None, 200)]       0           []         

### GRU

#### Encoder

In [None]:
# Inference encoder for the GRU model: maps a question to the final GRU state.
_ , enc_state = enc_layer.output
enc_model = Model(enc_input, enc_state, name='encoder_inference_model')
enc_model.summary()

#### Decoder

In [None]:
# Inference decoder for the GRU model: takes the recurrent state as an explicit
# input and returns the updated state alongside the vocabulary distribution.
dec_embedding = dec_embedding_layer.output
dec_state_input = Input(shape=(UNITS,), name='decoder_input_states')
dec_outputs, dec_state_output = dec_layer(dec_embedding, 
                                          initial_state=dec_state_input)
output = dec_dense_layer(dec_outputs)
dec_model = Model([dec_input, dec_state_input], [output, dec_state_output], name='decoder_inference_model')
dec_model.summary()

## Bidirectional Layers

In [None]:
# Pull the trained pieces out of the bidirectional `model`.
# NOTE(review): the indices assume the layer order of the bidirectional model —
# verify them against model.summary() for the loaded model.
enc_input = model.input[0]
dec_input = model.input[1]
enc_embedding_layer = model.get_layer(index=1)
enc_bidirectional_layer = model.get_layer(index=3)
dec_embedding_layer = model.get_layer(index=4)
concatenate_layer_1 = model.get_layer(index=5)
concatenate_layer_2 = model.get_layer(index=6)
dec_lstm_layer = model.get_layer(index=7)
dec_dense_layer = model.get_layer(index=-1)

In [None]:
# Re-derive the sizes from the loaded layers; UNITS becomes the decoder LSTM's
# output width.
EMBEDDING_SIZE = dec_embedding_layer.output.shape[-1]
UNITS = dec_lstm_layer.output[0].shape[-1]

### Encoder

In [None]:
# Inference encoder for the bidirectional model: its outputs are the two
# Concatenate layers, i.e. the joined forward/backward states.
enc_state_h = concatenate_layer_1.output
enc_state_c = concatenate_layer_2.output
enc_model = Model(enc_input, [enc_state_h, enc_state_c], name='encoder_inference_model')
enc_model.summary()

Model: "encoder_inference_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_1 (Embedding)        (None, None, 50)     1907000     ['input_2[0][0]']                
                                                                                                  
 bidirectional (Bidirectional)  [(None, 300),        241200      ['embedding_1[0][0]']            
                                 (None, 150),                                                     
                                 (None, 150),                                                     
                                 (None, 150),                               

### Decoder

In [None]:
# Inference decoder for the bidirectional model: takes the recurrent states as
# explicit inputs and returns the updated states, so it can be called
# repeatedly while decoding.
dec_embedding = dec_embedding_layer.output
dec_state_input_h = Input(shape=(UNITS,), name='decoder_input_state_h')
dec_state_input_c = Input(shape=(UNITS,), name='decoder_input_state_c')
dec_outputs, dec_state_output_h, dec_state_output_c = dec_lstm_layer(dec_embedding, 
                                                                     initial_state=[dec_state_input_h, dec_state_input_c])
output = dec_dense_layer(dec_outputs)
dec_model = Model([dec_input, dec_state_input_h, dec_state_input_c], 
                  [output, dec_state_output_h, dec_state_output_c], 
                  name='decoder_inference_model')
dec_model.summary()

Model: "decoder_inference_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_3 (Embedding)        (None, None, 50)     1907000     ['input_4[0][0]']                
                                                                                                  
 decoder_input_state_h (InputLa  [(None, 300)]       0           []                               
 yer)                                                                                             
                                                                                                  
 decoder_input_state_c (InputLa  [(None, 300)]       0           []         

## Main Loop

In [None]:
# Markers that clean_answer wraps around every answer.
START_TOKEN = 'START_'
END_TOKEN = '_END'
# Upper bound on the number of words generated for one answer.
MAX_ANSWER_LEN = 200

GRU

In [None]:
def answer(question):
  """Greedily decode an answer to ``question`` with the GRU inference models.

  The question is vectorized and encoded by ``enc_model``. Starting from
  ``START_TOKEN``, ``dec_model`` is then called one token at a time, always
  taking the most probable token, until ``END_TOKEN`` is produced or the
  length limit is reached.

  Args:
    question: Question text passed to ``vectorizer``.

  Returns:
    The generated words, each preceded by a single space.
  """
  vectorized_question = np.reshape(vectorizer(question).numpy(), (1, -1))
  print(f'question: {question}\nvectorized question: {vectorized_question}')
  states = enc_model.predict(vectorized_question)

  # Fetch the vocabulary once: it does not change while decoding, and
  # rebuilding the list on every step is wasted work.
  vocabulary = vectorizer.get_vocabulary()

  target_seq = np.reshape(vectorizer(START_TOKEN).numpy(), (1, -1))
  decoded_translation = ''
  steps = 0
  while True:
    dec_outputs, states = dec_model.predict([target_seq, states])
    steps += 1

    sampled_word_index = np.argmax(dec_outputs[0, -1, :])
    sampled_word = vocabulary[sampled_word_index]
    if sampled_word == END_TOKEN:
      break
    decoded_translation += f' {sampled_word}'

    # The word count alone cannot guarantee termination: a token that
    # str.split() ignores (for example an empty padding token) never raises
    # it. The step counter bounds the loop in that case.
    if len(decoded_translation.split()) > MAX_ANSWER_LEN or steps > MAX_ANSWER_LEN:
      break

    # Feed the token just produced back in as the next decoder input.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = sampled_word_index

  return decoded_translation

LSTM

In [None]:
def answer(question):
  """Greedily decode an answer to ``question`` with the LSTM inference models.

  The question is vectorized and encoded by ``enc_model`` into the [h, c]
  states. Starting from ``START_TOKEN``, ``dec_model`` is then called one token
  at a time, always taking the most probable token, until ``END_TOKEN`` is
  produced or the length limit is reached.

  Args:
    question: Question text passed to ``vectorizer``.

  Returns:
    The string ``'answer : '`` followed by the generated words, each preceded
    by a single space.
  """
  vectorized_question = np.reshape(vectorizer(question).numpy(), (1, -1))
  print(f'question: {question}')
  state_h, state_c = enc_model.predict(vectorized_question)

  # Fetch the vocabulary once: it does not change while decoding, and
  # rebuilding the list on every step is wasted work.
  vocabulary = vectorizer.get_vocabulary()

  target_seq = np.reshape(vectorizer(START_TOKEN).numpy(), (1, -1))
  decoded_translation = 'answer : '
  steps = 0
  while True:
    dec_outputs, state_h, state_c = dec_model.predict([target_seq, state_h, state_c])
    steps += 1

    sampled_word_index = np.argmax(dec_outputs[0, -1, :])
    sampled_word = vocabulary[sampled_word_index]
    if sampled_word == END_TOKEN:
      break
    decoded_translation += f' {sampled_word}'

    # The word count (which includes the 'answer :' prefix) cannot guarantee
    # termination on its own: a token that str.split() ignores (for example an
    # empty padding token) never raises it. The step counter bounds the loop
    # in that case.
    if len(decoded_translation.split()) > MAX_ANSWER_LEN or steps > MAX_ANSWER_LEN:
      break

    # Feed the token just produced back in as the next decoder input.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = sampled_word_index

  return decoded_translation

In [None]:
# Try the chatbot on a sample question.
# NOTE(review): training questions were passed through clean_question
# (lower-casing, contraction expansion, punctuation removal); this raw string is
# not — confirm whether it should be cleaned the same way before vectorizing.
print(answer('who would teach us the compilers subject?'))

question: who would teach us the compilers subject?
answer :  the best thing you can buy is in the same for a 5 year old
