# Model 01

First model.  Do something simple with segmentation or autoencoders

## Imports and Constants, etc.

In [None]:
import datetime
import importlib
import keras
from keras.layers import (Dense, SimpleRNN, Input, Conv1D, 
                          LSTM, GRU, AveragePooling3D, Conv3D, 
                          UpSampling3D, BatchNormalization)
from keras.models import Model
import nibabel as nib
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import projd
import random
import re
import scipy
import shutil
import sys
from sklearn.model_selection import train_test_split
import uuid

import matplotlib.pyplot as plt # data viz
import seaborn as sns # data viz

import imageio # display animated volumes
from IPython.display import Image # display animated volumes

from IPython.display import SVG # visualize model
from keras.utils.vis_utils import model_to_dot # visualize model


# Notebook-wide defaults.  Functions below use these as default argument values.
SEED = 0  # default random_state for the train/validation shuffle in get_datagens
EPOCHS = 10  # default number of training epochs in train_model
BATCH_SIZE = 1  # default number of scans per batch in get_datagens
PATCH_SHAPE = (32, 32, 32)  # default crop shape for random_crop callers

MODEL_NAME = 'model_01'  # prefix for checkpoint and log file names

# Filesystem layout: everything lives under ~/data/2018.
DATA_DIR = Path('~/data/2018').expanduser()
NORMAL_SCANS_DIR = DATA_DIR / 'uvmmc/nifti_normals'
PROJECT_DATA_DIR = DATA_DIR / 'uvm_deep_learning_project'
PP_IMG_DIR = PROJECT_DATA_DIR / 'uvmmc' / 'preprocessed' # preprocessed scans dir
PP_MD_PATH = PROJECT_DATA_DIR / 'uvmmc' / 'preprocessed_metadata.pkl'  # pickled metadata DataFrame

MODELS_DIR = PROJECT_DATA_DIR / 'models'
LOG_DIR = PROJECT_DATA_DIR / 'log'
TENSORBOARD_LOG_DIR = PROJECT_DATA_DIR / 'tensorboard'
TMP_DIR = DATA_DIR / 'tmp'

# Create any directory that does not exist yet (side effect of running this cell).
for d in [DATA_DIR, NORMAL_SCANS_DIR, PROJECT_DATA_DIR, PP_IMG_DIR, MODELS_DIR, LOG_DIR, 
          TENSORBOARD_LOG_DIR, TMP_DIR, PP_MD_PATH.parent]:
    if not d.exists():
        d.mkdir(parents=True)
        
# IPython magic (not plain Python): render matplotlib figures inside the notebook.
%matplotlib inline
sns.set()


## Utilities

In [None]:

def temp_gif_path(tmp_dir=TMP_DIR):
    '''
    Build a unique path for a throwaway animated gif inside tmp_dir.

    Nothing is written here; the caller creates the file.  No cleanup of these
    files is visible in this notebook.

    tmp_dir: directory (Path) the returned path points into.
    return: the path as a str, named tmp_<uuid4 hex>.gif
    '''
    filename = 'tmp_' + uuid.uuid4().hex + '.gif'
    gif_path = tmp_dir / filename
    return str(gif_path)

    
def get_nifti_files(path):
    '''
    Recursively collect every .nii file located under path.

    path: a pathlib.Path directory to search.
    return: list of Path objects, one per match.
    '''
    matches = [nifti_path for nifti_path in path.glob('**/*.nii')]
    return matches


def sample_stack(stack, rows=3, cols=3, start_with=0, show_every=3, r=0):
    '''
    Plot a grid of images (2d slices) sampled from stack.

    stack: 3-d voxel array.  Slices are taken along the 3rd axis.
    rows, cols: grid dimensions; rows * cols slices are shown.
    start_with: index of the first slice to show.
    show_every: distance between consecutive slices shown.
    r: rotation applied to every slice before display; 0 means no rotation.
       Passed to scipy.ndimage.rotate as the angle (degrees).
    '''
    # rotate was used below without ever being imported; bring it in locally.
    from scipy.ndimage import rotate

    # squeeze=False keeps ax 2-d even when rows == 1 or cols == 1, so the
    # [row, col] indexing below always works.
    fig, ax = plt.subplots(rows, cols, figsize=[20, 20], squeeze=False)
    for i in range(rows * cols):
        ind = start_with + i * show_every
        cell = ax[i // cols, i % cols]
        cell.set_title('slice %d' % ind)

        img_slice = stack[:, :, ind]
        if r != 0:
            img_slice = rotate(img_slice, r)
        cell.imshow(img_slice, cmap='gray')
        cell.axis('off')
    plt.show()


def make_animated_gif(path, img, start=0, stop=None, step=1):
    '''
    Create animated gif of 3d image, where each frame is a 2-d image taken from 
    iterating across the 3rd dimension.  E.g. the ith 2d image is img[:, :, i]

    path: where to save the animated gif
    img: a 3-d volume
    start: index of 3rd dimension to start iterating at.  default = 0.
    stop: index of 3rd dimension to stop at, not inclusive.  Default is None,
      meaning stop at img.shape[2].  Values past the end are clamped.
    step: number of slices to advance between frames.
    '''
    # honor the stop argument (it was previously ignored)
    depth = img.shape[2]
    stop = depth if stop is None else min(stop, depth)

    # convert to uint8 to suppress warnings from imageio
    imax = img.max()
    imin = img.min()
    if imax > imin:
        img = 255 * ((img - imin) / (imax - imin)) # scale to 0..255
    else:
        # constant image: avoid dividing by zero, render it as all black
        img = np.zeros(img.shape)
    img = np.uint8(img)

    with imageio.get_writer(path, mode='I') as writer:
        for i in range(start, stop, step):
            writer.append_data(img[:, :, i])

    
def animate_crop(img, crop=(0, 1, 0, 1, 0, 1), axis=2, step=5):
    '''
    Crop a 3d volume, write it to a temporary animated gif and return it for display.

    img: a 3d volume to be cropped and animated.
    crop: 6 numbers: (start, end) for axis 0, then axis 1, then axis 2.  Each
      number is a proportion in [0.0, 1.0] of that axis' length: 0.0 is the
      beginning, 0.5 the middle and 1.0 the end.
    axis: 0, 1 or 2: the axis to animate along.  The cropped volume is
      transposed so that this axis becomes the 3rd axis.
    step: only every step-th 2d slice becomes a frame.
    return: ipython Image, for display in a notebook.
    '''
    # turn each pair of proportions into voxel indices along its axis
    pix = [[int(p * img.shape[dim]) for p in crop[2 * dim:2 * dim + 2]]
           for dim in range(3)]
    cropped_img = img[pix[0][0]:pix[0][1],
                      pix[1][0]:pix[1][1],
                      pix[2][0]:pix[2][1]]

    # rotate the axis order so that the requested axis ends up last
    order = [0, 1, 2]
    shift = 2 - axis
    cropped_img = cropped_img.transpose(order[-shift:] + order[:-shift])

    tmp_path = temp_gif_path()
    print('temp gif path:', tmp_path)
    make_animated_gif(tmp_path, cropped_img, step=step)
    return Image(filename=tmp_path)


def animate_scan_info_crop(scan_info, i, crop=(0, 1, 0, 1, 0, 1), axis=0, step=3):
    '''
    Load the scan referenced by row i of scan_info and animate a crop of it.

    scan_info: DataFrame with a 'path' column holding nifti file paths.
    i: index label of the row to display (looked up with .loc).
    crop, axis, step: forwarded to animate_crop.
    return: ipython Image, for display in a notebook.
    '''
    path = scan_info.loc[i, 'path']
    print('scan path:', path)
    # get_data() is deprecated in nibabel; asanyarray(dataobj) is its
    # documented equivalent and keeps the on-disk dtype.
    img = np.asanyarray(nib.load(path).dataobj)
    print('scan img shape:', img.shape)
    return animate_crop(img, crop, axis=axis, step=step)
    

def get_data_infos(paths):
    '''
    Build a DataFrame describing each nifti scan in paths.

    paths: Path objects pointing at nifti scans.
    return: DataFrame with one row per scan and these columns:
      id: file name without the trailing .nii
      path: the path as a str
      nft, header, affine: the loaded nibabel image, its header and its affine
      pixdim, dim: header fields 'pixdim' and 'dim', elements 1..3
      pixdim0..2, dim0..2: the individual elements of pixdim and dim
      qform_code, sform_code, sizeof_hdr, desc: further header fields
      class: always 'normal'

    The rows can be shuffled and split into train and test sets.
    '''
    # raw string: '\.' is an invalid escape sequence in a normal string literal
    infos = pd.DataFrame({
        'id': [re.sub(r'\.nii$', '', p.name) for p in paths],
        'path': [str(p) for p in paths],
    })
    infos['nft'] = infos['path'].apply(lambda p: nib.load(p))
    infos['header'] = infos['nft'].apply(lambda nft: nft.header)
    infos['affine'] = infos['nft'].apply(lambda nft: nft.affine)
    infos['pixdim'] = infos['header'].apply(lambda h: h['pixdim'][1:4])
    infos['dim'] = infos['header'].apply(lambda h: h['dim'][1:4])
    for field in ('qform_code', 'sform_code', 'sizeof_hdr'):
        infos[field] = infos['header'].apply(lambda h, f=field: h[f])
    for axis in range(3):
        infos[f'pixdim{axis}'] = infos['pixdim'].apply(lambda x, a=axis: x[a])
    for axis in range(3):
        infos[f'dim{axis}'] = infos['dim'].apply(lambda x, a=axis: x[a])
    infos['desc'] = infos['header'].apply(lambda h: h['descrip'])
    infos['class'] = ['normal'] * len(infos)
    # The frame was just built with a fresh 0..n-1 index, so the former
    # (discarded) reset_index() call was a no-op and has been dropped.
    return infos




## Data Preprocessing

1. Read in the original images.
2. Resample the image s.t the voxel size is 1mm x 1mm x 1mm.
3. Clip Hounsfield unit values and normalize them to lie between 0 and 1 (as Julian de Wit recommends for Kaggle).

Some processing code is from https://github.com/juliandewit/kaggle_ndsb2017/blob/master/step1_preprocess_luna16.py.


In [None]:


def get_image(path):
    '''
    Read a nifti scan from the filesystem.

    path: path to a nifti file.
    return: the voxel data as an array.
    '''
    # get_data() is deprecated in nibabel; asanyarray(dataobj) is its
    # documented equivalent and keeps the on-disk dtype.
    img = np.asanyarray(nib.load(path).dataobj)
    return img
   
    
def get_preprocessed_image(path):
    '''
    Load a volume that was saved to path with np.save.

    path: location of the .npy file.
    return: the stored array.
    '''
    volume = np.load(path)
    return volume

    
def get_processed_image(info, crop_shape=PATCH_SHAPE):
    '''
    Load one raw scan, resample it to roughly 1mm^3 voxels and take a random crop.

    info: a row / mapping with keys 'path', 'pixdim0', 'pixdim1' and 'pixdim2'.
    crop_shape: shape of the random crop taken from the resampled volume.
    return: (cropped image, voxel spacing of the resampled image).  The image
      is not intensity-normalized here.
    '''
    img = get_image(info['path'])

    # use 1mm^3 voxel size to reduce image size.
    spacing = (info['pixdim0'], info['pixdim1'], info['pixdim2'])
    new_spacing = (1.0, 1.0, 1.0)
    resampled_img, adj_new_spacing = resample_image(img, spacing, new_spacing)

    # augment image for training
    cropped_img = random_crop(resampled_img, crop_shape)

    return cropped_img, adj_new_spacing
    

def resample_image(image, spacing, new_spacing):
    '''
    Resample a volume so that its voxels have (approximately) a new size.

    image: a 3d volume
    spacing: the size of a voxel in some units.  E.g. [0.3, 0.3, 0.9]
    new_spacing: the size of a voxel after resampling, in some units.  E.g. [1.0, 1.0, 1.0]

    returns: resampled image and new spacing adjusted because images have integer dimensions.
    '''
    # Import zoom explicitly: a bare `import scipy` is not guaranteed to expose
    # scipy.ndimage, and the scipy.ndimage.interpolation namespace is deprecated.
    from scipy.ndimage import zoom

    spacing = np.asarray(spacing, dtype=float)
    new_spacing = np.asarray(new_spacing, dtype=float)
    old_shape = np.array(image.shape)

    # ideal (fractional) shape, rounded to whole voxels; keep at least 1 voxel
    # per axis so the resize factor below never divides into an empty axis
    new_shape = np.maximum(np.round(old_shape * (spacing / new_spacing)), 1)
    real_resize_factor = new_shape / old_shape

    # adjusted spacing to account for integer dimensions of resized image.
    adjusted_spacing = spacing / real_resize_factor

    new_image = zoom(image, real_resize_factor)
    return new_image, adjusted_spacing


def normalize_image(image):
    '''
    Map hounsfield units linearly so that -1000 becomes 0 and 1000 becomes 1,
    then clip everything outside that range to 0 or 1.

    image: array of voxel values in hounsfield units.  It is not modified.
    return: a new array with values between 0 and 1.
    '''
    # Air: -1000, Water: 0, Bone: 200, 700, 3000 hounsfield units.
    # https://en.wikipedia.org/wiki/Hounsfield_scale
    lower, upper = -1000.0, 1000.0
    scaled = (image - lower) / (upper - lower)
    return np.clip(scaled, 0., 1.)


def get_preprocessed_image_path(scan_id, preprocessed_dir):
    '''
    Return, as a str, the path of the .npy file for scan_id inside preprocessed_dir.
    '''
    filename = '{}.npy'.format(scan_id)
    return str(Path(preprocessed_dir) / filename)


def preprocess_nifti_normals(src_dir=NORMAL_SCANS_DIR, dest_dir=PP_IMG_DIR, delete_existing=False,
                             metadata_path=PP_MD_PATH):
    '''
    Resample and normalize every nifti scan under src_dir and save the results.

    Each scan is resampled toward 1mm^3 voxels, normalized with normalize_image
    and saved as <scan id>.npy in dest_dir.  Metadata about every scan,
    including the preprocessed path and spacing, is pickled to metadata_path.

    src_dir: directory (Path) searched recursively for .nii files.
    dest_dir: directory (Path) that receives the .npy files.
    delete_existing: if True, remove dest_dir and its contents first.
    metadata_path: where the metadata DataFrame is written.
    return: the metadata DataFrame.
    '''
    # Path has is_dir(), not isdir(); the old spelling raised AttributeError
    if delete_existing and dest_dir.is_dir():
        print('Removing existing dest dir:', dest_dir)
        shutil.rmtree(dest_dir)
    if not dest_dir.exists():
        print('Making preprocessed images destination:', dest_dir)
        dest_dir.mkdir(parents=True)

    infos = get_data_infos(get_nifti_files(src_dir))
    # Standardize voxel size to 1mm^3 to reduce image size.
    target_spacing = (1.0, 1.0, 1.0)
    for i in infos.index:
        info = infos.loc[i, :]
        img_path = info['path']
        img = get_image(img_path)
        scan_id = info['id']
        print('image index:', i)
        print('image id:', scan_id)
        print('image shape:', img.shape)
        print('image path:', img_path)

        spacing = (info['pixdim0'], info['pixdim1'], info['pixdim2'])
        print('image spacing:', spacing)
        print('new spacing:', target_spacing)
        resampled_img, resampled_spacing = resample_image(img, spacing, target_spacing)
        print('resampled image spacing:', resampled_spacing)
        print('resampled image shape:', resampled_img.shape)

        normalized_img = normalize_image(resampled_img)
        print('Normalized image shape:', normalized_img.shape)

        # save processed image
        path = get_preprocessed_image_path(scan_id, dest_dir)
        print(f'Saving preprocessed image to {path}.')
        np.save(path, normalized_img)

        # track image metadata
        infos.loc[i, 'pp_path'] = str(path)
        infos.loc[i, 'pp_pixdim0'] = resampled_spacing[0] # pixdim0
        infos.loc[i, 'pp_pixdim1'] = resampled_spacing[1] # pixdim1
        infos.loc[i, 'pp_pixdim2'] = resampled_spacing[2] # pixdim2

    # save metadata where the caller asked for it (metadata_path was ignored before)
    write_preprocessed_metadata(infos, metadata_path)
    return infos
    
        
def write_preprocessed_metadata(infos, path=PP_MD_PATH):
    '''
    Pickle the metadata object infos to path, overwriting any existing file.
    '''
    serialized = pickle.dumps(infos)
    with open(path, 'wb') as out_file:
        out_file.write(serialized)
    
    
def read_preprocessed_metadata(path=PP_MD_PATH):
    '''
    Read back the metadata object that was pickled to path.

    NOTE: unpickling can execute arbitrary code, so only point this at
    metadata files you trust.
    '''
    with open(path, 'rb') as in_file:
        raw = in_file.read()
    return pickle.loads(raw)



### Preprocess Images and Save to Disk

In [None]:
# Uncomment to preprocess images
# infos = preprocess_nifti_normals()

### Testing and Validating Functions

In [None]:
# Test getting a raw image
# Load the saved metadata instead of relying on `infos`: that name is only
# defined if the (commented-out) preprocessing cell above was run.
data_infos = read_preprocessed_metadata()
img_info = data_infos.iloc[0]
img = get_image(img_info['path'])

In [None]:
# Animate the raw scan along axis 2, using every 5th slice as a frame.
animate_crop(img, axis=2, step=5)

In [None]:
# Test that the resampled image has more or less the shape we expect it to have after resizing the voxels.
# Uses img and img_info from the raw-image cell above.

img_spacing = (img_info['pixdim0'], img_info['pixdim1'], img_info['pixdim2'])
print('Shape and spacing before resampleing\t', img.shape, img_spacing)
target_img_spacing = (1., 1., 1.)
print('Target spacing:', target_img_spacing)
resampled_img, resampled_spacing = resample_image(img, img_spacing, target_img_spacing)
print ("Shape after resampling\t", resampled_img.shape, resampled_spacing)
# Animate the resampled volume with animate_crop's default crop, axis and step.
animate_crop(resampled_img)


In [None]:
# Test reading metadata, which contains the preprocessed image spacings and file paths
infos = read_preprocessed_metadata()
# one (pp_pixdim0, pp_pixdim1, pp_pixdim2) tuple per scan
pp_spacings = list(zip(infos['pp_pixdim0'], infos['pp_pixdim1'], infos['pp_pixdim2']))
# path of each preprocessed .npy file
pp_paths = list(infos['pp_path'])

In [None]:
# Show the spacing of the first five preprocessed scans.
pp_spacings[:5]

In [None]:
# Show the paths of the first five preprocessed scans.
pp_paths[:5]

In [None]:
# Test that preprocessed images look reasonable when visualized
# Looks up rows 0, 1 and 2 by index label, so the metadata needs at least three scans.
for i in range(3):
    img = get_preprocessed_image(infos.loc[i, 'pp_path'])
    scan_id = infos.loc[i, 'id']
    print(f'image {i} scan id {scan_id} shape {img.shape}')
    display(animate_crop(img))

## Data Generators

Yield batch-sized random samples of training data.

In [None]:
def random_crop(img, shape):
    '''
    Randomly crop an image to a shape.  Location is chosen at random from
    all possible crops of the given shape.

    img: a volume to crop.  It must have at least len(shape) axes; any
      further trailing axes are kept whole.
    shape: size of cropped volume.  e.g. (32, 32, 32)
    return: the cropped region of img.
    '''
    ndim = len(shape)
    assert all(img.shape[i] >= shape[i] for i in range(ndim)), \
        'crop shape %s does not fit in image shape %s' % (tuple(shape), img.shape)

    # the starting corner of the crop.  randint is inclusive on both ends, so
    # if img.shape[i] == shape[i] the only possible start is 0.
    starts = [random.randint(0, img.shape[i] - shape[i]) for i in range(ndim)]

    # Index with a *tuple* of slices: indexing with a list of slices is
    # rejected by current numpy.  This also works for any number of axes.
    window = tuple(slice(start, start + size) for start, size in zip(starts, shape))
    return img[window]
        

def augment_image(img, crop_shape=PATCH_SHAPE):
    '''
    Augment a volume for training.  Currently this is only a random crop.

    img: the volume to augment.
    crop_shape: shape of the crop handed to random_crop.
    return: the cropped volume.
    '''
    augmented = random_crop(img, crop_shape)
    return augmented


class ScanSequence(keras.utils.Sequence):
    '''
    Keras Sequence serving batches of preprocessed scans for an autoencoder:
    every batch is an (inputs, targets) pair built from the same volumes.
    '''

    def __init__(self, x_infos, batch_size):
        '''
        x_infos: DataFrame with a 'pp_path' column of paths to preprocessed images
        batch_size: number of scans per batch
        '''
        self.batch_size = batch_size
        # renumber the rows 0..n-1 so that __getitem__ can slice by label
        self.x = x_infos.reset_index()

    def __len__(self):
        '''
        Return number of batches, based on batch_size.  The last batch may be smaller.
        '''
        n_batches = np.ceil(len(self.x) / float(self.batch_size))
        return int(n_batches)

    def __getitem__(self, idx):
        '''
        idx: batch index
        return: (batch_x, batch_x) as two arrays with a trailing channel axis
        '''
        first = idx * self.batch_size
        # .loc slices by label and includes the end label, hence the -1
        last = (idx + 1) * self.batch_size - 1
        paths = list(self.x.loc[first:last, 'pp_path'])

        volumes = []
        for path in paths:
            # add channel dimension to each image.
            volumes.append(np.expand_dims(get_preprocessed_image(path), axis=-1))

        inputs = np.array(volumes)
        targets = np.array(volumes)
        return (inputs, targets)
    

def get_datagens(seed=SEED, validation_split=0.25, preprocessed_metadata_path=PP_MD_PATH, batch_size=BATCH_SIZE):
    '''
    Build the training and validation batch generators.

    The preprocessed metadata is shuffled with seed; the first
    int(n * validation_split) rows become the validation set and the rest
    the training set.

    return: (train_gen, val_gen), both ScanSequence instances.
    '''
    shuffled = read_preprocessed_metadata(preprocessed_metadata_path).sample(frac=1, random_state=seed)
    n_val = int(len(shuffled) * validation_split)
    # NOTE(review): reindex() without arguments looks like it leaves the labels
    # untouched; ScanSequence renumbers the rows itself.
    val_infos = shuffled.iloc[:n_val, :].reindex()
    train_infos = shuffled.iloc[n_val:, :].reindex()
    train_gen = ScanSequence(train_infos, batch_size)
    val_gen = ScanSequence(val_infos, batch_size)
    return train_gen, val_gen



### Testing and Validating Functions

In [None]:
# Test that the random crop is producing what look like random crops.
# First show the whole preprocessed volume, then five PATCH_SHAPE crops of it.
img = get_preprocessed_image(pp_paths[0])
display(animate_crop(img, step=1))
for i in range(5):
    display(animate_crop(random_crop(img, PATCH_SHAPE), step=1))

In [None]:
# test getting a batch of data from ScanSequence
# keep only the training generator; the validation generator is discarded
seq, _ = get_datagens()
# number of batches in the training generator
print(len(seq))

In [None]:
# Fetch the first training batch; ScanSequence returns the inputs twice (inputs, targets).
batch_x, batch_y = seq[0]

In [None]:
# test that a batch picture looks like a preprocessed image.
print(batch_x.shape, batch_y.shape)
# index 0 picks the first example and the trailing 0 the single channel,
# leaving a 3-d volume for animate_crop
display(animate_crop(batch_x[0, :, :, :, 0])) # drop the example and channel dimensions
display(animate_crop(batch_y[0, :, :, :, 0]))

## Build Model


In [None]:
def build_model():
    '''
    Build and compile a small 3-d convolutional autoencoder.

    The network takes a single-channel volume of arbitrary spatial size,
    downsamples once (conv -> batch norm -> average pooling), upsamples once
    (conv -> batch norm -> upsampling) and maps back to one channel with a
    final convolution that has no activation.  Deeper encoder and decoder
    stages existed in an earlier version of this cell but were disabled.

    NOTE(review): pooling followed by upsampling presumably only restores the
    input shape when every spatial dimension is even - confirm before feeding
    whole scans.

    return: the keras Model, compiled with adam, mean squared error loss and
      an accuracy metric.
    '''
    n_filters = 32

    def conv_bn(tensor):
        # 3x3x3 'same' convolution with relu, followed by batch normalization
        tensor = Conv3D(n_filters, kernel_size=(3, 3, 3), padding='same', activation='relu')(tensor)
        return BatchNormalization()(tensor)

    x_input = Input(shape=(None, None, None, 1)) # arbitrary shape, 1 channel

    encoded = AveragePooling3D()(conv_bn(x_input)) # shape / 2, n_filters filters
    decoded = UpSampling3D()(conv_bn(encoded)) # back to input shape, n_filters filters

    y = Conv3D(1, kernel_size=(3, 3, 3), padding='same')(decoded) # 1 channel

    model = Model(inputs=x_input, outputs=y)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
    return model
    
model = build_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
model.summary()
# render the model graph as an SVG in the notebook
SVG(model_to_dot(model).create(prog='dot', format='svg'))

## Train and Evaluate Model

- Add callbacks to save the model after every epoch (the checkpoint callback uses `period=1`) and to log performance stats every epoch, so we have the results saved somewhere for charting.


In [None]:
# Callbacks include ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
# Save the model
def train_model(model, train_gen, val_gen, epochs=EPOCHS, batch_size=BATCH_SIZE, models_dir=MODELS_DIR, model_name=MODEL_NAME, log_dir=LOG_DIR,
                tensorboard_log_dir=TENSORBOARD_LOG_DIR):
    '''
    Train model on train_gen, validating on val_gen, with checkpointing,
    early stopping, CSV logging and TensorBoard logging.

    model: a compiled keras model.
    train_gen, val_gen: batch generators for training and validation data.
    epochs: maximum number of epochs to train.
    batch_size: not used here; the generators decide the batch size.
    models_dir: directory (Path) for <model_name>_<epoch>.h5 checkpoints.
    model_name: prefix for checkpoint and log file names.
    log_dir: directory (Path) for the per-run CSV log, named
      <model_name>_<ISO timestamp>_log.csv.
    tensorboard_log_dir: directory for TensorBoard event files.
    return: the keras History object returned by fit_generator.
    '''
    # Save the full model after every epoch
    model_path = models_dir / (model_name + '_{epoch:02d}.h5')
    print('model path:', model_path)
    checkpoint_cb = keras.callbacks.ModelCheckpoint(
        str(model_path), monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False,
        mode='auto', period=1)

    # Stop when validation loss stops improving
    early_cb = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

    # Save logs for each run to logfile
    log_path = log_dir / (model_name + '_' + datetime.datetime.now().isoformat() + '_log.csv')
    print('log path:', log_path)
    log_cb = keras.callbacks.CSVLogger(str(log_path), separator=',', append=False)

    # Enable Tensorboard
    print('tensorboard log dir:', tensorboard_log_dir)
    tensorboard_cb = keras.callbacks.TensorBoard(log_dir=str(tensorboard_log_dir),
                                                 histogram_freq=0, write_graph=True, write_images=True)

    # Fit Model.  early_cb was previously created but never handed to keras,
    # so early stopping could not trigger.
    history = model.fit_generator(train_gen, epochs=epochs, validation_data=val_gen,
                                  callbacks=[checkpoint_cb, early_cb, log_cb, tensorboard_cb],
                                  max_queue_size=1)
    return history

In [None]:
# Build the generators with the notebook defaults and train the model built above.
train_gen, val_gen = get_datagens()
history = train_model(model, train_gen, val_gen)

## Visualize Training Progress

In [None]:
# read metrics from the log file
# train_model() keeps its log path local, so `log_path` was undefined here.
# Pick the most recently modified CSV log that train_model wrote for this model.
log_path = max(LOG_DIR.glob(MODEL_NAME + '_*_log.csv'), key=lambda p: p.stat().st_mtime)
print('log path:', log_path)
metrics = pd.read_csv(str(log_path))

In [None]:
# metrics[::10] keeps rows 0, 10, 20, ...; metrics[-1:] appends the final row
print(pd.concat([metrics[::10], metrics[-1:]])) # every 10th metric and the last one

In [None]:
# Plot Training and Validation Accuracy 
# NOTE(review): the "acc"/"val_acc" column names presumably depend on the keras
# version that wrote the log - confirm against the CSV header.
axes = plt.gca()
axes.set_ylim([0.0,1.0]) # Show results on 0..1 range
plt.plot(metrics["acc"])
plt.plot(metrics["val_acc"])
plt.legend(['Training Accuracy', "Validation Accuracy"])
plt.show()

# Plot Training and Validation Loss
plt.plot(metrics["loss"])
plt.plot(metrics["val_loss"])
plt.legend(['Training Loss', "Validation Loss"])
plt.show()



## Show Effect of Training on Text Generation

Use models from different training epochs to generate text.


In [None]:
def get_model_path(model_name, epoch, models_dir=None):
    '''
    Return the checkpoint path for model_name at the given epoch.

    The file name is <model_name>_<epoch, zero-padded to 2 digits>.h5, the
    same pattern train_model uses when saving checkpoints.

    model_name: prefix of the checkpoint file name.
    epoch: epoch number of the checkpoint.
    models_dir: directory holding the checkpoints.  Defaults to MODELS_DIR
      (the function used to reference an undefined name `models_dir`).
    return: the checkpoint location as a Path.
    '''
    if models_dir is None:
        models_dir = MODELS_DIR
    model_path = Path(models_dir) / (model_name + f'_{epoch:02d}.h5')
    return model_path


def weighted_sample(probs):
    '''
    Draw one index per row of probs, treating each row as a probability distribution.

    probs: 2d array where each row is a separate probability distribution, e.g.
      [[0.8, 0.1, 0.1],
       [0.2, 0.5, 0.3]]
    return: 1d array holding one randomly sampled column index per row.
    '''
    # np.random.choice has no axis argument, so sample all rows at once instead:
    # https://stackoverflow.com/questions/40474436/how-to-apply-numpy-random-choice-to-a-matrix-of-probability-values-vectorized-s
    cdf = probs.cumsum(axis=1)
    # one uniform draw per row ...
    draws = np.random.rand(len(cdf), 1)
    # ... and the sampled index is the first column whose cumulative
    # probability exceeds that draw
    return np.argmax(draws < cdf, axis=1)
        
    
def max_sample(probs):
    '''
    Return the index of the largest value along the last axis of probs.
    '''
    best = np.asarray(probs).argmax(axis=-1)
    return best


def seed_text(text, seq_len):
    '''
    Return a random slice of text that is seq_len items long.

    text: the sequence to sample from.
    seq_len: length of the slice.
    return: text[start:start + seq_len] for a uniformly random start.
    raises: ValueError if seq_len is larger than len(text).
    '''
    if seq_len > len(text):
        raise ValueError('seq_len %d is longer than the text (%d)' % (seq_len, len(text)))
    # randint excludes its upper bound, so add 1: otherwise the last window is
    # never chosen and len(text) == seq_len raises.
    start = np.random.randint(0, len(text) - seq_len + 1)
    return text[start:(start + seq_len)]


def generate_text_for_epochs(model_name, epochs, text, seq_len, vocab_size, num_samples, sample_len):
    '''
    Load the checkpoint saved at each epoch and print text samples generated by it.

    model_name: prefix of the checkpoint files, see get_model_path.
    epochs: the epoch numbers whose checkpoints should be loaded.
    text, seq_len, vocab_size: forwarded to generate_text.
    num_samples: how many samples to print per epoch.
    sample_len: length of each generated sample.
    '''
    for epoch in epochs:
        path = get_model_path(model_name, epoch)
        # pass a plain str, as train_model does when it writes the checkpoints
        model = keras.models.load_model(str(path))
        print('Epoch {}:'.format(epoch))
        for _ in range(num_samples):
            # only the generated sample is printed; the seed is not needed here
            _seed, sample = generate_text(model, text, seq_len, vocab_size, sample_len)
            print(sample)


def generate_text(model, text, seq_len, vocab_size, output_len):
    '''
    Generate output_len characters with model, one character at a time.

    A random seed sequence is taken from text.  At every step the current
    window is encoded, the next character is sampled from the model's
    prediction, and the window slides forward by one character.

    NOTE(review): relies on a `datagen` module that is not imported in the
    visible part of this notebook.

    return: (seed, generated text)
    '''
    int_to_char = datagen.get_int_to_char(vocab_size)
    char_to_int = datagen.get_char_to_int(vocab_size)
    # initial sequence to prime the generation of next characters
    seed = seed_text(text, seq_len)
    window = seed
    generated = []
    for _ in range(output_len):
        # encode the current window as model input
        x = datagen.sequences_to_tensor([window], seq_len, char_to_int)
        preds = model.predict(x)[0] # prediction for the single sequence in the batch
        next_char = int_to_char[np.random.choice(len(preds), p=preds)]
        generated.append(next_char)
        # drop the oldest character and append the new one
        window = window[1:] + next_char
    return seed, ''.join(generated)


In [None]:
# NOTE(review): model_name, text, SEQ_LEN, VOCAB_SIZE, NUM_GEN_TEXT_SAMPLES and
# GEN_SAMPLE_LEN are not defined in the visible part of this notebook (the
# constant defined above is MODEL_NAME) - confirm before running this cell.
generate_text_for_epochs(model_name, [20, 40, 60, 80, 100], text, SEQ_LEN, VOCAB_SIZE, NUM_GEN_TEXT_SAMPLES, GEN_SAMPLE_LEN)