# TensorFlow Data Input Pipeline

In [4]:
import tensorflow as tf
import os

### Retrieve review file paths into a TensorFlow dataset

In [2]:
# Build a dataset of file paths matching `reviews/<folder>/<file>`.
# shuffle=False keeps the listing order deterministic between runs.
reviews_ds = tf.data.Dataset.list_files(
    file_pattern='reviews/*/*',
    shuffle=False,
)

In [3]:
# Show every matched path; as_numpy_iterator() yields each element already
# converted out of its tensor wrapper, so it can be printed directly.
for file_path in reviews_ds.as_numpy_iterator():
    print(file_path)

b'reviews\\negative\\neg_1.txt'
b'reviews\\negative\\neg_2.txt'
b'reviews\\negative\\neg_3.txt'
b'reviews\\positive\\pos_1.txt'
b'reviews\\positive\\pos_2.txt'
b'reviews\\positive\\pos_3.txt'


### Extract the review text from each file and the label from its folder name

In [5]:
def extract_review_and_label(file_path):
    """Read a review file and derive its label from the parent folder name.

    Args:
        file_path: Path of the review file, as produced by the file-listing
            dataset. It is split on ``os.path.sep``, so it is expected to use
            the host platform's path separator.

    Returns:
        A ``(review, label)`` pair: the file contents returned by
        ``tf.io.read_file`` and the second-to-last path component (the name
        of the folder that directly contains the file).
    """
    review = tf.io.read_file(file_path)
    path_parts = tf.strings.split(file_path, os.path.sep)
    # path_parts[-1] is the file name; the element before it is its folder.
    label = path_parts[-2]
    return review, label

In [6]:
# Turn each file path into a (review text, label) pair.
reviews_ds_1 = reviews_ds.map(extract_review_and_label)

# Preview the first 50 bytes of every review next to its label.
for review, label in reviews_ds_1.as_numpy_iterator():
    print("Review: ", review[:50])
    print("Label: ", label)

Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b''
Label:  b'negative'
Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'
Review:  b''
Label:  b'positive'


### Filter blank reviews

In [7]:
def _is_not_blank(review, label):
    """Filter predicate: keep a pair only if its review text is not empty."""
    return review != ""


# Empty files produce an empty review string; drop those entries.
reviews_ds_2 = reviews_ds_1.filter(_is_not_blank)

for review_text, review_label in reviews_ds_2.as_numpy_iterator():
    print("Review: ", review_text[:50])
    print("Label: ", review_label)

Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'


### Using map, filter and shuffle

In [8]:
# Full pipeline in one chain: read each file, drop blank reviews, then shuffle.
# The shuffle buffer holds 3 elements and no seed is passed, so the order can
# differ from one run to the next.
final_ds = (
    reviews_ds
    .map(extract_review_and_label)
    .filter(lambda review, label: review != "")
    .shuffle(buffer_size=3)
)

for review_text, review_label in final_ds.as_numpy_iterator():
    print("Review: ", review_text[:50])
    print("Label: ", review_label)

Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'
