### Get sentence features from a pretrained model

In [1]:
import os,re
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
print ("Packages loaded.")

Packages loaded.


### Get dataset

In [2]:
# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
    """Load every review file in `directory` into a DataFrame.

    Files are expected to be named ``<id>_<rating>.txt``. The file contents
    become the ``sentence`` value and the ``<rating>`` part of the file name
    becomes the ``sentiment`` value (kept as the string captured from the
    name). Directory entries whose name does not match the pattern are
    skipped instead of raising ``AttributeError``.

    Args:
        directory: Path of a local directory containing the review files.

    Returns:
        pandas.DataFrame with the columns ``sentence`` and ``sentiment``,
        one row per matching file, in sorted file-name order.
    """
    # Raw string: "\d" and "\." are regex escapes, not Python string escapes.
    name_pattern = re.compile(r"\d+_(\d+)\.txt")
    data = {"sentence": [], "sentiment": []}
    # Sorted so the row order does not depend on the file system.
    for file_name in sorted(os.listdir(directory)):
        match = name_pattern.match(file_name)
        if match is None:
            # Not a review file (e.g. a hidden or auxiliary file): ignore it.
            continue
        # os.listdir already restricts us to local paths, so the built-in
        # open() is sufficient; newline="" keeps the text exactly as stored.
        file_path = os.path.join(directory, file_name)
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            data["sentence"].append(f.read())
        data["sentiment"].append(match.group(1))
    return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
    """Build one shuffled DataFrame from the ``pos`` and ``neg`` sub-directories.

    Each sub-directory is loaded with ``load_directory_data``; rows from
    ``pos`` get ``polarity`` 1 and rows from ``neg`` get ``polarity`` 0.

    Args:
        directory: Directory that contains the ``pos`` and ``neg`` folders.

    Returns:
        pandas.DataFrame holding both sets of rows in random order, with a
        fresh 0..n-1 index.
    """
    labelled_frames = []
    for subdir, label in (("pos", 1), ("neg", 0)):
        frame = load_directory_data(os.path.join(directory, subdir))
        frame["polarity"] = label
        labelled_frames.append(frame)
    combined = pd.concat(labelled_frames)
    # frac=1 samples every row, i.e. a full shuffle.
    shuffled = combined.sample(frac=1)
    return shuffled.reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
    """Fetch the aclImdb archive and return its train and test DataFrames.

    The archive is obtained through ``tf.keras.utils.get_file`` with
    ``extract=True``; the ``aclImdb/train`` and ``aclImdb/test`` directories
    are then looked up next to the path that call returns.

    Args:
        force_download: Currently not read by this function.

    Returns:
        Tuple ``(train_df, test_df)``, each produced by ``load_dataset``.
    """
    archive_path = tf.keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True,
    )
    corpus_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
    # Evaluated in order: the train split is loaded first, then the test split.
    train_df, test_df = (
        load_dataset(os.path.join(corpus_root, split))
        for split in ("train", "test")
    )
    return train_df, test_df

# Reduce logging output: set TensorFlow's logging verbosity to ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)
# Fetch the dataset and load the train / test splits as DataFrames.
train_df, test_df = download_and_load_datasets()
# Last expression of the cell: show the first rows of the training frame.
train_df.head()

Unnamed: 0,sentence,sentiment,polarity
0,"If you loved Deep Cover, you might like this f...",8,1
1,I thought watching employment videos on corpor...,2,0
2,"Starts really well, nice intro and build up fo...",3,0
3,"This film, in my opinion, is, despite it's fla...",10,1
4,This is strictly a review of the pilot episode...,2,0


### Check dataset

In [3]:
# Print the first two sentences, then the first two polarity labels.
for column in ('sentence', 'polarity'):
    for row in range(2):
        print (row,':',train_df[column][row])

0 : If you loved Deep Cover, you might like this film as well. Many of the poetic interludes Fishburne recites in Deep Cover are from the lyrical script of "Once In the Life," a screen adaptation of a play that Fishburne wrote. If you love Larry as much as I do, you'll love this film that is all Larry, all hot, and all fleshed out. Of course there is gun play and illicit substance use, this is a gangster movie of sorts, after all, but the script is beautiful and the story is touching, even a little on the chick flick side.<br /><br />AMAZING film...dark, frightening, sexy, and exciting. If you ever sneaked out at night or hung out in a clubhouse, you'll get the proper impact of the cramped sets (metephorically echoing being trapped in the life). Full of clever foreshadowing and complex relationships, this film is tight..every sentiment mirrored in the set dressing and camera shots. GOOD WORK!
1 : I thought watching employment videos on corporate compliance was tedious. This movie went 

### Get feature

In [6]:
# Load the pretrained sentence-embedding module from TF Hub.
embed_module = hub.Module("https://tfhub.dev/google/nnlm-en-dim128/1")
# Graph ops that embed every sentence of the train / test frames.
embed_train = embed_module(tf.reshape(train_df["sentence"], shape=[-1]))
embed_test = embed_module(tf.reshape(test_df["sentence"], shape=[-1]))
# NOTE(review): each sess.run embeds a whole split in a single call, which is
# presumably slow and memory-hungry on the full dataset -- consider batching.
with tf.train.MonitoredTrainingSession(is_chief=True) as sess:
    x_train = sess.run(embed_train)
    x_test = sess.run(embed_test)
n_train,n_test = np.shape(x_train)[0],np.shape(x_test)[0]
# One-hot encode the labels: row i gets a 1 in column polarity[i].
# (The previous loops always indexed with polarity[0] and overwrote an entire
# row, so the labels did not correspond to the samples.)
y_train,y_test = np.zeros((n_train,2)),np.zeros((n_test,2))
y_train[np.arange(n_train), train_df['polarity'].values] = 1
y_test[np.arange(n_test), test_df['polarity'].values] = 1
print("Shapes of 'x_train' and 'x_test' are %s and %s."%
      (x_train.shape,x_test.shape))
print("Shapes of 'y_train' and 'y_test' are %s and %s."%
      (y_train.shape,y_test.shape))

KeyboardInterrupt: 