In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings

warnings.filterwarnings('ignore')

  from ._conv import register_converters as _register_converters


In [2]:
BATCH_SIZE = 64
LSTM_UNITS = 64
ITERATIONS = 20000
MAX_SEQ_LENGTH = 100
WORD_DIM = 100
TRAINING = False

# Load the dataset

In [3]:
import csv

### Load pretrained word2vec model
pretrained = pd.read_table('data/glove.6B.100d.txt', header=None, delimiter=' ', quoting=csv.QUOTE_NONE)
word_list = pretrained.loc[:,0].tolist()
word_vectors = pretrained.loc[:,1:].values

print('Number of words in the pretrained model: ', len(word_list))
print('The first word: ', word_list[0])
print('Corresponding vector: ', word_vectors[0])

Number of words in the pretrained model:  400000
The first word:  the
Corresponding vector:  [-0.038194 -0.24487   0.72812  -0.39961   0.083172  0.043953 -0.39141
  0.3344   -0.57545   0.087459  0.28787  -0.06731   0.30906  -0.26384
 -0.13231  -0.20757   0.33395  -0.33848  -0.31743  -0.48336   0.1464
 -0.37304   0.34577   0.052041  0.44946  -0.46971   0.02628  -0.54155
 -0.15518  -0.14107  -0.039722  0.28277   0.14393   0.23464  -0.31021
  0.086173  0.20397   0.52624   0.17164  -0.082378 -0.71787  -0.41531
  0.20335  -0.12763   0.41367   0.55187   0.57908  -0.33477  -0.36559
 -0.54857  -0.062892  0.26584   0.30205   0.99775  -0.80481  -3.0243
  0.01254  -0.36942   2.2167    0.72201  -0.24978   0.92136   0.034514
  0.46745   1.1079   -0.19358  -0.074575  0.23353  -0.052062 -0.22044
  0.057162 -0.15806  -0.30798  -0.41625   0.37972   0.15006  -0.53212
 -0.2055   -1.2526    0.071624  0.70565   0.49744  -0.42063   0.26148
 -1.538    -0.30223  -0.073438 -0.28312   0.37104  -0.25217   0.0162

In [4]:
### Load the youtube dataset
df = pd.read_csv('data/USvideos.csv')

df=df[df.dislikes > 0]
df=df[df.likes > 0]
df["description"] = df["description"].fillna('')

df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,2kyS6SvSYSE,17.14.11,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,https://i.ytimg.com/vi/2kyS6SvSYSE/default.jpg,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...
1,1ZAPwfrtAFY,17.14.11,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,https://i.ytimg.com/vi/1ZAPwfrtAFY/default.jpg,False,False,False,"One year after the presidential election, John..."
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...
3,puqaWrEC7tY,17.14.11,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,https://i.ytimg.com/vi/puqaWrEC7tY/default.jpg,False,False,False,Today we find out if Link is a Nickelback amat...
4,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...


In [5]:
print('Total number of samples :', len(df))

Total number of samples : 24583


# Preprocessing

## Define popularity metrics

In [6]:
metrics = ["likes_per_views", "likes_to_dislikes", "log_likes_per_views", "log_likes_to_dislikes", "diff_per_views"]

df["likes_per_views"]= df["likes"]/df["views"]
df["likes_to_dislikes"]=df["likes"]/df["dislikes"]
df["log_likes_per_views"]=np.log(df["likes"]/df["views"])
df["log_likes_to_dislikes"]=np.log(df["likes"]/df["dislikes"])
df["diff_per_views"]=(df["likes"]-df["dislikes"])/df["views"]

## Clean the text features

In [7]:
import re

def clean_str(string):
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`\x00]", " ", string)     
    string = re.sub(r"\'s", " 's", string) 
    string = re.sub(r"\'ve", " 've", string) 
    string = re.sub(r"\'t", " 't", string) 
    string = re.sub(r"\'re", " 're", string) 
    string = re.sub(r"\'d", " 'd", string) 
    string = re.sub(r"\'ll", " 'll", string) 
    string = re.sub(r",", " , ", string) 
    string = re.sub(r"!", " ! ", string) 
    string = re.sub(r"\(", " ( ", string) 
    string = re.sub(r"\)", " ) ", string) 
    string = re.sub(r"\?", " ? ", string) 
    string = re.sub(r"\s{2,}", " ", string) 
    return string.lower()

df_c=df[["title","channel_title","description"]]
df_c["title"]=df_c["title"].apply(clean_str)
df_c["channel_title"]=df_c["channel_title"].apply(clean_str)
df_c["description"]=df_c["description"].apply(clean_str)

df = df.join(df_c, rsuffix='_clean')


## Load the id matrix of descriptions for word2vec model

In [8]:
import os.path
from random import randint

if os.path.isfile('data/descriptions_id_matrix.npy'):
    # directly load the id matrix
    descriptions = np.load('data/descriptions_id_matrix.npy')
else:
    # map the id of each word in each description, then save to a file
    descriptions = []
    
    for i, string in enumerate(df['description_clean']):
        if i % 1000 == 0:
            print(i)
        vec = np.zeros(MAX_SEQ_LENGTH, dtype='int32')
        for word_idx, word in enumerate(string.split()):
            if word_idx == MAX_SEQ_LENGTH:
                break
            try:
                vec[word_idx] = word_list.index(word)
            except ValueError:
                # handle unseen word by randomly pick a word to represent it
                vec[word_idx] = randint(0,len(word_list)-1)           
        descriptions.append(vec)

    descriptions = np.array(descriptions)
    np.save('data/descriptions_id_matrix', descriptions)

print(descriptions.shape)
print(descriptions)

(24583, 100)
[[182902   1629 123042 ...  10108 194282 180918]
 [    48     62     49 ...     12     68   3496]
 [  1716    192    771 ...   1864 336264   7112]
 ...
 [     0   3761    213 ...      1    144    929]
 [    77   6526     13 ...  33161 251142  14035]
 [ 77523      0     50 ...      0      0      0]]


## Transform text features by the bag-of-words model 

In [9]:
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer

features = ["description_clean", "channel_title_clean", "title_clean"]

vect = CountVectorizer()
tfidf = TfidfTransformer()
transformed_features = []
for feature in features:
    m = vect.fit_transform(df[feature])
    m = tfidf.fit_transform(m)
    transformed_features.append(m)

transformed_features

[<24583x51496 sparse matrix of type '<class 'numpy.float64'>'
 	with 2225917 stored elements in Compressed Sparse Row format>,
 <24583x2571 sparse matrix of type '<class 'numpy.float64'>'
 	with 47382 stored elements in Compressed Sparse Row format>,
 <24583x8802 sparse matrix of type '<class 'numpy.float64'>'
 	with 186159 stored elements in Compressed Sparse Row format>]

# Linear Regression

## Predicting different popularity metrics by description / channel_title / title

In [43]:
from sklearn.model_selection import cross_val_score
from sklearn import linear_model

## Compute all scores
print("                   description / channel title / title")
for metric in metrics:
    scores = []
    for i in range(len(features)):
        cv_score = cross_val_score(linear_model.LinearRegression(), transformed_features[i], df[metric], cv=5) 
        scores.append(sum(cv_score) / len(cv_score))
    print("{} : {:.4f}   {:.4f}   {:.4f}".format(metric, *scores))

    

                   description / channel title / title
likes_per_views : -3.0134   0.4704   -1.3667
likes_to_dislikes : -0.9927   0.1059   -2.0169
log_likes_per_views : -0.4002   0.4581   -0.8905
log_likes_to_dislikes : -0.5473   0.2828   -1.3608
diff_per_views : -3.0442   0.4614   -1.3247


## Predicting log_likes_per_views by description and channel_title

In [44]:
from scipy import sparse

scores = cross_val_score(linear_model.LinearRegression(),
                         sparse.hstack((transformed_features[0], transformed_features[1])),
                         df["log_likes_per_views"], cv=5)
scores

array([-44.88209404,  -0.45814802,   0.10112746,  -0.05568677,
       -35.89735534])

# SGDRegressor

## Predicting different popularity metrics by description / channel_title / title

In [51]:
print("                   description / channel title / title")
for metric in metrics:
    scores = []
    for i in range(len(features)):
        cv_score = cross_val_score(linear_model.SGDRegressor(loss='squared_loss', penalty='l2',
                                           alpha=1e-3,learning_rate='optimal',
                                           max_iter=1000, tol=None),
                                   transformed_features[i], df[metric], cv=5) 
        scores.append(sum(cv_score) / len(cv_score))
    print("{} : {:.4f}   {:.4f}   {:.4f}".format(metric, *scores))

                   description / channel title / title
likes_per_views : 0.4201   0.3773   0.2296
likes_to_dislikes : 0.1749   0.1781   0.1233
log_likes_per_views : 0.4300   0.3668   0.2414
log_likes_to_dislikes : 0.3074   0.3097   0.1934
diff_per_views : 0.4185   0.3780   0.2292


## Predicting log_likes_per_views by description and channel_title

In [53]:
scores = cross_val_score(linear_model.SGDRegressor(loss='squared_loss', penalty='l2',
                                           alpha=1e-3,learning_rate='optimal',
                                           max_iter=1000, tol=None),
                         sparse.hstack((transformed_features[0], transformed_features[1])),
                         df["log_likes_per_views"], cv=5)
scores

array([0.50922941, 0.52199136, 0.51840033, 0.41284235, 0.57921615])

## Predicting diff_per_views  by description and channel_title

In [55]:
scores = cross_val_score(linear_model.SGDRegressor(loss='squared_loss', penalty='l2',
                                           alpha=1e-3,learning_rate='optimal',
                                           max_iter=1000, tol=None),
                         sparse.hstack((transformed_features[0], transformed_features[1])),
                         df["diff_per_views"], cv=5)
scores

array([0.49068005, 0.47777817, 0.51269925, 0.50429451, 0.50616807])

## Predicting log_likes_per_views by all three features

In [80]:
scores = cross_val_score(linear_model.SGDRegressor(loss='squared_loss', penalty='l2',
                                           alpha=1e-3,learning_rate='optimal',
                                           max_iter=1000, tol=None),
                         sparse.hstack((transformed_features[0],transformed_features[1],transformed_features[2])),
                         df["diff_per_views"], cv=5)
scores

array([0.49201516, 0.49087779, 0.53277965, 0.53168177, 0.51318397])

# Random Forest Regressor

## Predicting different popularity metrics by description / channel_title / title

In [67]:
from sklearn import ensemble

print("                   description / channel title / title")
for metric in metrics:
    scores = []
    for i in range(len(features)):
        cv_score = cross_val_score(ensemble.RandomForestRegressor(n_estimators=20, max_depth=10, max_features='sqrt'),
                                   transformed_features[i], df[metric], cv=5) 
        scores.append(sum(cv_score) / len(cv_score))
    print("{} : {:.4f}   {:.4f}   {:.4f}".format(metric, *scores))

                   description / channel title / title
likes_per_views : 0.1683   0.0721   0.0434
likes_to_dislikes : 0.0363   0.0316   0.0300
log_likes_per_views : 0.1751   0.0592   0.0375
log_likes_to_dislikes : 0.1158   0.0697   0.0333
diff_per_views : 0.1650   0.0753   0.0448


# AdaBoost Regressor

## Predicting different popularity metrics by description / channel_title / title

In [76]:
print("                   description / channel title / title")
for metric in metrics:
    scores = []
    for i in range(len(features)):
        cv_score = cross_val_score(ensemble.AdaBoostRegressor(), transformed_features[i], df[metric], cv=5)
        scores.append(sum(cv_score) / len(cv_score))
    print("{} : {:.4f}   {:.4f}   {:.4f}".format(metric, *scores))

                   description / channel title / title
likes_per_views : -1.2004   -0.4160   -0.8005
likes_to_dislikes : -1.6898   -0.7399   -1.4303
log_likes_per_views : 0.0509   -0.1751   -0.3033
log_likes_to_dislikes : 0.0714   -0.0764   -0.1064
diff_per_views : -0.5223   -0.3595   -0.6445


# LSTM

## Predicting different popularity metrics by description

In [12]:
from sklearn.model_selection import train_test_split
import datetime

### Generate batch, randomly pick some rows in X, y   
def get_batch(X, y):
    n = 1 if len(y.shape) == 1 else y.shape[1]
    labels = np.zeros([BATCH_SIZE, n])
    arr = np.zeros([BATCH_SIZE, MAX_SEQ_LENGTH])
    for i in range(BATCH_SIZE):
        num = randint(0, len(y)-1) 
        arr[i], labels[i] = X[num], y.iloc[num]
    return arr, labels

In [19]:
X_train, X_test, ytrain, ytest = train_test_split(descriptions, df[metrics], test_size=0.2, random_state=42)

MAX_SEQ_LENGTH = 100
NUM_CLASSES = 1



for metric in metrics:
    print ("-------------- %s -----------------" % metric)
    y_train = ytrain[metric]
    y_test = ytest[metric]
    
    
    ############# Define the model ################
    tf.reset_default_graph()

    # define placeholder
    y = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES])
    input_X = tf.placeholder(tf.int32, [BATCH_SIZE, MAX_SEQ_LENGTH])

    # look up word vectors for the ids in each batch of data
    data = tf.Variable(tf.zeros([BATCH_SIZE, MAX_SEQ_LENGTH, WORD_DIM]),dtype=tf.float32)
    data = tf.nn.embedding_lookup(word_vectors,input_X)
    data = tf.cast(data,tf.float32)

    # lstm cell
    lstmCell = tf.contrib.rnn.BasicLSTMCell(LSTM_UNITS)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
    output, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

    weight = tf.Variable(tf.random_uniform([LSTM_UNITS, NUM_CLASSES]))
    bias = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]))
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    y_pred = (tf.matmul(last, weight) + bias)

    # sum of squared error loss
    loss = tf.reduce_mean(tf.square(y_pred-y))
    optimizer = tf.train.AdamOptimizer().minimize(loss)
    
    # Coefficient of determination scores
    residual = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
    total = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
    r2 = tf.subtract(1.0, tf.div(residual, total))


    ############ TRAINING ############

    if os.path.isdir("models/model-%s" % metric):
        sess = tf.InteractiveSession()
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint("models/model-%s" % metric))
    else:
        sess = tf.InteractiveSession()
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

    if TRAINING:
        tf.summary.scalar('Loss', loss)
        tf.summary.scalar('Score', r2)
        merged = tf.summary.merge_all()
        logdir = "tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
        writer = tf.summary.FileWriter(logdir, sess.graph)

        for i in range(1,ITERATIONS+1):
            nextBatch, nextBatchY = get_batch(X_train, y_train)
            sess.run(optimizer, {input_X: nextBatch, y: nextBatchY})

            # Write summary to Tensorboard
            if i % 50 == 0:
                summary = sess.run(merged, {input_X: nextBatch, y : nextBatchY})
                writer.add_summary(summary, i)

            #Save the model every 10,000 training iterations
            if i % 10000 == 0:
                save_path = saver.save(sess, "models/model-%s/pretrained_lstm.ckpt" % metric, global_step=i)
                print("saved to %s" % save_path)
        writer.close()


    ############ TESTING ############    
    scores=[]
    for i in range(3000):
        nextBatchX, nextBatchY = get_batch(X_test, y_test);
        scores.append((sess.run(r2, {input_X: nextBatchX, y: nextBatchY})))
        #print("Score for batch %d : %d" % (i, score[i]))
    print("Average r^2 score for testing = %.4f \n" % (sum(scores)/len(scores)))


-------------- likes_per_views -----------------
INFO:tensorflow:Restoring parameters from models/model-likes_per_views\pretrained_lstm.ckpt-20000
Average r^2 score for testing = 0.7540 

-------------- likes_to_dislikes -----------------
INFO:tensorflow:Restoring parameters from models/model-likes_to_dislikes\pretrained_lstm.ckpt-20000
Average r^2 score for testing = 0.8381 

-------------- log_likes_per_views -----------------
INFO:tensorflow:Restoring parameters from models/model-log_likes_per_views\pretrained_lstm.ckpt-20000
Average r^2 score for testing = 0.8870 

-------------- log_likes_to_dislikes -----------------
INFO:tensorflow:Restoring parameters from models/model-log_likes_to_dislikes\pretrained_lstm.ckpt-20000
Average r^2 score for testing = 0.8923 

-------------- diff_per_views -----------------
INFO:tensorflow:Restoring parameters from models/model-diff_per_views\pretrained_lstm.ckpt-20000
Average r^2 score for testing = 0.7588 



## Predicting log_likes_per_views by channel title

In [10]:
MAX_SEQ_LENGTH = 3
NUM_CLASSES = 1

# Build the id matrix for channel titles
titles = []
for i, string in enumerate(df["channel_title_clean"]):
    vec = np.zeros(MAX_SEQ_LENGTH, dtype='int32')
    for word_idx, word in enumerate(string.split()):
        if word_idx == MAX_SEQ_LENGTH:
            break
        try:
            vec[word_idx] = word_list.index(word)
        except ValueError:
            # handle unseen word by randomly pick a word to represent it
            vec[word_idx] = randint(0,len(word_list)-1)         
    titles.append(vec)

# split train and test set
titles = np.array(titles)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(titles, df["log_likes_per_views"], test_size=0.2, random_state=42)


############# Define the model ################
tf.reset_default_graph()

# define placeholder
y = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES])
input_X = tf.placeholder(tf.int32, [BATCH_SIZE, MAX_SEQ_LENGTH])

# look up word vectors for the ids in each batch of data
data = tf.Variable(tf.zeros([BATCH_SIZE, MAX_SEQ_LENGTH, WORD_DIM]),dtype=tf.float32)
data = tf.nn.embedding_lookup(word_vectors,input_X)
data = tf.cast(data,tf.float32)

# lstm cell
lstmCell = tf.contrib.rnn.BasicLSTMCell(LSTM_UNITS)
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
output, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

weight = tf.Variable(tf.random_uniform([LSTM_UNITS, NUM_CLASSES]))
bias = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]))
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
y_pred = (tf.matmul(last, weight) + bias)

# sum of squared error loss
loss = tf.reduce_mean(tf.square(y_pred-y))
optimizer = tf.train.AdamOptimizer().minimize(loss)

# Coefficient of determination scores
residual = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
total = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
r2 = tf.subtract(1.0, tf.div(residual, total))


############ TRAINING ############

if os.path.isdir("models/model-channel_title"):
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint("models/model-channel_title"))
else:
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

if TRAINING:
    tf.summary.scalar('Loss', loss)
    tf.summary.scalar('Score', r2)
    merged = tf.summary.merge_all()
    logdir = "tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for i in range(1,100001):
        nextBatch, nextBatchY = get_batch(X_train, y_train)
        sess.run(optimizer, {input_X: nextBatch, y: nextBatchY})

        # Write summary to Tensorboard
        if i % 50 == 0:
            summary = sess.run(merged, {input_X: nextBatch, y : nextBatchY})
            writer.add_summary(summary, i)

        #Save the model every 10,000 training iterations
        if i % 10000 == 0:
            save_path = saver.save(sess, "models/model-channel_title/pretrained_lstm.ckpt", global_step=i)
            print("saved to %s" % save_path)
    writer.close()


############ TESTING ############    
scores=[]
for i in range(3000):
    nextBatchX, nextBatchY = get_batch(X_test, y_test);
    scores.append((sess.run(r2, {input_X: nextBatchX, y: nextBatchY})))
    #print("Score for batch %d : %d" % (i, score[i]))
print("Average score = %.4f \n" % (sum(scores)/len(scores)))

INFO:tensorflow:Restoring parameters from models/model-channel_title\pretrained_lstm.ckpt-100000
Average score = 0.4687 



## Predicting log_likes_to_dislikes by channel title

In [15]:
X_train, X_test, y_train, y_test = train_test_split(titles, df["log_likes_to_dislikes"], test_size=0.2, random_state=42)


############# Define the model ################
tf.reset_default_graph()

# define placeholder
y = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES])
input_X = tf.placeholder(tf.int32, [BATCH_SIZE, MAX_SEQ_LENGTH])

# look up word vectors for the ids in each batch of data
data = tf.Variable(tf.zeros([BATCH_SIZE, MAX_SEQ_LENGTH, WORD_DIM]),dtype=tf.float32)
data = tf.nn.embedding_lookup(word_vectors,input_X)
data = tf.cast(data,tf.float32)

# lstm cell
lstmCell = tf.contrib.rnn.BasicLSTMCell(LSTM_UNITS)
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
output, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

weight = tf.Variable(tf.random_uniform([LSTM_UNITS, NUM_CLASSES]))
bias = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]))
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
y_pred = (tf.matmul(last, weight) + bias)

# sum of squared error loss
loss = tf.reduce_mean(tf.square(y_pred-y))
optimizer = tf.train.AdamOptimizer().minimize(loss)

# Coefficient of determination scores
residual = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
total = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
r2 = tf.subtract(1.0, tf.div(residual, total))


############ TRAINING ############

if os.path.isdir("models/model-channel_title1"):
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint("models/model-channel_title1"))
else:
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

if TRAINING:
    tf.summary.scalar('Loss', loss)
    tf.summary.scalar('Score', r2)
    merged = tf.summary.merge_all()
    logdir = "tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for i in range(1,100001):
        nextBatch, nextBatchY = get_batch(X_train, y_train)
        sess.run(optimizer, {input_X: nextBatch, y: nextBatchY})

        # Write summary to Tensorboard
        if i % 50 == 0:
            summary = sess.run(merged, {input_X: nextBatch, y : nextBatchY})
            writer.add_summary(summary, i)

        #Save the model every 10,000 training iterations
        if i % 10000 == 0:
            save_path = saver.save(sess, "models/model-channel_title1/pretrained_lstm.ckpt", global_step=i)
            print("saved to %s" % save_path)
    writer.close()


############ TESTING ############    
scores=[]
for i in range(3000):
    nextBatchX, nextBatchY = get_batch(X_test, y_test);
    scores.append((sess.run(r2, {input_X: nextBatchX, y: nextBatchY})))
    #print("Score for batch %d : %d" % (i, score[i]))
print("Average score = %.4f \n" % (sum(scores)/len(scores)))

saved to models/model-channel_title1/pretrained_lstm.ckpt-10000
saved to models/model-channel_title1/pretrained_lstm.ckpt-20000
saved to models/model-channel_title1/pretrained_lstm.ckpt-30000
saved to models/model-channel_title1/pretrained_lstm.ckpt-40000
saved to models/model-channel_title1/pretrained_lstm.ckpt-50000
saved to models/model-channel_title1/pretrained_lstm.ckpt-60000
saved to models/model-channel_title1/pretrained_lstm.ckpt-70000
saved to models/model-channel_title1/pretrained_lstm.ckpt-80000
saved to models/model-channel_title1/pretrained_lstm.ckpt-90000
saved to models/model-channel_title1/pretrained_lstm.ckpt-100000
Average score = 0.3901 

