In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'png'
import sklearn
from sklearn.manifold import TSNE
import seaborn as sns
from pylab import rcParams
from sklearn.cluster import KMeans
from sklearn import preprocessing
rcParams['figure.figsize'] = 12, 8

In [48]:
df = pd.read_csv('OnlineNewsPopularity.csv')
# df = df.reindex(np.random.permutation(df.index)) # shuffling the data
df.describe()
# df = df.sample(5000)
features = df.columns[df.dtypes == np.float64]
target = ' shares'

In [37]:
X = preprocessing.MinMaxScaler().fit_transform(df[features])
Y = np.log(df[target])

N = X.shape[0]

split = np.round(np.array([0.7, 0.85]) * N).astype(np.int)
X_t, X_cv, X_test = np.array_split(X, split)
Y_t, Y_cv, Y_test = np.array_split(Y.reshape(-1, 1), split)

In [22]:
class BatchGenerator():
    def __init__(self, X, Y):
        self.N, self.n = X.shape
        self.data = np.hstack([X, Y])
    def gen(self, batchSize):
        np.random.shuffle(self.data)
        for i in range(0, self.N, batchSize):
            yield \
                self.data[i:i+batchSize, :self.n], \
                self.data[i:i+batchSize, self.n:]

XX = np.random.randint(0, 10, (12, 5))
YY = np.random.randint(10, 100, (12, 2))

print (XX, YY)

G = BatchGenerator(XX, YY)

for x, y in G.gen(1):
    print(x, y)
print()
for x, y in G.gen(5):
    print(x, y)

[[8 5 3 7 9]
 [4 6 4 1 4]
 [8 1 6 5 9]
 [4 6 9 2 5]
 [8 5 3 3 2]
 [9 2 1 3 7]
 [8 4 4 3 4]
 [8 5 8 1 1]
 [5 1 2 6 9]
 [6 3 8 1 5]
 [0 6 8 4 0]
 [6 5 9 7 5]] [[49 88]
 [59 59]
 [50 65]
 [47 29]
 [78 29]
 [71 39]
 [12 50]
 [90 59]
 [55 88]
 [30 64]
 [76 52]
 [32 89]]
[[9 2 1 3 7]] [[71 39]]
[[4 6 9 2 5]] [[47 29]]
[[4 6 4 1 4]] [[59 59]]
[[0 6 8 4 0]] [[76 52]]
[[6 3 8 1 5]] [[30 64]]
[[8 1 6 5 9]] [[50 65]]
[[8 5 3 3 2]] [[78 29]]
[[8 4 4 3 4]] [[12 50]]
[[8 5 3 7 9]] [[49 88]]
[[6 5 9 7 5]] [[32 89]]
[[8 5 8 1 1]] [[90 59]]
[[5 1 2 6 9]] [[55 88]]

[[0 6 8 4 0]
 [8 5 3 7 9]
 [8 1 6 5 9]
 [6 3 8 1 5]
 [8 4 4 3 4]] [[76 52]
 [49 88]
 [50 65]
 [30 64]
 [12 50]]
[[8 5 3 3 2]
 [4 6 4 1 4]
 [6 5 9 7 5]
 [8 5 8 1 1]
 [4 6 9 2 5]] [[78 29]
 [59 59]
 [32 89]
 [90 59]
 [47 29]]
[[5 1 2 6 9]
 [9 2 1 3 7]] [[55 88]
 [71 39]]


In [46]:
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 59], name="x")
y = tf.placeholder(tf.float32, [None, 1], name="y")

W_1 = tf.Variable(tf.random_normal(shape=[59,100], stddev=59**-0.5), name="weights")
b_1 = tf.Variable(tf.random_normal(shape=[1 ,100]), name="bias")
a_1 = tf.nn.tanh(tf.matmul(x, W_1) + b_1)

W_2 = tf.Variable(tf.random_normal(shape=[100,50], stddev=0.1), name="weights")
b_2 = tf.Variable(tf.random_normal(shape=[1,50]), name="bias")
a_2 = tf.nn.relu6(tf.matmul(a_1, W_2) + b_2)

W_3 = tf.Variable(tf.random_normal(shape=[50,30], stddev=0.1), name="weights")
b_3 = tf.Variable(tf.random_normal(shape=[1,1]), name="bias")
a_3 = tf.nn.relu(tf.matmul(a_2, W_3) + b_3)

cost = tf.reduce_mean(tf.square(y - a_3))
train_step = tf.train.MomentumOptimizer(1e-1, 0.0).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    T = BatchGenerator(X_t, Y_t)
    
    for epoh in range(1000):
        for xs, ys in T.gen(128):
            sess.run(train_step, feed_dict={x: xs, y: ys})
        if epoh % 10 == 0:
            training_cost = sess.run(cost, feed_dict={
                    x:X_t,
                    y:Y_t
                })
            test_cost = sess.run(cost, feed_dict={
                        x: X_cv,
                        y: Y_cv
                    })
            print ("[%3d] train: %15.10f, test: %15.10f" % \
                   (epoh, training_cost**0.5, test_cost**0.5))

[  0] train:    4.8347941635, test:    4.7975316297
[ 10] train:    3.1955308163, test:    3.1674724524
[ 20] train:    2.1438993485, test:    2.1074122587
[ 30] train:    1.6378769157, test:    1.6154305317
[ 40] train:    0.9004661545, test:    0.8458583064
[ 50] train:    0.8826373441, test:    0.8441244284
[ 60] train:    0.9047360106, test:    0.8946935848
[ 70] train:    0.8785453225, test:    0.8543541671
[ 80] train:    0.8836921428, test:    0.8405537642
[ 90] train:    0.8746578365, test:    0.8479274896
[100] train:    0.8903216799, test:    0.8425722026
[110] train:    0.8793159378, test:    0.8743849364
[120] train:    0.8712526185, test:    0.8613106516
[130] train:    0.8852426283, test:    0.8404895872
[140] train:    0.9109614989, test:    0.9336020178
[150] train:    0.8721469961, test:    0.8725087259
[160] train:    0.8602028414, test:    0.8446167274
[170] train:    0.8606152688, test:    0.8414722357
[180] train:    0.8635704971, test:    0.8676987593
[190] train:

KeyboardInterrupt: 