In [0]:
from __future__ import print_function

In [0]:
import pandas as pd
import numpy as np
import tensorflow as tf
from collections import Counter

In [0]:
from sklearn.datasets import fetch_20newsgroups

The  [20 Newsgroups data](http://qwone.com/~jason/20Newsgroups/) set is a collection of approximately **20,000 newsgroup documents**, partitioned (nearly) evenly across **20 different newsgroups**.


---


Here for simplicity we are working with just** 3 news group** - *comp.graphics,sci.space,* and *rec.sport.baseball.*

In [4]:
categories = ["comp.graphics","sci.space","rec.sport.baseball"]
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)

No handlers could be found for logger "sklearn.datasets.twenty_newsgroups"


In [5]:
type(newsgroups_train.data)

list

In [6]:
print('total texts in train:',len(newsgroups_train.data))
print('total texts in test:',len(newsgroups_test.data))

total texts in train: 1774
total texts in test: 1180


In [7]:
print('###Example Train Data:\n\n ',newsgroups_train.data[0])
print('###Example Train Target:\n ',newsgroups_train.target[0])

###Example Train Data:

  From: jk87377@lehtori.cc.tut.fi (Kouhia Juhana)
Subject: Re: More gray levels out of the screen
Organization: Tampere University of Technology
Lines: 21
Distribution: inet
NNTP-Posting-Host: cc.tut.fi

In article <1993Apr6.011605.909@cis.uab.edu> sloan@cis.uab.edu
(Kenneth Sloan) writes:
>
>Why didn't you create 8 grey-level images, and display them for
>1,2,4,8,16,32,64,128... time slices?

By '8 grey level images' you mean 8 items of 1bit images?
It does work(!), but it doesn't work if you have more than 1bit
in your screen and if the screen intensity is non-linear.

With 2 bit per pixel; there could be 1*c_1 + 4*c_2 timing,
this gives 16 levels, but they are linear if screen intensity is
linear.
With 1*c_1 + 2*c_2 it works, but we have to find the best
compinations -- there's 10 levels, but 16 choises; best 10 must be
chosen. Different compinations for the same level, varies a bit, but
the levels keeps their order.

Readers should verify what I wrote... :-)

In [8]:
set(newsgroups_train.target)

{0, 1, 2}

In [0]:
vocab = Counter()

for text in newsgroups_train.data:
    for word in text.lower().split():
        vocab[word]+=1
        
for text in newsgroups_test.data:
    for word in text.lower().split():
        vocab[word]+=1

In [10]:
len(vocab)

79162

In [11]:
vocab['the']

31639

In [0]:
#Assign unique id for each word

word2int = {}
int2word = {}

for i,word in enumerate(vocab):
        word2int[word] = i
    
int2word = dict(zip(word2int.values(), word2int.keys()))

In [13]:
word2int['the']

25512

In [14]:
int2word[25512]

u'the'

In [0]:
total_words = len(vocab)

In [0]:
def get_batch(train_data, i, batch_size):
    """
    Generate batch data from the training data to be fed during training.
    
    Arguments:
    df -- training data containing inputs and targets
    i -- batch number
    batch_size -- size of each batch
    
    Returns:
    batch_x -- A Matrix where each row denotes a training document
    batch_y -- corresponding output classes one-hot encoded
    
    """
  
    batch_x = []
    batch_y = []
    
    documents = train_data.data[i*batch_size:i*batch_size+batch_size]
    categories = train_data.target[i*batch_size:i*batch_size+batch_size]
    
    for document in documents:
        document_row = np.zeros(total_words,dtype=float)
        for word in document.lower().split():
            document_row[word2int[word]] += 1
            
        batch_x.append(document_row)   #A Matrix- Row:each document in the batch, Col:length=vocab size, 1 in those which words are present
    
    for category in categories:
        y = np.zeros((3),dtype=float)
        if category == 0:
            y[0] = 1.
        elif category == 1:
            y[1] = 1.
        else:
            y[2] = 1.
        batch_y.append(y)      #one hot encoded target class for each document in the batch
            
     
    return np.array(batch_x),np.array(batch_y)

In [0]:
def initialize_parameters(layer_dims):
    """
    Initializes the parameters (weights and biases) of the multi layer perceptron. 
    
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
                  [input_layer, hidden_layer1,  hidden_layer2, ... ..., output_layer]
    
    Returns:
    parameters -- python dictionary containing network parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l-1], layer_dims[l])
                    bl -- bias vector of shape (layer_dims[l])
    """
    
    parameters = {}
    L = len(layer_dims)            # number of layers in the network
    
    for l in range(1, L):
        dim1 = layer_dims[l-1]
        dim2 = layer_dims[l]
        parameters['W' + str(l)] = tf.Variable(tf.random_normal([dim1, dim2]))   #randomly initialized weight
        parameters['b' + str(l)] = tf.Variable(tf.zeros([dim2]))   #bias initialized with Zeros
       
                
    return parameters

In [0]:
def multi_layer_perceptron(X, parameters):
    """
    Implement forward propagation for the multi layer perceptron.
    
    Arguments:
    X -- input layer data [batch data]
    parameters -- parameters of the neural network i.e. W1, b1, W2, b2, ... , WL, bL
    
    Returns:
    AL --  output layer neurons value without using non linear activation function in output layer.
    
    """

    A = X
    L = len(parameters)//2  # number of layers in the neural network
    
    # For the first (L-1) layer implement:
    #              Z = A_prev*Wl + bl 
    #              A = relu(Z)
    
    for l in range(1, L ):
        A_prev = A 
        Z = tf.add(tf.matmul(A_prev, parameters['W' + str(l)]), parameters['b' + str(l)])
        A = tf.nn.relu(Z)
           
    
    # For the output layer implement: AL = A*WL + bL
    
    AL = tf.add(tf.matmul(A, parameters['W' + str(L)]), parameters['b' + str(L)])
    
           
    return AL

In [0]:
n_input = total_words  #input size = Vocabulary size
n_classes = 3          #output/target size 

In [0]:
# Hyper Parameters
learning_rate = 0.01
training_epochs = 10
batch_size = 150
display_step = 1

In [0]:
input_tensor = tf.placeholder(tf.float32,[None, n_input],name="input")
output_tensor = tf.placeholder(tf.float32,[None, n_classes],name="output") 

In [0]:
# Network Parameters

parameters = initialize_parameters([n_input, 100, 100, 3]) #3 layer Neural Network

In [0]:
prediction = multi_layer_perceptron(input_tensor, parameters)

In [0]:
# loss and optimizer

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=output_tensor))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

In [0]:
# Initializing the variables
init = tf.global_variables_initializer()

In [31]:
# Launch the graph

with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        
        total_batch = int(len(newsgroups_train.data)/batch_size)
        
        # Loop over all batches
        for i in range(total_batch):
            batch_x,batch_y = get_batch(newsgroups_train,i,batch_size)
            
            # Run optimization op (backprop) and cost op (to get loss value)
            c,_ = sess.run([loss,optimizer], feed_dict={input_tensor: batch_x,output_tensor:batch_y})
            
            # Compute average loss
            avg_cost += c / total_batch
        
        # Display logs per epoch step
        print("Epoch:", '%04d' % (epoch+1), "loss=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

Epoch: 0001 loss= 514.761679909
Epoch: 0002 loss= 74.564135812
Epoch: 0003 loss= 16.739976254
Epoch: 0004 loss= 2.315056624
Epoch: 0005 loss= 0.429822872
Epoch: 0006 loss= 0.225238739
Epoch: 0007 loss= 0.090959659
Epoch: 0008 loss= 0.000000354
Epoch: 0009 loss= 0.000000014
Epoch: 0010 loss= 0.000000018
Optimization Finished!
