Load Data

In [1]:
import numpy as np
import pandas as pd

fname = 'facebook_comments.csv'
df_train = pd.read_csv(fname, header=None, names=['text','sentiment'], encoding='iso-8859-1' , lineterminator='\n' )

#df_train.head()

sent = {'positive' : 2, 'neutral' : 1, 'negative':0}
df_train['labels'] = df_train['sentiment'].str.strip().map(sent)

#df_train.head()

training_texts = df_train.text.values
labels = df_train.labels.values
#print(labels.shape)

#print(training_texts)
df_train.head()

Unnamed: 0,text,sentiment,labels
0,Heres a single to add to Kindle. Just read t...,neutral,1
1,If you tire of Non-Fiction.. Check out http://...,neutral,1
2,Ghost of Round Island is supposedly nonfiction.,neutral,1
3,Why is Barnes and Nobles version of the Kindle...,negative,0
4,@Maria: Do you mean the Nook? Be careful bo...,positive,2


Preprocess Data

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer



vectorizer=TfidfVectorizer(stop_words='english', max_features=500)
instances=vectorizer.fit_transform(training_texts)
X=instances
Y=np.array(labels)

print(X.shape,',',Y.shape)


(1999, 500) , (1999,)


Traditional machine Learning Models: Random Forest

In [3]:
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier


kfold=KFold(n_splits=10,shuffle=True,random_state=2020)
rf_model=RandomForestClassifier(random_state=2020,max_depth=2,criterion='entropy')
rf_cvscores= []


for train,test in kfold.split(X):
  rf_model.fit(X[train],Y[train])
  rf_acc=rf_model.score(X[test],Y[test])
  rf_cvscores.append(rf_acc)


print("Random Forest - mean: %.4f%%  (std:+/- %.4f%%)" % (np.mean(rf_cvscores)*100, np.std(rf_cvscores)*100))


Random Forest - mean: 64.1332%  (std:+/- 2.0919%)


Fully Connected FeedForward Neural Network

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

import torch.optim as optim



Build the train loader and validation loader

In [5]:
epochs=5
learn=1e-4
indim=X.shape[1]
outdim=3
drate=0.7
batch_size=20

In [6]:
X_tensor=torch.from_numpy(X.toarray())
Y_tensor=torch.from_numpy(Y)


dataset=TensorDataset(X_tensor,Y_tensor)
train_size=int(0.8*len(dataset))
val_size=len(dataset)-train_size
train_dataset, val_dataset=torch.utils.data.random_split(dataset,[train_size,val_size])
#creating training loader and validation loader
train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
val_loader=DataLoader(val_dataset,batch_size=batch_size,shuffle=True)


Build the Network

In [7]:
class SentimentNetwork(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate):
    
        super(SentimentNetwork,self).__init__()
        self.fc1=nn.Linear(500,100)
        self.dropout_rate=nn.Dropout(dropout_rate)
        self.fc2=nn.Linear(100,50)
        self.fc3=nn.Linear(50,3)
    
    def forward(self,x):
        x=F.relu(self.fc1(x))
        x=self.dropout_rate(x)
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x
# create a model
#model = SentimentNetwork(indim,outdim,drate)
#print(model)


In [8]:
model = SentimentNetwork(indim, outdim, drate)
#print(model)
optimizer = optim.SGD(model.parameters(), lr=learn)
criterion = nn.CrossEntropyLoss() 

#model=model.float() 
print(model)

SentimentNetwork(
  (fc1): Linear(in_features=500, out_features=100, bias=True)
  (dropout_rate): Dropout(p=0.7, inplace=False)
  (fc2): Linear(in_features=100, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=3, bias=True)
)


In [9]:
# define a training process function
from torch.autograd import Variable
def train(model, train_loader, optimizer, criterion):
    acc=0
    epoch_loss, epoch_acc = 0.0,0.0 # the loss and accuracy for each epoch
    model.train()
    for idx,(batch_x,batch_y) in enumerate(train_loader):
  ####for i,data in enumerate(train_loader,0):
    ####inputs,labels=data
    ####inputs,labels=Variable(inputs),Variable(labels)

  #(batch_x,batch_y) in train_loader:
        batch_x=Variable(batch_x)
        batch_y=Variable(batch_y)

  #predictions.detach().numpu()
  #calculate the loss using predicted output and the trurh
  #acc=calculate the accuracy from this batch_x

  #update parameters: optimiser.step()
          #zero  gradient
        optimizer.zero_grad()


  #get the predicted outputs for the batch_x
        output=model(batch_x.float())

#Calculate loss using the above defined criteria using predicted output and the truth
        loss=criterion(output.float(),batch_y.long())
  
  
  #backpropogate #loss.backward()
        loss.backward()
        optimizer.step()
        
  #prediction for the model      
        prediction = output.data.max(1)[1]      
        acc = prediction.eq(batch_y.data).sum()
      #print(loss)
      #return loss
 #Calculate loss for Epoch     
        epoch_loss += loss.item()/20
#Calculate Accuracy for Epoch        
        epoch_acc += acc.item()/20     
      #print(epoch_loss)
    return epoch_loss/idx, epoch_acc/idx
  #epoch+=loss.item()
  #epoch_acc+acc 

    
def evaluate(model, val_loader, criterion):
    acc=0
    epoch_loss, epoch_acc = 0.0,0.0  
  
    model.eval()
  
    with torch.no_grad():
  
   # the loss and accuracy for each epoch
        
        for batch_x,batch_y in val_loader:
            batch_x=Variable(batch_x)
            batch_y=Variable(batch_y)
      #zero  gradient
      #get the predicted outputs for the batch_x
      #predictions.detach().numpu()
      #calculate the loss using predicted output and the trurh
      #acc=calculate the accuracy from this batch_x
    
      #update parameters: optimiser.step()
            #optimizer.zero_grad()

  #Create model based on batch_x values   
            output=model(batch_x.float())
#calculate the loss for the validation set using predicted output and the truth
            loss=criterion(output.float(),batch_y.long())
            
            prediction =output.data.max(1)[1]      
 #Calculate accuracy for the model           
            acc = prediction.eq(batch_y.data).sum()
            
          
    #   print(loss)
    #Calculate the 
            epoch_loss += loss.item()
            epoch_acc += acc.item()      
      #  print(epoch_loss)

    return epoch_loss/len(val_loader.dataset), epoch_acc/len(val_loader.dataset)
    



In [10]:

#epochs=5
for epoch in range(epochs):
    train_loss, train_acc =train(model, train_loader, optimizer,criterion)
    valid_loss, valid_acc=evaluate(model, val_loader, criterion)

    
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')
    print(f'\t Val. Loss: {valid_loss:.4f} |  Val. Acc: {valid_acc:.4f}')

Epoch: 01
	Train Loss: 0.0528 | Train Acc: 0.6500
	 Val. Loss: 0.0522 |  Val. Acc: 0.6325
Epoch: 02
	Train Loss: 0.0527 | Train Acc: 0.6506
	 Val. Loss: 0.0521 |  Val. Acc: 0.6325
Epoch: 03
	Train Loss: 0.0527 | Train Acc: 0.6513
	 Val. Loss: 0.0520 |  Val. Acc: 0.6325
Epoch: 04
	Train Loss: 0.0526 | Train Acc: 0.6513
	 Val. Loss: 0.0520 |  Val. Acc: 0.6325
Epoch: 05
	Train Loss: 0.0526 | Train Acc: 0.6513
	 Val. Loss: 0.0519 |  Val. Acc: 0.6325
