# Predicting rating from sentiment polarity

In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv("https://raw.githubusercontent.com/lyqht/googleplaystore-analytics/master/data/reviews_joined.csv", index_col=0)
df.dropna(inplace=True)
df.reset_index(inplace=True)
df.drop("index", axis=1,inplace=True)
df.drop_duplicates(subset=["Preprocessed_Review","App"],inplace=True, keep="first")
df['Price'] = df['Price'].str.replace('$', '')
df['Price'] = df['Price'].astype(float)
df.columns

HTTPError: HTTP Error 404: Not Found

In [4]:
df.sample(2)

NameError: name 'df' is not defined

NORMALIZE ALL INPUTS FROM 0-1

LABEL - SENTIMENT_RATING FROM 0-1
LABEL - CATEGORY/ GENRES MULTICLASSENCODER

In [0]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

columns_to_normalize = ["Average_Rating", "Installs", "Price", "Size", "Sentiment_Rating"]
for col in columns_to_normalize:
  data_scaled = min_max_scaler.fit_transform(df[col].to_numpy().reshape(-1,1))
  df[col] = data_scaled

columns_to_encode = ["Category", "Genres"]
for col in columns_to_encode:
  df[col] = LabelEncoder().fit_transform(df[col])

df.head(2)

In [0]:
print("Number of apps that have reviews")
df["App"].nunique()

In [0]:
num_reviews_per_app = list(df.groupby(["App","Average_Rating"]).size())
min_num_reviews = int(np.percentile(num_reviews_per_app,25))
#min_num_reviews = 0 # temporirily edited for debugging 
print(f"25th percentile of the count of reviews :", min_num_reviews)

25th percentile of the count of reviews : 26


### Using 28 reviews from each valid app for prediction

In [0]:
num_reviews_per_app = 28
value_counts = df.App.value_counts()
to_keep = value_counts[value_counts >= num_reviews_per_app].index
print("Number of Apps that have at least ",num_reviews_per_app, "reviews :", len(to_keep))

df = df[df.App.isin(to_keep)]

Number of Apps that have at least  28 reviews : 380


In [0]:
unique_apps = to_keep

print("Creating an array containing arrays of reviews of different apps")
reviews_by_app = [df[df["App"] == unique_apps[i]]["Preprocessed_Review"].to_numpy() for i in range(len(to_keep))]
reviews_by_app
print("Creating an array containing arrays of reviews' sentiment polarity of different apps")
review_sentiment_by_app = [df[df["App"] == unique_apps[i]]["Sentiment_Rating"].to_numpy() for i in range(len(to_keep))]

print("Creating an array containing the actual average rating of different apps")
avr_rating_per_app = [df[df["App"] == unique_apps[i]]["Average_Rating"].to_numpy() for i in range(len(to_keep))]

Creating an array containing arrays of reviews of different apps
Creating an array containing arrays of reviews' sentiment polarity of different apps
Creating an array containing the actual average rating of different apps


Using the sentiment polarity as the `x` input with a simple neural network model consisting of 2 layers.



In [0]:
from torch import tensor
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split

class SentimentDataset(Dataset):
  global num_reviews_per_app
  def __init__(self, x, y):
    self.samples = x
    self.labels = y
  
  def __len__(self):
    return len(self.samples)
  
  def __getitem__(self,idx):
    item = self.samples[idx]
    item = np.random.choice(item, size=num_reviews_per_app) # sampling 28 reviews from each app
    return tensor(item, dtype=torch.float), tensor(self.labels[idx][0], dtype=torch.float)

x = review_sentiment_by_app
y = avr_rating_per_app

train_size = int(0.7*len(x))
val_size = len(x) - train_size

data = SentimentDataset(review_sentiment_by_app, avr_rating_per_app)
trainset, valset = random_split(data, [train_size, val_size])
BATCH_SIZE = 2
train_dataloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_dataloader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

print("Training dataloader has ", len(train_dataloader), "batches of ", BATCH_SIZE)
print("Validation dataloader has ", len(val_dataloader), "batches of ", BATCH_SIZE)

Training dataloader has  186 batches of  2
Validation dataloader has  80 batches of  2


In [0]:
# !pip install tensorboard
# !tensorboard --logdir=runs
# from torch.utils.tensorboard import SummaryWriter
# import torchvision

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

class Net(nn.Module):
  def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = nn.Linear(n_hidden, n_output)   # output layer

  def forward(self, x):
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        #print("Relu x: ",x)
        x = self.predict(x)             # linear output
        #print("Predict x: ",x)
        return x

INPUT_SIZE = num_reviews_per_app
OUTPUT_SIZE = 1 # regression to reach average rating
HIDDEN_SIZE = 100 # arbitrary
learning_rate = 0.2

net = Net(n_feature=INPUT_SIZE, n_hidden=HIDDEN_SIZE, n_output=OUTPUT_SIZE)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
loss_func = nn.MSELoss()
# writer = SummaryWriter()

num_epochs = 100
losses = []
for i in range(num_epochs):
  for batch_idx, samples in enumerate(train_dataloader):
    x,y = samples

    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    # for plotting
  if i % 10 == 0:
      print("Epoch ", i, ", Loss: ", loss)
  losses.append(loss)
  prediction = 0

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(range(len(losses)),losses)

ZeroDivisionError: ignored

In [0]:
review_sentiment_by_app

In [0]:
avr_rating_per_app

In [0]:
len(samples)

In [0]:
losses = []
for batch_idx, samples in enumerate(val_dataloader):
  x, y = samples
  prediction = net(x)
  print("Given reviews:\n", x)
  for i in range(len(x)):
    print("Average of given review", sum(x[i])/28)
  print("Actual Average Rating:\n", y)
  print("Predicted:\n ", prediction)
  loss = loss_func(prediction, y)
  losses.append(loss)
  
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(range(len(losses)),losses)

# Predicting rating from word vectors

Encoding and Decoding 
https://bastings.github.io/annotated_encoder_decoder/


In [0]:
from torchnlp.word_to_vector import FastText



ModuleNotFoundError: ignored

## Forming vectors using Doc2Vec

In [0]:
from tqdm import tqdm
from gensim.models import doc2vec
from sklearn import utils
import gensim
from gensim.models.doc2vec import TaggedDocument 
import re

def label_sentences(corpus):
    labeled = []
    for i, v in enumerate(corpus):
        label = str(i)
        labeled.append(doc2vec.TaggedDocument(v,[label]))
    return labeled

all_data = label_sentences(df.Tokens)
model_dbow = gensim.models.Doc2Vec(dm = 0,vector_size = 300, negative = 5,min_count =1,alpha = 0.065)
model_dbow.build_vocab([x for x in tqdm(all_data)])

for epoch in range(30):
    model_dbow.train(utils.shuffle([x for x in tqdm(all_data)]),total_examples = len(all_data),epochs = 1)
    model_dbow.alpha -=0.002
    model_dbow.min_alpha = model_dbow.alpha

def get_vectors(model,corpus_size,vectors_size,index):
    vectors = np.zeros((corpus_size,vectors_size))
    for i in range(0,corpus_size):
        prefix = str(i)
        vectors[i] = model.docvecs[prefix]
    return vectors[index] 

100%|██████████| 15457/15457 [00:00<00:00, 2183976.99it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1615251.69it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1613121.60it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1618033.27it/s]
100%|██████████| 15457/15457 [00:00<00:00, 3419555.72it/s]
100%|██████████| 15457/15457 [00:00<00:00, 3468772.44it/s]
100%|██████████| 15457/15457 [00:00<00:00, 2218124.98it/s]
100%|██████████| 15457/15457 [00:00<00:00, 3074616.19it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1964169.93it/s]
100%|██████████| 15457/15457 [00:00<00:00, 2069306.00it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1645591.21it/s]
100%|██████████| 15457/15457 [00:00<00:00, 966809.68it/s]
100%|██████████| 15457/15457 [00:00<00:00, 3313470.15it/s]
100%|██████████| 15457/15457 [00:00<00:00, 3129379.59it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1653523.69it/s]
100%|██████████| 15457/15457 [00:00<00:00, 2512259.05it/s]
100%|██████████| 15457/15457 [00:00<00:00, 1803425.88it/s

In [0]:
df['Tagged_Document'] = all_data
df['vectors'] = [model_dbow.docvecs[str(i)] for i in range(0,len(df))]
print("Creating an array containing arrays of vectors of different apps")
vectors_by_app = [df[df["App"] == unique_apps[i]]["vectors"].to_numpy() for i in range(len(to_keep))]

Creating an array containing arrays of vectors of different apps


In [0]:
vectors_by_app[150].shape

(38,)

## Forming word vectors using CountVectorizer

In [0]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
df['Review_Vectorizer'] = df.Preprocessed_Review.apply(lambda x : str(x))
df['Review_Vectorizer'].head()
review_vectorizer_list = list
(df['Review_Vectorizer'])
review_vectorizer_list
train_vectors = vectorizer.fit_transform(review_vectorizer_list).toarray()
#numpy.unique(train_vectors[10])
data_transposed = zip(train_vectors)
df_vec = pd.DataFrame(data_transposed,columns = ['vectorizer new'])
df_vec.head()
df['vectorizer_new'] = df_vec['vectorizer new']
df.head()


TypeError: ignored

In [0]:
print("Creating an array containing arrays of vectors of different apps")
vectorizer_by_app = [df[df["App"] == unique_apps[i]]["vectorizer_new"].to_numpy() for i in range(len(to_keep))]
#vectorizer_by_app 

df['Tokens_New'] = df.Tokens.apply(lambda x : ast.literal_eval(x))
df['Tokens_New'].head()
print("Creating an array containing arrays of Tokens of different apps")
tokens_by_app = [df[df["App"] == unique_apps[i]]["Tokens_New"].to_numpy() for i in range(len(to_keep))]
tokens_by_app 

Creating an array containing arrays of vectors of different apps


KeyError: ignored

Now using the preprocessed_reivew text as the `x` input, and a model composed of the EmbeddingBag layer and a LinearLayer.

- `nn.EmbeddingBag` computes the mean value of a “bag” of embeddings. 
- The text entries have different lengths, but `nn.EmbeddingBag` requires no padding here since the text lengths are saved in offsets.
- Additionally, since `nn.EmbeddingBag` accumulates the average across the embeddings on the fly, nn.EmbeddingBag can enhance the performance and memory efficiency to process a sequence of tensors.


In [0]:
from torch import tensor
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split
import numpy
#from torchtext.data.utils import ngrams_iterator
from torchtext.data.utils import get_tokenizer
import ast 

class TextDataset(Dataset):
  def __init__(self, x, y):
    self.samples = x
    self.labels = y
  
  def __len__(self):
    return len(self.samples)
  
  def __getitem__(self,idx):
    item = self.samples[idx]
    item = np.random.choice(item, size=28) # sampling 28 reviews from each app
    item = np.array(list(item))#,dtype="float32")
    return item, tensor(self.labels[idx][0], dtype=torch.float)

def generate_batch(batch):
  batch_size = len(batch)
  labels = []
  texts = []
  for i in range(batch_size):
    entry = batch[i]
    labels.append(entry[1])

    # make sure the vector arrays are in same lengths and the elements are in same types
    texts.append(entry[0])
  print(texts)

  # torch.Tensor.cumsum returns the cumulative sum of elements in the dimension dim.
  offsets = [0] + [len(entry) for entry in texts]
  offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
  #ast.literal_eval(x)
  text = tensor(ast.literal_eval(texts), dtype=torch.float)
  '''tokenizer = get_tokenizer("spacy")
  text_list = []
  with torch.no_grad():
    for i in texts :
      for j in i : 
        j = j.strip('')
        text_list.append(torch.tensor([[token] for token in (tokenizer(j))]))'''
  #text = tensor(text_list, dtype=torch.float)


  return text, offsets, labels

x = tokens_by_app 
y = avr_rating_per_app

train_size = int(0.7*len(x))
val_size = len(x) - train_size

dataset = TextDataset(x, y)
trainset, valset = random_split(dataset, [train_size, val_size])
BATCH_SIZE = 5
train_dataloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, collate_fn = generate_batch)
val_dataloader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=True, collate_fn = generate_batch)

print("Training dataloader has ", len(train_dataloader), "batches of ", BATCH_SIZE)
print("Validation dataloader has ", len(val_dataloader), "batches of ", BATCH_SIZE)

for batch_idx, (text,offsets, label) in enumerate(train_dataloader):
  # print("Text")
  print(text)

NameError: ignored

In [0]:
INPUT_SIZE = num_reviews_per_app
OUTPUT_SIZE = 1 # regression to reach average rating
HIDDEN_SIZE = 30 # arbitrary
learning_rate = 0.2

net = Net(n_feature=INPUT_SIZE, n_hidden=HIDDEN_SIZE, n_output=OUTPUT_SIZE)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
loss_func = nn.MSELoss()
# writer = SummaryWriter()

num_epochs = 100
losses = []
for i in range(num_epochs):
  for batch_idx, samples in enumerate(train_dataloader):
    #x, y,_ = samples
    #x,y = samples

    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    # for plotting
  if i % 10 == 0:
      print("Epoch ", i, ", Loss: ", loss)
  losses.append(loss)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(range(len(losses)),losses)

In [0]:
class TextSentiment(nn.Module):
  def __init__(self, num_embeddings, embed_dim, output_size):
    """
    num_embeddings => vocab_size /  size of the dictionary of embeddings
    
    """
    super().__init__()
    self.embedding = nn.Embedding(num_embeddings, embed_dim, output_size, sparse=True)
    self.fc = nn.Linear(embed_dim, output_size)
    self.init_weights()

  def init_weights(self):
    initrange = 0.5
    self.embedding.weight.data.uniform_(-initrange, initrange)
    self.fc.weight.data.uniform_(-initrange, initrange)
    self.fc.bias.data.zero_()
  
  
  def forward(self, text, offsets):
    print(text.dtype)
    embedded = self.embedding(text)
    return self.fc(embedded)

# Declaring input dimensions
joint_tokens = df["Tokens"].str.cat(sep=" ")
vocabulary = set(joint_tokens)
vocab_size = len(vocabulary)

EMBED_DIM = 32
OUTPUT_SIZE = 1 # regression to reach average rating
learning_rate = 0.2

net = TextSentiment(num_embeddings = vocab_size, embed_dim= EMBED_DIM, output_size=OUTPUT_SIZE)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
loss_func = nn.MSELoss()
# writer = SummaryWriter()

num_epochs = 100
losses = []
for i in range(num_epochs):
  if i % 10 == 0:
      print("Epoch ", i, ", Loss: ", loss)
  for batch_idx, (text,offsets, label) in enumerate(train_dataloader):
    # x, y = samples
    optimizer.zero_grad()   # clear gradients for next train
    print(type(text))
    print(text.shape)
    print(type(offsets))
    print(offsets.shape)
    prediction = net(text, offsets)
    loss = loss_func(prediction.reshape(-1), y)
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    # for plotting
  losses.append(loss.item())
plt.plot(range(len(losses)),losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")

In [0]:
xx = torch.rand(5)
offset = torch.tensor([0,5]).long()
net.embedding(xx,offset)

In [0]:
offset.dtype

In [0]:
[df[df["App"] == unique_apps[i]]["Category"] for i in range(len(to_keep))].value_counts()

# Prediction of Category Based on the Review


In [0]:
print("Creating an array containing the actual category of different apps")
cat_per_app = [df[df["App"] == unique_apps[i]]["Category"].to_numpy() for i in range(len(to_keep))]
cat_per_app

Creating an array containing the actual category of different apps


[array([14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14]),
 array([15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]),
 array([15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 

In [0]:
from torch import tensor
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split

class SentimentDataset(Dataset):
  global num_reviews_per_app
  def __init__(self, x, y):
    self.samples = x
    self.labels = y
  
  def __len__(self):
    return len(self.samples)
  
  def __getitem__(self,idx):
    item = np.concatenate(self.samples[idx])
    print(item)
    print(self.labels[idx])
    print(self.labels[idx][0])
    item = np.random.choice(item, size=num_reviews_per_app) # sampling 28 reviews from each app
    return tensor(item), tensor(self.labels[idx])

x = vectors_by_app
y = cat_per_app

train_size = int(0.7*len(x))
val_size = len(x) - train_size

data = SentimentDataset(x, y)
trainset, valset = random_split(data, [train_size, val_size])
BATCH_SIZE = 1
train_dataloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_dataloader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

print("Training dataloader has ", len(train_dataloader), "batches of ", BATCH_SIZE)
print("Validation dataloader has ", len(val_dataloader), "batches of ", BATCH_SIZE)

Training dataloader has  266 batches of  1
Validation dataloader has  114 batches of  1


In [0]:
num_reviews_per_app=28
# !pip install tensorboard
# !tensorboard --logdir=runs
# from torch.utils.tensorboard import SummaryWriter
# import torchvision

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

class Net(nn.Module):
  def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = nn.Linear(n_hidden, n_output)   # output layer

  def forward(self, x):
        #x = F.softmax(self.hidden(x))  # activation function for hidden layer
        #print("Relu x: ",x)
        #x = self.predict(x)             # linear output
        #print("Predict x: ",x)
        x = F.relu(self.hidden(x))
        #x = F.relu(self.fc2(x))
        x = self.predict(x)
        print(x)
        print(F.log_softmax(x))
        #return F.log_softmax(x)
        return x

INPUT_SIZE = num_reviews_per_app
OUTPUT_SIZE = 33 # number of class#from 0 to 29 so is 30
HIDDEN_SIZE = 100 # arbitrary
learning_rate = 0.2

net = Net(n_feature=INPUT_SIZE, n_hidden=HIDDEN_SIZE, n_output=OUTPUT_SIZE)
#net = Net()
print(net)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
#optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
loss_func = nn.CrossEntropyLoss()
# writer = SummaryWriter()

num_epochs = 30
losses = []
for i in range(num_epochs):
  for batch_idx, samples in enumerate(train_dataloader):
    x,y = samples
    print(x.shape)
    print(y.shape)


    prediction = net(x)
    print('prediction shape')
    print(prediction.shape)
    #prediction =  Variable(torch.FloatTensor(prediction)).view(1, -1)
    targets = y
    print(prediction.shape)
    #targets = Variable(torch.LongTensor(targets))
    '''targetnp=targets.numpy()
    idxs=np.where(targetnp>0)[1]
    new_targets=torch.LongTensor(idxs)
    print(new_targets.shape)
    print(prediction.shape)
    targets2 = prediction
    targetnp2=targets2.detach().numpy()
    idxs2=np.where(targetnp2>0)[1]
    prediction2=torch.LongTensor(idxs2)'''
    #output = Variable(torch.FloatTensor([0,0,0,1])).view(1, -1)
    #target = Variable(torch.LongTensor([3]))
    
  
    #prediction = prediction.long()
    #targets = targets.squeeze_()
    #print(prediction)
    #print(prediction.shape)
    print('target shape')
    print(targets.shape)
    print(targets[0][0].item())
    #tensor_label = torch(targets[0][0].item(),dtype=torch.int8)
    tensor_label =torch.LongTensor([targets[0][0].item()])
    print('tensor label shape') 
    print(tensor_label.shape)
    #prediction should be in 1,23 and tensor label should be in 1 
    loss = loss_func(prediction, tensor_label)
    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    # for plotting
  if i % 10 == 0:
      print("Epoch ", i, ", Loss: ", loss)
  losses.append(loss)
  prediction = 0

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(range(len(losses)),losses)

Net(
  (hidden): Linear(in_features=28, out_features=100, bias=True)
  (predict): Linear(in_features=100, out_features=33, bias=True)
)
[-0.12380484  0.04445869 -0.08380479 ... -0.07721745 -0.02847598
 -0.13613334][-0.20339766 -0.06574988  0.06206845 ... -0.10448398 -0.13152325
 -0.06144286]
[26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26 26
 26 26 26 26 26 26 26]
[11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
 11 11 11 11 11 11]
26

11
[ 0.0941687   0.04624471 -0.00732248 ...  0.01808923  0.01209445
 -0.08686742][-0.05955182 -0.09044777 -0.01289445 ... -0.01443164 -0.03507165
 -0.0346244 ]

[11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
 11 11 11 11 11 11 11 11 11 11 11 11 11 11][13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13
 13 13 13 13 13 13 13 13 13 13 13 13 13 13]
13

11
[-0.03521821  0.04041544  0.0752724  ...  0.06274316 -0.06233925
  0.00588131][-0.06466306  0.04576398  0.0747908



[23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
 23 23 23 23 23 23 23 23 23]
9
23
[-0.01111027  0.01933112 -0.00103742 ... -0.03559701 -0.10547588
 -0.03537222]
[-0.0231632   0.05086423  0.09366831 ... -0.13729815 -0.06594101
  0.08421199]
[30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30
 30 30 30 30 30 30 30 30 30][7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7]

30
[-0.06871215  0.08682346  0.16771384 ... -0.02171116  0.01495088
 -0.04566601]7

[-0.0604359   0.0900523   0.0034044  ... -0.16405721 -0.16386484
  0.07850894][27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27 27
 27 27 27 27 27 27 27 27 27 27 27 27]

27
[8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]
8
[-0.04446188 -0.05542402  0.01779428 ...  0.09253684 -0.01944218
 -0.04765617][-0.08978745  0.00928276 -0.03345981 ... -0.07231844  0.06442212
  0.07952902]
[23 23 23 23 23 23 23 23 23 23 23 23 23 23

In [0]:
losses = []
accuracies = []
correct = 0
for batch_idx, samples in enumerate(val_dataloader):
  x, y = samples
  prediction = net(x)
  print("Given reviews:\n", x)
  #for i in range(len(x)):
  #  print("Average of given review", sum(x[i])/28)
  #print("Actual Category:\n", y)
  #print("Predicted:\n ", prediction)
  tensor_label =torch.LongTensor([y[0][0].item()])
  loss = loss_func(prediction, tensor_label)
  losses.append(loss)
  #print('torch.max(y, 1)[0]')
  #print(torch.max(y, 1)[0])
  #pred = torch.max(prediction,1).item()
  print(prediction)
  pred = prediction.data.max(1)[1]  # get the index of the max log-probability
  print("Actual Category:\n", y)
  print("Predicted:\n ", pred)
  #print(pred)
  #print(y)
  if y[0][0]== pred:
    correct +=1
    # pred[(y[0][0]).sum()
  print('accuracy')
  print(100. * correct / len(val_dataloader.dataset))
  accuracy = 100. * correct / len(val_dataloader.dataset)
  accuracies.append(accuracy)
  #test_loss /= len(val_dataloader.dataset)
  #print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
print(accuracies)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(range(len(losses)),losses)

[-0.04431748 -0.10654274 -0.09194145 ...  0.06979758 -0.1378038
  0.06455243][ 0.06606796  0.07576496 -0.01907044 ... -0.25952768 -0.11863025
  0.05350294]

[14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14][16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
 16 16 16 16 16 16 16]
16

14
[-0.01626758  0.07292935  0.15934527 ... -0.07937424  0.04976081
 -0.01685117]
[20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20]
[-0.17818528 -0.05162055 -0.14495607 ... -0.07670759 -0.12329042
 -0.10976963]
[14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14]20

14
[-0.01423476  0.10285572  0.17902394 ... -0.03940263 -0.11676308
 -0.06156648]
[-0.





[20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
 20 20 20 20 20 20 20 20 20 20 20 20 20]4

20
[-0.02632702  0.00688519  0.07995927 ... -0.15506177  0.0002705
  0.10294226][-0.06277712  0.06754905  0.17996056 ...  0.02509395 -0.06247399
 -0.24482973]
[20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
 20 20 20 20 20 20 20 20 20 20 20 20 20 20]

20
[12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
 12 12 12 12 12]
12
[-0.08371194 -0.08180342 -0.11787029 ...  0.02744976 -0.04760771
 -0.09280917][-0.05068815  0.05643233 -0.0665794  ... -0.14227156 -0.15189098
  0.02783809]

[15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15
 15 15 15 15 15 15 15 15 15 15 15 15][14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14
 14 14 14 14 14 14 14 

RuntimeError: ignored