In [1]:
from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, '../preprocessing/') #need this in order to get to the other file in other directory

#can comment out the ones you aren't using to save a little bit of time
from covidPreprocess import getCoronaVocabulary, getCoronaText, getCoronaVocabulary2
from liarPreprocess import getLiarVocabulary, getLiarText
from fnnPreprocess import getFNNVocabulary, getFNNText
# from fnnCovidCombinedPreprocess import getFNNCoronaVocabulary, getFNNCoronaText

In [2]:
class SimpleNeuralNet(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(SimpleNeuralNet, self).__init__()
        #Written based off of the tutorial at
        #https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/feedforward_neural_network/main.py#L37-L49
        self.hidden1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()   
        self.oupt = nn.Linear(hidden_size, 1)  
#         self.softmax = nn.Softmax(dim = 0)

    def forward(self, x):
        out = torch.tanh(self.hidden1(x))
#         out = self.relu(out)
#         out = self.oupt(out)
        out = torch.sigmoid(self.oupt(out))
#         out = self.softmax(out)
        return out

In [3]:
def trainAndTestSimpleModelAndGetProbs(dataset: str, num_epochs = 5, learning_rate = 0.001, print_epoch_mod = 5):
    '''
    gets around 63-71% for corona and Liar datasets, around 80-83% on FNN
    
    used this article for help in writing the tensor parts of code so it works with the model
    https://medium.com/analytics-vidhya/part-1-sentiment-analysis-in-pytorch-82b35edb40b8
    '''
    torch.manual_seed(1)
    if dataset == 'corona':
        X,Y = getCoronaText() #this function will give us the text array (not document term matrix) and Y
        X_train,Y_train, vectorizer_train = getCoronaVocabulary(True)
    elif dataset == 'liar':
        X,Y = getLiarText()
        X_train,Y_train, vectorizer_train = getLiarVocabulary(True)
    elif dataset == 'fnn':
        X,Y = getFNNText()
        X_train,Y_train, vectorizer_train = getFNNVocabulary(True)
    elif dataset == 'combined':
        X,Y = getFNNCoronaText()
        X_train,Y_train, vectorizer_train = getFNNCoronaVocabulary(True)
    
    #transform our testing dataset to match the vocabulary for the training dataset
    #transform will return the document-term matrix for X based on training dataset
    x_test = vectorizer_train.transform(X)
    
    #sample test on logistic classifier
    '''classifier = LogisticRegression()
    classifier.fit(X_train,Y_train)
    score = classifier.score(x_test,Y)
    print(score)'''
    
    vocabsize = X_train.shape[1]
    
    
    #transform our training and test data into tensors for the classifier to learn off of
    X_tensor = torch.from_numpy(X_train.todense()).float()
    Y_tensor = torch.from_numpy(np.array(Y_train)).float()
    
    X_test_tensor = torch.from_numpy(x_test.todense()).float()
    Y_test_tensor = torch.from_numpy(np.array(Y))
    
    device = torch.device('cpu')
    #use TensorDataset to be able to use our DataLoader
    train_data = torch.utils.data.TensorDataset(X_tensor, Y_tensor)
#     train_loader = torch.utils.data.DataLoader(train_data,batch_size=16, shuffle=True)
    train_loader = torch.utils.data.DataLoader(train_data,batch_size=16, shuffle=False)
    train_loader_batch_size_1 = torch.utils.data.DataLoader(train_data,batch_size=1, shuffle=False)
    
    test_data = torch.utils.data.TensorDataset(X_test_tensor, Y_test_tensor)
#     test_loader = torch.utils.data.DataLoader(test_data,batch_size=16, shuffle=True)
#     test_loader = torch.utils.data.DataLoader(test_data,batch_size=16, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_data,batch_size=1, shuffle=False)
    
    #initialize our model
    model = SimpleNeuralNet(vocabsize, 200).to(device)
#     model = SimpleNeuralNet(1, 200).to(device)
#     criterion = nn.CrossEntropyLoss()
    loss_fn = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    
    
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (x_batch, labels) in enumerate(train_loader):
    
            # Forward pass
            # The forward process computes the loss of each iteration on each sample
            model.train()
            y_pred = model(x_batch)
            #need to transform labels to long datatype using .long() or it complains it's an int
#             loss = criterion(y_pred, labels.long())
            loss = loss_fn(y_pred, labels.reshape(-1, 1))
    
            # Backward pass, using the optimizer to update the parameters
            optimizer.zero_grad()
            loss.backward()    #compute gradients
            optimizer.step()   #initiate gradient descent
    
     
            # Below, an epoch corresponds to one pass through all of the samples.
            # Each training step corresponds to a parameter update using 
            # a gradient computed on a minibatch of 100 samples 
            if (i + 1) % print_epoch_mod == 0: 
                #leaving it on 5 for corona dataset, probably want to change to % 50 or % 100
                # for the other datasets so don't get spammed 
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    
    # Test the model
    # In the test phase, we don't need to compute gradients (the model has already been learned)
    prob_dict = dict()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, label in test_loader:
#             outputs = model(inputs)
#             print('INPUTS', inputs)
#             print('LABELS', labels)
#             print('OUTPUTS.DATA', outputs.data)
#             _, predicted = torch.max(outputs.data, 1)
#             print('_', _)
#             print('PREDICTED', predicted)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
            output = model(inputs)
            total += 1
            prob_dict[inputs] = (label, output)
            if label >= 0.5 and output >= 0.5:
                correct += 1
            elif label < 0.5 and output < 0.5:
                correct += 1
            
        print('Test accuracy of the network: {} %'.format(100 * correct / total))
        test_accuracy = 100 * correct / total
        
    # Print out training accuracy
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, label in train_loader_batch_size_1:
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
            output = model(inputs)
            total += 1
            if label >= 0.5 and output >= 0.5:
                correct += 1
            elif label < 0.5 and output < 0.5:
                correct += 1
                
        print('Train accuracy of the network: {} %'.format(100 * correct / total))
        train_accuracy = 100 * correct / total
    
    return test_accuracy, train_accuracy, model, vectorizer_train

In [4]:
test_accuracy, train_accuracy, model, vectorizer_train = trainAndTestSimpleModelAndGetProbs('corona', num_epochs=50)

Epoch [1/50], Step [5/19], Loss: 0.3543
Epoch [1/50], Step [10/19], Loss: 0.5111
Epoch [1/50], Step [15/19], Loss: 0.2169
Epoch [2/50], Step [5/19], Loss: 0.0395
Epoch [2/50], Step [10/19], Loss: 0.1832
Epoch [2/50], Step [15/19], Loss: 0.0935
Epoch [3/50], Step [5/19], Loss: 0.0094
Epoch [3/50], Step [10/19], Loss: 0.0684
Epoch [3/50], Step [15/19], Loss: 0.0311
Epoch [4/50], Step [5/19], Loss: 0.0038
Epoch [4/50], Step [10/19], Loss: 0.0362
Epoch [4/50], Step [15/19], Loss: 0.0157
Epoch [5/50], Step [5/19], Loss: 0.0021
Epoch [5/50], Step [10/19], Loss: 0.0217
Epoch [5/50], Step [15/19], Loss: 0.0092
Epoch [6/50], Step [5/19], Loss: 0.0013
Epoch [6/50], Step [10/19], Loss: 0.0140
Epoch [6/50], Step [15/19], Loss: 0.0059
Epoch [7/50], Step [5/19], Loss: 0.0009
Epoch [7/50], Step [10/19], Loss: 0.0097
Epoch [7/50], Step [15/19], Loss: 0.0041
Epoch [8/50], Step [5/19], Loss: 0.0007
Epoch [8/50], Step [10/19], Loss: 0.0071
Epoch [8/50], Step [15/19], Loss: 0.0030
Epoch [9/50], Step [5/19

In [5]:
import preprocessingFunctions as pf

In [6]:
def predict_model(model, vec, raw_text):
    text = pf.getTermMatrixTestData(raw_text, vec).todense()
    
    X_test_tensor = torch.from_numpy(text).float()
    output_prob = model(X_test_tensor)
    
    return output_prob

In [7]:
def getTermMatrixTestData2(textToTransform: str, vectorizer):
    noComText = pf.replaceCommas(textToTransform)
    return vectorizer.fit_transform([noComText])

In [9]:
X_test_text, Y_test, vectorizer_test = getCoronaVocabulary2()

In [10]:
prob_dict = dict()
for i in range(len(X_test_text)):
    output_prob = predict_model(model, vectorizer_train, X_test_text[i])
    prob_dict[X_test_text[i]] = (Y_test[i], output_prob[0].item())

In [11]:
prob_dict_rank_prob = sorted(prob_dict.items(), key=(lambda x: x[1][1]))
prob_dict_rank_prob = np.array(prob_dict_rank_prob)

In [14]:
for entry in prob_dict_rank_prob[:10]:
    print(entry[0])
    print('ACTUAL LABEL:', entry[1][0])
    print('OUTPUT PROBABILITY:', entry[1][1])

The breakdown of the global liberal world order and its foundations. What is happening now is a global breakdown of the world order. It does not matter at all whether the nature of the coronavirus is artificial or not, nor is it even of principal importance whether, if it is artificial, it was deliberately released by the “world government” or not. The epidemic has begun - it is a fact. Now the main thing is to trace how the "world government" has reacted to it.To clarify, the "world government" is the totality of global political and economic elites and the intellectuals and media (mediacrats) that serve them. Such a "world government" necessarily exists, because on a global scale there are strictly-defined, fundamental norms that determine the basic parameters of politics, economics and ideology.In the economy, the only recognized norm is capitalism, the market economy (which is disputed only by North Korea - not, and this is very important, by China, which presents its own version o

In [15]:
for entry in prob_dict_rank_prob[-10:]:
    print(entry[0])
    print('ACTUAL LABEL:', entry[1][0])
    print('OUTPUT PROBABILITY:', entry[1][1])

The breakdown of the global liberal world order and its foundations. What is happening now is a global breakdown of the world order. It does not matter at all whether the nature of the coronavirus is artificial or not, nor is it even of principal importance whether, if it is artificial, it was deliberately released by the “world government” or not. The epidemic has begun - it is a fact. Now the main thing is to trace how the "world government" has reacted to it.To clarify, the "world government" is the totality of global political and economic elites and the intellectuals and media (mediacrats) that serve them. Such a "world government" necessarily exists, because on a global scale there are strictly-defined, fundamental norms that determine the basic parameters of politics, economics and ideology.In the economy, the only recognized norm is capitalism, the market economy (which is disputed only by North Korea - not, and this is very important, by China, which presents its own version o

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 1.0
COVID-19 poses a risk not only to the health of older adults who contract the disease but also to those without the health care resources and social structures that contribute to overall wellness. Before becoming a professor, Sarah Szanton made house calls to older adults as a nurse practitioner. On her visits, she saw how an older person's home environment can contribute to health outcomes. Now, as the Endowed Professor for Health Equity and Social Justice at the Johns Hopkins School of Nursing and the director of the Center for Innovative Care in Aging, Szanton works to identify solutions to narrow racial and socioeconomic disparities for older people.Szanton joined one of her PhD student mentees, Sarah LaFave, to discuss the challenges that COVID-19 poses for older adults. This conversation has been edited for length and clarity.How is the COVID-19 pandemic affecting older people differently than younger generations?Older adults are more likely to have dire outcomes from the vi