Project Name: **Classification of Abstracts from arXiv publications into their most relevant category**

Course: **CIS 545**

Project Members: **Arvind Balaji Narayan, Bharathrushab Manthripragada, Gopik Anand**

**Model Used: TextCNN**

TextCNN is the Convolution Neural Network that is specifically used for tasks concerning text classification. These require word embeddings to semantically map similar word structures used in the corpus. As a result, we use pre-trained Glove embeddings to output an embedding matrix that takes in the generated word embeddings as input.

Package Installations

In [1]:
import random
import copy
import time
import pandas as pd
import numpy as np
import gc
import re
import torch as t

#import spacy
from tqdm import tqdm_notebook, tnrange
from tqdm.auto import tqdm

tqdm.pandas(desc='Progress')
from collections import Counter

from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import os 

# cross validation and metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

from sklearn.preprocessing import StandardScaler
from multiprocessing import  Pool
from functools import partial
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.5 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.7 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 40.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.50.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 41.2 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 35.6 MB/s 
Collecting click==8.0
  Downloading click-8.0.0-py3-none-any.whl (96 kB)
[K     |████████████████████████████████| 96 kB 4.9 MB/s 
Building

In [3]:
!pip install kaggle



Loading the arXiv Dataset 

In [4]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [5]:
!kaggle datasets download -d Cornell-University/arxiv

Downloading arxiv.zip to /content
100% 1.04G/1.04G [00:13<00:00, 77.9MB/s]
100% 1.04G/1.04G [00:13<00:00, 81.0MB/s]


In [6]:
!chmod 600 /root/.kaggle/kaggle.json

In [7]:
!ls

arxiv.zip  kaggle.json	sample_data


In [8]:
!unzip /content/arxiv.zip

Archive:  /content/arxiv.zip
  inflating: arxiv-metadata-oai-snapshot.json  


In [9]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

In [10]:
import tensorflow as tf
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import get_linear_schedule_with_warmup
import random
from sklearn.preprocessing import LabelEncoder

In [11]:
data_file = '/content/arxiv-metadata-oai-snapshot.json'

In [12]:
def get_metadata():
    with open(data_file, 'r') as f:
        for line in f:
            yield line

Listing all Categories in cat_map

In [13]:

cat_map =      {'astro-ph': 'Astrophysics',
                'astro-ph.CO': 'Cosmology and Nongalactic Astrophysics',
                'astro-ph.EP': 'Earth and Planetary Astrophysics',
                'astro-ph.GA': 'Astrophysics of Galaxies',
                'astro-ph.HE': 'High Energy Astrophysical Phenomena',
                'astro-ph.IM': 'Instrumentation and Methods for Astrophysics',
                'astro-ph.SR': 'Solar and Stellar Astrophysics',
                'cond-mat.dis-nn': 'Disordered Systems and Neural Networks',
                'cond-mat.mes-hall': 'Mesoscale and Nanoscale Physics',
                'cond-mat.mtrl-sci': 'Materials Science',
                'cond-mat.other': 'Other Condensed Matter',
                'cond-mat.quant-gas': 'Quantum Gases',
                'cond-mat.soft': 'Soft Condensed Matter',
                'cond-mat.stat-mech': 'Statistical Mechanics',
                'cond-mat.str-el': 'Strongly Correlated Electrons',
                'cond-mat.supr-con': 'Superconductivity',
                'cs.AI': 'Artificial Intelligence',
                'cs.AR': 'Hardware Architecture',
                'cs.CC': 'Computational Complexity',
                'cs.CE': 'Computational Engineering, Finance, and Science',
                'cs.CG': 'Computational Geometry',
                'cs.CL': 'Computation and Language',
                'cs.CR': 'Cryptography and Security',
                'cs.CV': 'Computer Vision and Pattern Recognition',
                'cs.CY': 'Computers and Society',
                'cs.DB': 'Databases',
                'cs.DC': 'Distributed, Parallel, and Cluster Computing',
                'cs.DL': 'Digital Libraries',
                'cs.DM': 'Discrete Mathematics',
                'cs.DS': 'Data Structures and Algorithms',
                'cs.ET': 'Emerging Technologies',
                'cs.FL': 'Formal Languages and Automata Theory',
                'cs.GL': 'General Literature',
                'cs.GR': 'Graphics',
                'cs.GT': 'Computer Science and Game Theory',
                'cs.HC': 'Human-Computer Interaction',
                'cs.IR': 'Information Retrieval',
                'cs.IT': 'Information Theory',
                'cs.LG': 'Machine Learning',
                'cs.LO': 'Logic in Computer Science',
                'cs.MA': 'Multiagent Systems',
                'cs.MM': 'Multimedia',
                'cs.MS': 'Mathematical Software',
                'cs.NA': 'Numerical Analysis',
                'cs.NE': 'Neural and Evolutionary Computing',
                'cs.NI': 'Networking and Internet Architecture',
                'cs.OH': 'Other Computer Science',
                'cs.OS': 'Operating Systems',
                'cs.PF': 'Performance',
                'cs.PL': 'Programming Languages',
                'cs.RO': 'Robotics',
                'cs.SC': 'Symbolic Computation',
                'cs.SD': 'Sound',
                'cs.SE': 'Software Engineering',
                'cs.SI': 'Social and Information Networks',
                'cs.SY': 'Systems and Control',
                'econ.EM': 'Econometrics',
                'eess.AS': 'Audio and Speech Processing',
                'eess.IV': 'Image and Video Processing',
                'eess.SP': 'Signal Processing',
                'gr-qc': 'General Relativity and Quantum Cosmology',
                'hep-ex': 'High Energy Physics - Experiment',
                'hep-lat': 'High Energy Physics - Lattice',
                'hep-ph': 'High Energy Physics - Phenomenology',
                'hep-th': 'High Energy Physics - Theory',
                'math.AC': 'Commutative Algebra',
                'math.AG': 'Algebraic Geometry',
                'math.AP': 'Analysis of PDEs',
                'math.AT': 'Algebraic Topology',
                'math.CA': 'Classical Analysis and ODEs',
                'math.CO': 'Combinatorics',
                'math.CT': 'Category Theory',
                'math.CV': 'Complex Variables',
                'math.DG': 'Differential Geometry',
                'math.DS': 'Dynamical Systems',
                'math.FA': 'Functional Analysis',
                'math.GM': 'General Mathematics',
                'math.GN': 'General Topology',
                'math.GR': 'Group Theory',
                'math.GT': 'Geometric Topology',
                'math.HO': 'History and Overview',
                'math.IT': 'Information Theory',
                'math.KT': 'K-Theory and Homology',
                'math.LO': 'Logic',
                'math.MG': 'Metric Geometry',
                'math.MP': 'Mathematical Physics',
                'math.NA': 'Numerical Analysis',
                'math.NT': 'Number Theory',
                'math.OA': 'Operator Algebras',
                'math.OC': 'Optimization and Control',
                'math.PR': 'Probability',
                'math.QA': 'Quantum Algebra',
                'math.RA': 'Rings and Algebras',
                'math.RT': 'Representation Theory',
                'math.SG': 'Symplectic Geometry',
                'math.SP': 'Spectral Theory',
                'math.ST': 'Statistics Theory',
                'math-ph': 'Mathematical Physics',
                'nlin.AO': 'Adaptation and Self-Organizing Systems',
                'nlin.CD': 'Chaotic Dynamics',
                'nlin.CG': 'Cellular Automata and Lattice Gases',
                'nlin.PS': 'Pattern Formation and Solitons',
                'nlin.SI': 'Exactly Solvable and Integrable Systems',
                'nucl-ex': 'Nuclear Experiment',
                'nucl-th': 'Nuclear Theory',
                'physics.acc-ph': 'Accelerator Physics',
                'physics.ao-ph': 'Atmospheric and Oceanic Physics',
                'physics.app-ph': 'Applied Physics',
                'physics.atm-clus': 'Atomic and Molecular Clusters',
                'physics.atom-ph': 'Atomic Physics',
                'physics.bio-ph': 'Biological Physics',
                'physics.chem-ph': 'Chemical Physics',
                'physics.class-ph': 'Classical Physics',
                'physics.comp-ph': 'Computational Physics',
                'physics.data-an': 'Data Analysis, Statistics and Probability',
                'physics.ed-ph': 'Physics Education',
                'physics.flu-dyn': 'Fluid Dynamics',
                'physics.gen-ph': 'General Physics',
                'physics.geo-ph': 'Geophysics',
                'physics.hist-ph': 'History and Philosophy of Physics',
                'physics.ins-det': 'Instrumentation and Detectors',
                'physics.med-ph': 'Medical Physics',
                'physics.optics': 'Optics',
                'physics.plasm-ph': 'Plasma Physics',
                'physics.pop-ph': 'Popular Physics',
                'physics.soc-ph': 'Physics and Society',
                'physics.space-ph': 'Space Physics',
                'q-bio.BM': 'Biomolecules',
                'q-bio.CB': 'Cell Behavior',
                'q-bio.GN': 'Genomics',
                'q-bio.MN': 'Molecular Networks',
                'q-bio.NC': 'Neurons and Cognition',
                'q-bio.OT': 'Other Quantitative Biology',
                'q-bio.PE': 'Populations and Evolution',
                'q-bio.QM': 'Quantitative Methods',
                'q-bio.SC': 'Subcellular Processes',
                'q-bio.TO': 'Tissues and Organs',
                'q-fin.CP': 'Computational Finance',
                'q-fin.EC': 'Economics',
                'q-fin.GN': 'General Finance',
                'q-fin.MF': 'Mathematical Finance',
                'q-fin.PM': 'Portfolio Management',
                'q-fin.PR': 'Pricing of Securities',
                'q-fin.RM': 'Risk Management',
                'q-fin.ST': 'Statistical Finance',
                'q-fin.TR': 'Trading and Market Microstructure',
                'quant-ph': 'Quantum Physics',
                'stat.AP': 'Applications',
                'stat.CO': 'Computation',
                'stat.ME': 'Methodology',
                'stat.ML': 'Machine Learning',
                'stat.OT': 'Other Statistics',
                'stat.TH': 'Statistics Theory'}

Data Wrangling

In [14]:
titles = []
abstracts = []
categories = []

# Consider all categories in the `category_map` to be used during training and prediction
paper_categories = np.array(list(cat_map.keys())).flatten()

metadata = get_metadata()
for paper in tqdm(metadata):
    paper_dict = json.loads(paper)
    category = paper_dict.get('categories')
    try:
        try:
            year = int(paper_dict.get('journal-ref')[-4:])    ### Example Format: "Phys.Rev.D76:013009,2007"
        except:
            year = int(paper_dict.get('journal-ref')[-5:-1])    ### Example Format: "Phys.Rev.D76:013009,(2007)"

        if category in paper_categories and 2013<year<2022:
            titles.append(paper_dict.get('title'))
            abstracts.append(paper_dict.get('abstract'))
            categories.append(paper_dict.get('categories'))
    except:
        pass 

len(titles), len(abstracts), len(categories)

0it [00:00, ?it/s]

(102970, 102970, 102970)

In [15]:
papers = pd.DataFrame({
    'title': titles,
    'abstract': abstracts,
    'categories': categories
})
papers.head(5)

Unnamed: 0,title,abstract,categories
0,On the Cohomological Derivation of Yang-Mills ...,We present a brief review of the cohomologic...,physics.gen-ph
1,Bohmian Mechanics at Space-Time Singularities....,We develop an extension of Bohmian mechanics...,quant-ph
2,A Procedure to Solve the Eigen Solution to Dir...,"In this paper, we provide a procedure to sol...",physics.gen-ph
3,What happens to geometric phase when spin-orbi...,Spin-orbit interaction lifts accidental band...,cond-mat.other
4,Functions of State for Spinor Gas in General R...,The energy momentum tensor of perfect fluid ...,physics.gen-ph


In [16]:
papers['abstract'] = papers['abstract'].apply(lambda x: x.replace("\n",""))
papers['abstract'] = papers['abstract'].apply(lambda x: x.strip())
papers['text'] = papers['title'] + '. ' + papers['abstract']

In [17]:
papers.head(5)

Unnamed: 0,title,abstract,categories,text
0,On the Cohomological Derivation of Yang-Mills ...,We present a brief review of the cohomological...,physics.gen-ph,On the Cohomological Derivation of Yang-Mills ...
1,Bohmian Mechanics at Space-Time Singularities....,We develop an extension of Bohmian mechanics t...,quant-ph,Bohmian Mechanics at Space-Time Singularities....
2,A Procedure to Solve the Eigen Solution to Dir...,"In this paper, we provide a procedure to solve...",physics.gen-ph,A Procedure to Solve the Eigen Solution to Dir...
3,What happens to geometric phase when spin-orbi...,Spin-orbit interaction lifts accidental band d...,cond-mat.other,What happens to geometric phase when spin-orbi...
4,Functions of State for Spinor Gas in General R...,The energy momentum tensor of perfect fluid is...,physics.gen-ph,Functions of State for Spinor Gas in General R...


In [18]:
df = papers[["text","categories"]].copy()
df

Unnamed: 0,text,categories
0,On the Cohomological Derivation of Yang-Mills ...,physics.gen-ph
1,Bohmian Mechanics at Space-Time Singularities....,quant-ph
2,A Procedure to Solve the Eigen Solution to Dir...,physics.gen-ph
3,What happens to geometric phase when spin-orbi...,cond-mat.other
4,Functions of State for Spinor Gas in General R...,physics.gen-ph
...,...,...
102965,Complementarity and the nature of uncertainty ...,quant-ph
102966,Alternative Derivation of the Hu-Paz-Zhang Mas...,quant-ph
102967,Guiding Neutral Atoms with a Wire. We demonstr...,quant-ph
102968,Limits for entanglement measures. We show that...,quant-ph


In [19]:
label_encoder = LabelEncoder()
label_encoder.fit(df['categories'])

LabelEncoder()

In [20]:
df['categories_encoded'] = df['categories'].apply(lambda x: label_encoder.transform([x])[0])
df

Unnamed: 0,text,categories,categories_encoded
0,On the Cohomological Derivation of Yang-Mills ...,physics.gen-ph,112
1,Bohmian Mechanics at Space-Time Singularities....,quant-ph,141
2,A Procedure to Solve the Eigen Solution to Dir...,physics.gen-ph,112
3,What happens to geometric phase when spin-orbi...,cond-mat.other,10
4,Functions of State for Spinor Gas in General R...,physics.gen-ph,112
...,...,...,...
102965,Complementarity and the nature of uncertainty ...,quant-ph,141
102966,Alternative Derivation of the Hu-Paz-Zhang Mas...,quant-ph,141
102967,Guiding Neutral Atoms with a Wire. We demonstr...,quant-ph,141
102968,Limits for entanglement measures. We show that...,quant-ph,141


Train-Test Split

In [21]:
x_train, x_test, y_train, y_test = train_test_split(df['text'], df['categories_encoded'], random_state=2018, test_size=0.3)

In [22]:
embed_size = 300 # how big is each word vector
max_features = 120000 # how many unique words to use (i.e num rows in embedding vector)
maxlen = 750 # max number of words in a question to use
batch_size = 512 # how many samples to process at once
n_epochs = 5 # how many times to iterate over all samples
n_splits = 5 # Number of K-fold Splits
SEED = 10
debug = 0

In [23]:
from sklearn import model_selection
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

Tokenization

In [24]:
## Tokenize the sentences
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(x_train))
x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)

## Pad the sentences 
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test= pad_sequences(x_test, maxlen=maxlen)

In [25]:
print("X_test: ",x_train, x_train.shape, x_train.shape[1])
print("X_test: ",x_test)

X_test:  [[    0     0     0 ...     2    23   473]
 [    0     0     0 ...   151    15   485]
 [    0     0     0 ...   285  2166   421]
 ...
 [    0     0     0 ...   110   336   472]
 [    0     0     0 ...   158  1641  1364]
 [    0     0     0 ...     3  1545 39504]] (72079, 750) 750
X_test:  [[    0     0     0 ...     4  7198  9446]
 [    0     0     0 ...    48  1212 63000]
 [    0     0     0 ...    34  1063   263]
 ...
 [    0     0     0 ...   443   533 40492]
 [    0     0     0 ...  4009    38  1834]
 [    0     0     0 ...     2     1   342]]


Loading Glove Embeddings

In [26]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove.6B.zip
!ls -lat

--2022-05-02 17:11:50--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2022-05-02 17:11:50--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2022-05-02 17:11:50--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2022-0

In [27]:
## FUNCTIONS TAKEN FROM https://www.kaggle.com/gmhost/gru-capsule

def load_glove(word_index):
    EMBEDDING_FILE = '/content/glove.6B.300d.txt'
    def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')[:300]
    embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE))
    
    all_embs = np.stack(embeddings_index.values())
    emb_mean,emb_std = -0.005838499,0.48782197
    embed_size = all_embs.shape[1]

    nb_words = min(max_features, len(word_index)+1)
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
    for word, i in word_index.items():
        if i >= max_features: continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None: 
            embedding_matrix[i] = embedding_vector
        else:
            embedding_vector = embeddings_index.get(word.capitalize())
            if embedding_vector is not None: 
                embedding_matrix[i] = embedding_vector
    return embedding_matrix

In [28]:
if debug:
    embedding_matrix = np.random.randn(120000,300)
else:
    embedding_matrix = load_glove(tokenizer.word_index)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [29]:
np.shape(embedding_matrix)

(120000, 300)

Model Definition - TextCNN

In [30]:
import torch

In [31]:
class TextCNN_Classifier(nn.Module):
    
    def __init__(self):
        super(TextCNN_Classifier, self).__init__()
        #kernel size will be filter _size * embedding size
        # we will have 5 filter covering these many words at a time
        self.filter_1 = 1 
        self.filter_2 = 2
        self.filter_3 = 3
        self.filter_4 = 4
        self.filter_5 = 5
        num_filters = 15 # no of output channels
        number_classes = 147
        self.embedding = nn.Embedding(max_features, embed_size)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False
        self.conv_1 = nn.Conv2d(1,num_filters,(self.filter_1, embed_size))
        self.conv_2 = nn.Conv2d(1,num_filters,(self.filter_2, embed_size))
        self.conv_3 = nn.Conv2d(1,num_filters,(self.filter_3, embed_size))
        self.conv_4 = nn.Conv2d(1,num_filters,(self.filter_4, embed_size))
        self.conv_5 = nn.Conv2d(1,num_filters,(self.filter_5, embed_size))
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(5*num_filters, number_classes) # number of layers is 5

    def forward(self, x):
        x = self.embedding(x)  
        x =  x.unsqueeze(1)
        x1 =  F.max_pool1d( F.relu(self.conv_1(x)).squeeze(3) , F.relu(self.conv_1(x)).squeeze(3).size(2)).squeeze(2)
        x2 =  F.max_pool1d( F.relu(self.conv_2(x)).squeeze(3) , F.relu(self.conv_2(x)).squeeze(3).size(2)).squeeze(2)
        x3 =  F.max_pool1d( F.relu(self.conv_3(x)).squeeze(3) , F.relu(self.conv_3(x)).squeeze(3).size(2)).squeeze(2)
        x4 =  F.max_pool1d( F.relu(self.conv_4(x)).squeeze(3) , F.relu(self.conv_4(x)).squeeze(3).size(2)).squeeze(2)
        x5 =  F.max_pool1d( F.relu(self.conv_5(x)).squeeze(3) , F.relu(self.conv_5(x)).squeeze(3).size(2)).squeeze(2)
        x = torch.cat((x1,x2,x3,x4,x5),1)
        x = self.dropout(x)
        x = self.fc1(x) 
        return x

Training and Testing

In [34]:
n_epochs = 10
model = TextCNN_Classifier()
loss_fn = nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
model.cuda()


# Load train and test in CUDA Memory
x_train_2 = torch.tensor((x_train), dtype=torch.long).cuda()
y_train_2 = torch.tensor(y_train.values, dtype=torch.long).cuda()
x_cv = torch.tensor(x_test, dtype=torch.long).cuda()
y_cv = torch.tensor(y_test.values, dtype=torch.long).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train_2, y_train_2)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

train_loss = []
valid_loss = []

for epoch in range(n_epochs):
    # Set model to train configuration
    model.train()
    avg_loss = 0.  
    for i, (x, y) in enumerate(train_loader):
        # Predict/Forward Pass
        y_pred = model(x)
        # Compute loss
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
    
    # Set model to validation configuration -Doesn't get trained here
    model.eval()        

    val_preds = np.zeros((len(x_cv),147))
    #y_pred_2 =[]
    y_true =[]
    preds = []
    labels = []
    for i, (x, y) in enumerate(valid_loader):
      y_pred_2=torch.argmax(model(x), dim = 1).tolist()
      y_pred_2 = map(int, y_pred_2)
      preds.extend(list(y_pred_2))
      labels.extend(y.tolist())
    
    val_accuracy = (np.sum(np.array(preds) == np.array(labels)))/len(preds)
    f1_scorE_sklearn = f1_score(preds,labels,average="weighted")
    print("f1 score is: ")
    print(f1_scorE_sklearn)
    print('Epoch {}/{} \t val_acc={:.4f} \t f1_score={:.4f}'.format(
                epoch + 1, n_epochs, val_accuracy, f1_scorE_sklearn))


f1 score is: 
0.5598598800483839
Epoch 1/10 	 val_acc=0.4957 	 f1_score=0.5599
f1 score is: 
0.6413361017140062
Epoch 2/10 	 val_acc=0.6010 	 f1_score=0.6413
f1 score is: 
0.67047095514559
Epoch 3/10 	 val_acc=0.6362 	 f1_score=0.6705
f1 score is: 
0.6919983161398513
Epoch 4/10 	 val_acc=0.6619 	 f1_score=0.6920
f1 score is: 
0.6986327800614305
Epoch 5/10 	 val_acc=0.6697 	 f1_score=0.6986
f1 score is: 
0.7032231062260706
Epoch 6/10 	 val_acc=0.6779 	 f1_score=0.7032
f1 score is: 
0.705701166322944
Epoch 7/10 	 val_acc=0.6827 	 f1_score=0.7057
f1 score is: 
0.71266297568873
Epoch 8/10 	 val_acc=0.6909 	 f1_score=0.7127
f1 score is: 
0.7150574408775657
Epoch 9/10 	 val_acc=0.6938 	 f1_score=0.7151
f1 score is: 
0.7159971507564344
Epoch 10/10 	 val_acc=0.6956 	 f1_score=0.7160
