# **Loading tools and dataset**

In [1]:
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np




In [2]:
# Load the arXiv papers CSV (path is relative to the notebook's working directory).
arxiv_data = pd.read_csv(
    "arxiv_data_210930-054931.csv",
)

In [3]:
# Preview the first five rows of the loaded frame.
arxiv_data.head(n=5)

Unnamed: 0,terms,titles,abstracts
0,['cs.LG'],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"['cs.LG', 'cs.AI']",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"['cs.LG', 'cs.CR', 'stat.ML']",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"['cs.LG', 'cs.CR']",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,['cs.LG'],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...


# **Data Cleaning and Preprocessing**

In [4]:
# (rows, columns) of the loaded frame.
arxiv_data.shape

(56181, 3)

In [5]:
# Count missing values per column.
arxiv_data.isna().sum()

terms        0
titles       0
abstracts    0
dtype: int64

In [6]:
# Display the frame; pandas truncates the rendering to the first and last rows.
arxiv_data

Unnamed: 0,terms,titles,abstracts
0,['cs.LG'],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"['cs.LG', 'cs.AI']",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"['cs.LG', 'cs.CR', 'stat.ML']",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"['cs.LG', 'cs.CR']",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,['cs.LG'],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...
...,...,...,...
56176,"['cs.CV', 'cs.IR']",Mining Spatio-temporal Data on Industrializati...,Despite the growing availability of big data i...
56177,"['cs.LG', 'cs.AI', 'cs.CL', 'I.2.6; I.2.7']",Wav2Letter: an End-to-End ConvNet-based Speech...,This paper presents a simple end-to-end model ...
56178,['cs.LG'],Deep Reinforcement Learning with Double Q-lear...,The popular Q-learning algorithm is known to o...
56179,"['stat.ML', 'cs.LG', 'math.OC']",Generalized Low Rank Models,Principal components analysis (PCA) is a well-...


# **======= Section 2 =======**

# **Recommendation System**

In [7]:
# The recommender only needs the titles, so remove the other two columns.
# Re-binding the result (instead of inplace=True) together with
# errors="ignore" keeps this cell safe to re-run: an in-place drop raises
# KeyError on the second execution because the columns are already gone.
arxiv_data = arxiv_data.drop(columns=["terms", "abstracts"], errors="ignore")

In [8]:
# Confirm that only the titles column remains.
arxiv_data

Unnamed: 0,titles
0,Multi-Level Attention Pooling for Graph Neural...
1,Decision Forests vs. Deep Networks: Conceptual...
2,Power up! Robust Graph Convolutional Network v...
3,Releasing Graph Neural Networks with Different...
4,Recurrence-Aware Long-Term Cognitive Network f...
...,...
56176,Mining Spatio-temporal Data on Industrializati...
56177,Wav2Letter: an End-to-End ConvNet-based Speech...
56178,Deep Reinforcement Learning with Double Q-lear...
56179,Generalized Low Rank Models


# **Sentence Transformers**

In [9]:
from sentence_transformers import SentenceTransformer,util

  from tqdm.autonotebook import tqdm, trange


In [10]:
# Load the 'all-MiniLM-L6-v2' SentenceTransformer used to embed the titles.
model = SentenceTransformer(
    "all-MiniLM-L6-v2"
)

In [11]:
# The paper titles are the sentences that get embedded.
sentences = arxiv_data.loc[:, "titles"]

In [12]:
# Encode every title into an embedding vector (one row per title).
embeddings = model.encode(sentences)

In [13]:
# Inspect the raw embedding array.
embeddings

array([[ 0.06643412, -0.04954597,  0.06388088, ...,  0.00106303,
        -0.1215638 , -0.06962783],
       [ 0.09212257, -0.07606944,  0.06572863, ..., -0.08565164,
        -0.09266546,  0.00725291],
       [-0.08162688,  0.02428937,  0.01888741, ...,  0.00806161,
        -0.05129534, -0.05874001],
       ...,
       [-0.09695333,  0.00057092,  0.07726484, ..., -0.01443817,
        -0.04748217,  0.06130559],
       [ 0.00768869, -0.10124185,  0.08909854, ..., -0.08199864,
        -0.05649742,  0.09007055],
       [ 0.06078521, -0.08312804, -0.00907777, ..., -0.0314818 ,
         0.05713108,  0.05696886]], dtype=float32)

In [14]:
# Shape of the embedding matrix: (number of encoded titles, embedding size).
embeddings.shape

(56181, 384)

# **Print the embeddings**

In [15]:
# Show the first six titles together with the size of their embedding vectors.
# Pairing with range(6) bounds the loop without a manual counter and break.
for c, sentence, embedding in zip(range(6), sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding length:", len(embedding))  # number of values in the vector
    print()

Sentence: Multi-Level Attention Pooling for Graph Neural Networks: Unifying Graph Representations with Multiple Localities
Embedding length: 384

Sentence: Decision Forests vs. Deep Networks: Conceptual Similarities and Empirical Differences at Small Sample Sizes
Embedding length: 384

Sentence: Power up! Robust Graph Convolutional Network via Graph Powering
Embedding length: 384

Sentence: Releasing Graph Neural Networks with Differential Privacy Guarantees
Embedding length: 384

Sentence: Recurrence-Aware Long-Term Cognitive Network for Explainable Pattern Classification
Embedding length: 384

Sentence: Lifelong Graph Learning
Embedding length: 384



# **Save files**

In [16]:
import os
import pickle

# Persist the artifacts that the recommendation step reloads later.
# open(..., 'wb') does not create missing directories, so make sure the
# output folder exists before writing into it.
os.makedirs("Models", exist_ok=True)

with open("Models/embeddings.pkl", 'wb') as f:
    pickle.dump(embeddings, f)

with open('Models/sentences.pkl', 'wb') as f:
    pickle.dump(sentences, f)

# NOTE(review): this pickles the whole SentenceTransformer object; the file is
# presumably only loadable with a compatible library version -- confirm before
# sharing it across environments.
with open('Models/rec_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# **Recommendation for similar papers**

In [17]:
# Load the saved files.
# Each file is opened in a `with` block so its handle is closed as soon as the
# object has been read, instead of being left open by a bare open() call.
# SECURITY: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook or come from a trusted source.
with open('Models/embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

with open('Models/sentences.pkl', 'rb') as f:
    sentences = pickle.load(f)

with open('Models/rec_model.pkl', 'rb') as f:
    rec_model = pickle.load(f)

In [18]:
import torch

def recommendation(input_paper, top_k=5):
    """Return the stored titles most similar to ``input_paper``.

    The query is embedded with ``rec_model`` and compared against the
    precomputed ``embeddings`` by cosine similarity; the titles with the
    highest scores are returned, best match first.

    Args:
        input_paper: Title (free text) of the paper to find neighbours for.
        top_k: Maximum number of titles to return. Defaults to 5.

    Returns:
        A list of at most ``top_k`` titles taken from ``sentences``. An empty
        list is returned when ``input_paper`` is empty or whitespace only.
    """
    # Nothing meaningful to embed; skip the model call entirely.
    if not input_paper or not input_paper.strip():
        return []

    query_embedding = rec_model.encode(input_paper)

    # cos_sim returns a (1, n_titles) score matrix for a single query; take
    # its only row to get one score per stored title.
    cosine_scores = util.cos_sim(query_embedding, embeddings)[0]

    # topk raises if k exceeds the number of scores, so clamp it.
    k = max(0, min(top_k, cosine_scores.shape[0]))
    top_indices = torch.topk(cosine_scores, k=k).indices.tolist()

    # Positional lookup: topk indices are row positions in `embeddings`, which
    # line up with the positions of the titles in `sentences`.
    return [sentences.iloc[idx] for idx in top_indices]

In [None]:
# Ask the user for a paper title; the cell blocks until something is entered.
input_paper = input("Enter the title of any paper you like")
