# Neural citation network for local citation recommendation

In [1]:
from ncn.evaluation import Evaluator
from ncn.data import get_datasets
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

In [None]:
# Filesystem locations of the model weights file (.pt) and the arXiv CS data
# CSV used by the cells below. These are absolute, machine-specific paths and
# must be adjusted before running the notebook on another machine.
path_to_weights = "/home/timo/Downloads/best_model_bn_TDNN/NCN_7_17_10.pt"
path_to_data = "/home/timo/DataSets/KD_arxiv_CS/arxiv_data.csv"

In [None]:
# Load the data from the CSV at path_to_data; the result is handed to the
# Evaluator constructor in the demo section.
data = get_datasets(path_to_data)

In [2]:
def display_dict(d):
    """Print the entries of a mapping, one per line.

    Each entry is written to standard output as
    ``Citation rank <key>|<TAB> <value>``, in the iteration order of
    ``d.items()``. Nothing is printed for an empty mapping.

    Args:
        d: Mapping whose keys are shown as ranks and whose values are shown
            next to them.
    """
    for rank, entry in d.items():
        line = f"Citation rank {rank}|\t {entry}"
        print(line)

In [None]:
def display_attention(candidate, translation, attention):
    """Plot an attention matrix as a heat map with token labels on both axes.

    Args:
        candidate: Text that is tokenized with ``tokenize_de`` to produce the
            x-axis labels (surrounded by ``<sos>`` and ``<eos>``).
        translation: List of labels for the y-axis (it is concatenated with a
            list, so it must itself be a list).
        attention: Object supporting ``.squeeze(1).cpu().detach().numpy()``
            (presumably a torch tensor of attention weights; confirm).

    NOTE(review): ``tokenize_de`` is neither defined nor imported in the
    visible part of this notebook; confirm where it comes from before
    running this cell.
    """
    
    # Single 10x10 inch figure with one axes.
    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    
    # Drop dimension 1 (if it has size 1) and convert to a NumPy array on the
    # CPU so that matplotlib can draw it.
    attention = attention.squeeze(1).cpu().detach().numpy()
    
    # Draw the matrix; the returned image handle (cax) is not used afterwards.
    cax = ax.matshow(attention, cmap='bone')
   
    ax.tick_params(labelsize=15)
    # The leading '' presumably accounts for a first tick that lies outside
    # the matrix - TODO confirm.
    # NOTE(review): labels are assigned before the MultipleLocator is
    # installed below; verify that labels still line up with the cells on the
    # matplotlib version in use.
    ax.set_xticklabels([''] + ['<sos>'] + [t.lower() for t in tokenize_de(candidate)] + ['<eos>'], 
                       rotation=45)
    ax.set_yticklabels([''] + translation)

    # Place one major tick per integer position on each axis.
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()
    plt.close()

## Data: Basic statistics
1. Removed 8260 triplets of paper data due to empty/missing files.  
2. Removed 1 data sample throwing regex error.  
3. Removed 161670 context samples where information was missing/could not be parsed from files.   
* This leaves __502353 context - citation pairs__ with full information.
* __Context vocabulary__ size after processing: __72046__.  
* __Title vocabulary__ size after processing: __43208__.  
* Number of __citing authors__: __28200__.  
* Number of __cited authors__: __169236__. 

![Context and title length distributions](assets/title_context_distribution.jpg)

## Data preprocessing with torchtext Fields

In [None]:
# Example query used in the demo section: a citation context sentence and a
# comma-separated string of author names. Both are passed to
# evaluator.recommend in a later cell.
context = "Neural networks are really cool, especially if they are convolutional."
authors = "Jim Foo, Bruce Lee"

## Bucketing: What is it and why do we need it?

Citation rank 0|	 Visualizing and understanding convolutional networks
Citation rank 1|	 Imagenet classification with   deep convolutional neural networks
Citation rank 2|	 Visualizing and understanding convolutional neural   networks
Citation rank 3|	 Group equivariant convolutional networks
Citation rank 4|	 Two-stream convolutional networks for action   recognition in videos


## Parameter tuning
![Parameter tuning](assets/lecun.jpeg)

## Demo


In [None]:
# Build the Evaluator from the weights path and the loaded data.
# NOTE(review): evaluate=False presumably selects recommendation mode rather
# than metric evaluation - confirm against ncn.evaluation.Evaluator.
evaluator = Evaluator(path_to_weights, data, evaluate=False)

In [None]:
# Request recommendations for the example context and authors. The result is
# passed to display_dict in the next cell, which iterates it via .items().
recomms = evaluator.recommend(context, authors)

In [None]:
# Print the recommendations, one "Citation rank <key>|<TAB> <value>" line per
# entry.
display_dict(recomms)

## Documentation
![Documentation](assets/Documentation.png)