#### Instance attribution
Running this code produces the text to paste into the boxes, plus the graph and table, for the instance attribution Figma prototype.
Input: a Python list of `(claim, [(influence, annotation_id), ...])` pairs.

Example:
```
[('claim_to_check', [(influence (float), 'annotation_id'), ...]), ...]
```

In [None]:
# imports
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from IPython.core.display import display
import json


In [None]:
def getWithDataPoints(influence_output, path_to_fever_data):
    """Replace every annotation id with the full data point read from the data file.

    The file is scanned once, and only records whose 'annotation_id' is
    actually requested are kept in memory.

    Args:
        influence_output: iterable of ``(claim, datapoints)`` where
            ``datapoints`` is an iterable of ``(influence, annotation_id)``.
        path_to_fever_data: path to a JSON-lines file; every non-blank line
            is a JSON object with an 'annotation_id' key.

    Returns:
        A list with one ``[claim, influence_data_pairs]`` entry per input
        claim, in input order::

            [['claim_to_check',
              [(influence (float), data_point), (influence (float), data_point), ...]],
             ['claim_to_check', [(influence (float), data_point), ...]],
             ...]

        Annotation ids that do not occur in the file contribute no pair; an
        id that occurs several times contributes one pair per occurrence, in
        file order. The same ``data_point`` dict object is reused when an id
        is requested more than once.

        data_point_example = {'annotation_id': 'String',
             'classification': 'String',
             'docids': ['String'],
             'evidences': [[{'docid': 'String',
                'end_sentence': int,
                'end_token': int,
                'start_sentence': int,
                'start_token': int,
                'text': 'String'}], ... more evidences ...]}

    Raises:
        OSError: if the file cannot be opened (only attempted when at least
            one annotation id is requested).
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no 'annotation_id' key.
    """
    # Materialise the input so it can be walked twice even if it is a generator.
    requests = [(claim, list(datapoints)) for claim, datapoints in influence_output]
    wanted_ids = {
        annotation_id
        for _, datapoints in requests
        for _, annotation_id in datapoints
    }

    # Single pass over the file instead of one full re-read per annotation id.
    records_by_id = {}
    if wanted_ids:
        with open(path_to_fever_data, encoding="utf-8") as json_file:
            for line in json_file:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                record = json.loads(line)
                record_id = record['annotation_id']
                if record_id in wanted_ids:
                    records_by_id.setdefault(record_id, []).append(record)

    output_with_data = []
    for claim, datapoints in requests:
        influence_data_pairs = [
            (influence, record)
            for influence, annotation_id in datapoints
            for record in records_by_id.get(annotation_id, [])
        ]
        output_with_data.append([claim, influence_data_pairs])
    return output_with_data

In [None]:
def printImportantData(claim, influence_data_pairs):
    """Print a claim and, for each influential data point, its key fields.

    For every pair the influence score (rounded to 3 decimals), the
    classification, the query and the text of the first evidence snippet
    are printed, one per line.

    Args:
        claim: the claim text; printed after the 'Claim: ' label.
        influence_data_pairs: iterable of ``(influence, data_point)`` where
            ``data_point`` is a dict providing 'classification', 'query' and
            'evidences' (with ``evidences[0][0]['text']``).

    Raises:
        KeyError / IndexError: if a data point lacks one of the fields above.
    """
    print('Claim: ', claim)

    for pair in influence_data_pairs:
        influence, data_point = pair[0], pair[1]
        # Print the pair's own score; the previous version printed entries of
        # a never-filled np.empty() buffer, i.e. arbitrary memory contents.
        print('Influence: ' + str(round(influence, 3)))
        print('Classification: ' + str(data_point['classification']))
        print('Query: ' + data_point['query'])
        print('Context: ' + data_point['evidences'][0][0]['text'])


In [None]:
def displayPlot(claim, influence_data_pairs):
    """Render the influence bubble chart and the query/context table.

    Each data point becomes one marker: x is its query, y is its influence,
    kept positive when the classification is 'SUPPORTS' and negated
    otherwise. Marker size grows with the absolute influence and marker
    colour follows the signed influence on a red-to-green scale. After the
    figure, a DataFrame of query / first evidence text is displayed.

    Args:
        claim: accepted for symmetry with the caller; not used here.
        influence_data_pairs: sequence of ``(influence, data_point)`` where
            ``data_point`` provides 'classification', 'query' and
            ``evidences[0][0]['text']``.
    """
    # Signed score per point: positive for SUPPORTS, negative for anything else.
    signed_scores = np.array(
        [
            pair[0] if pair[1]['classification'] == 'SUPPORTS' else -1 * pair[0]
            for pair in influence_data_pairs
        ],
        dtype=float,
    )
    queries = [pair[1]['query'] for pair in influence_data_pairs]
    rows = [
        [query, pair[1]['evidences'][0][0]['text']]
        for query, pair in zip(queries, influence_data_pairs)
    ]

    df = pd.DataFrame(rows, columns=["Query", "Context Snippet from Wikipedia"])

    marker_style = dict(
        # *100 for scaling
        size=np.abs(signed_scores) * 100,
        cmin=-1,
        cmax=1,
        color=signed_scores,
        # same green and red as traffic light in figma
        colorscale=['rgb(239, 83, 80)', 'rgb(76, 175, 80)'],
        colorbar=dict(bgcolor='rgb(255,255,255)'),
    )
    scatter = go.Scatter(
        x=queries,
        y=signed_scores,
        mode='markers',
        marker=marker_style,
    )

    influence_plot = go.Figure(data=scatter)
    influence_plot.update_layout(
        xaxis_title="Query",
        yaxis_title="Influence Score",
    )
    influence_plot.update_yaxes(range=[-1, 1])
    influence_plot.show()
    display(df)

In [None]:
# After running this function the output can be used to copy/paste into Instance attribution Prototypes in Figma 
# Uncomment for example
# knn_data = [('Nestor Carbonell played Godzilla in Lost.', [(0.4629100498862757, '162940'), (0.4629100498862757, '162939'), (0.43301270189221935, '162931'), (0.4082482904638631, '152253'), (0.4082482904638631, '14839')]), ('Ice-T was a rapper.', [(0.5999999999999999, '62005'), (0.47809144373375745, '127907'), (0.4743416490252569, '214700'), (0.46188021535170054, '7004'), (0.4472135954999579, '99268')])]

# Resolve every annotation id to its full data point.
# NOTE: `knn_data` and `path_to_fever_data` are not assigned in this cell;
# they must be defined beforehand (e.g. by uncommenting the example above).
claim_influence_pairs = getWithDataPoints(knn_data, path_to_fever_data)

# Per claim: first the text to paste, then the plot and table.
for claim, influence_data_pairs in claim_influence_pairs:
    printImportantData(claim, influence_data_pairs)
    displayPlot(claim, influence_data_pairs)

FileNotFoundError: [Errno 2] No such file or directory: 'src/expred/dataset/fever/train.jsonl'