In [17]:
import pandas as pd
import os
import glob
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import sklearn.metrics
from collections import defaultdict
import shutil
import pickle

# Restore every pandas option to its default value. With the pandas version used
# for the recorded run, this call also emits the deprecation notices that appear
# in this cell's output.
pd.reset_option('all')

As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead.

: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.



In [70]:
# Folder that holds the input table and receives the linked output.
# Set the TABLE_LINKER_DIR environment variable to run on another machine;
# the default keeps the original local path so existing runs are unchanged.
table = os.environ.get(
    'TABLE_LINKER_DIR',
    '/Users/rijulvohra/Documents/work/Novartis-ISI/table_linker_pipeline/',
)
# File name of the input table inside `table`.
table_name = 'mathematician.csv'
# Column handed to `tl canonicalize -c`, i.e. the column to be linked.
wikify_column_name = "Mathematician"

In [71]:
# Intermediate files: bare names here are joined onto `temp_dir` by the cell
# that produces each one.
canonical = 'canonical.csv'
candidates = "candidates.csv"
feature_votes = "feature_votes.csv"
score_file = "scores.csv"
# Pickled ranking model. The path is relative, so it is resolved against the
# kernel's current working directory rather than `table`.
model_name = 'rf_tuned_ranking.pkl'
temp_dir = os.path.join(table, 'temp')
table_path = os.path.join(table, table_name)
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(temp_dir, exist_ok=True)
# NOTE(review): this name ('graph_embedding_transe', no "s") differs from the
# 'graph_embeddings_transe' files written and read by later cells, and the
# variable is not used anywhere in the visible notebook — confirm it is still needed.
embedding_file = os.path.join(temp_dir,'graph_embedding_transe.tsv')

## Canonicalize

In [72]:
# Run `tl canonicalize` on the chosen column and store the result in temp_dir;
# the preview in the next cell shows the column/row/label/context layout it produces.
canonical_file = os.path.join(temp_dir,canonical)
# NOTE(review): $table_path and $canonical_file are interpolated unquoted (unlike
# the column name), so a path containing spaces would presumably be split by the shell.
!tl canonicalize -c "$wikify_column_name" --add-context $table_path > $canonical_file

In [73]:
# Sanity check: show the first 10 rows of the canonical file.
pd.read_csv(canonical_file, nrows = 10)

Unnamed: 0,column,row,label,context
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...
1,0,1,Anthony To-Ming Lau,1969|University of British Columbia|Edmond E. ...
2,0,2,Edmond E. Granirer,1962|Hebrew University|Harry Kesten|On Amenabl...
3,0,3,Harry Kesten,1958|Cornell University|Mark Kac|Symmetric Ran...
4,0,4,Mark Kac,1937|University of Lw?w|Hugo Steinhaus|unknown
5,0,5,Hugo Steinhaus,1911|Georg-August-Universit?t G?ttingen|David ...
6,0,6,David Hilbert,1885|Universit?t K?nigsberg|C. L. Ferdinand Li...
7,0,7,C. L. Ferdinand Lindemann,1873|Friedrich-Alexander-Universit?t Erlangen-...
8,0,8,C. Felix Klein,1868|Rheinische Friedrich-Wilhelms-Universit?t...
9,0,9,Julius Pl?cker,1823|Philipps - Universit?t Marburg|Christian ...


## Candidate Generation

In [74]:
%%time
# Candidate rows are redirected into <temp_dir>/candidates.csv.
candidates_file = os.path.join(temp_dir,candidates)
# Value passed to --auxiliary-fields; the merge loop after the pipeline splits it on ','.
aux_field = 'graph_embeddings_transe'
# Pipeline: `clean` writes `label_clean` from `label`, then get-fuzzy-augmented-matches
# and get-exact-matches both query on `label_clean` using the same --url/--index,
# each with --auxiliary-folder pointing at temp_dir.
# NOTE(review): the host `ckg06:9200` and the index name are hard-coded twice;
# presumably only reachable from the author's network — confirm before reuse.
!tl clean -c label -o label_clean $canonical_file \
        / --url http://ckg06:9200 --index wikidatadwd-augmented-02 get-fuzzy-augmented-matches -c label_clean \
        --auxiliary-fields $aux_field \
        --auxiliary-folder $temp_dir \
        / --url http://ckg06:9200 --index wikidatadwd-augmented-02 get-exact-matches \
        -c label_clean --auxiliary-fields $aux_field \
        --auxiliary-folder $temp_dir > $candidates_file
                
# Merge the per-method auxiliary files of each requested field into a single
# <temp_dir>/<field>.tsv with one row per qnode and the value column renamed
# to 'embedding'.
for field in aux_field.split(','):
    merged_name = f'{field}.tsv'
    # Use the loop variable (not the whole comma-separated `aux_field`) so that
    # several fields work. Skip the merged output itself: on a re-run the glob
    # would otherwise pick it up and yield two 'embedding' columns.
    # Sorting makes the "first occurrence wins" de-duplication deterministic.
    part_files = sorted(
        f for f in glob.glob(f'{temp_dir}/*{field}.tsv')
        if os.path.basename(f) != merged_name
    )
    aux_list = [pd.read_csv(f, sep='\t', dtype=object) for f in part_files]
    aux_df = (
        pd.concat(aux_list)
        .drop_duplicates(subset=['qnode'])
        .rename(columns={field: 'embedding'})
    )
    aux_df.to_csv(f'{temp_dir}/{merged_name}', sep='\t', index=False)

CPU times: user 2.36 s, sys: 810 ms, total: 3.17 s
Wall time: 1min 28s


In [75]:
# Sanity check: show the first 10 candidate rows.
pd.read_csv(candidates_file,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q98543829,F Keith Taylor,,fuzzy-augmented,UK election candidate,3.539613e-09,23.37343
1,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q22087035,Keith Taylor,,fuzzy-augmented,British political scientist,3.539613e-09,23.102688
2,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385100,Keith Taylor,,fuzzy-augmented,Canadian poet,3.539613e-09,22.614727
3,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5079714,Charles Keith Taylor,,fuzzy-augmented,Canadian politician,3.539613e-09,22.286064
4,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q102164684,Keith Frederick Taylor,,fuzzy-augmented,Ph.D. University of Alberta 1975,1.120288e-08,22.128487
5,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q1738098,Keith Taylor,Keith Richard Taylor,fuzzy-augmented,British politician (born 1953),3.712462e-09,21.732487
6,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385099,Keith Taylor,,fuzzy-augmented,American football player,3.539613e-09,21.732487
7,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385101,Keith Taylor,Dennis More|Cadmus Evans,fuzzy-augmented,Australian science fiction and fantasy writer,3.539613e-09,20.802166
8,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q53567269,Keith A. Taylor,,fuzzy-augmented,,3.539613e-09,19.758533
9,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5541266,George Keith Taylor,,fuzzy-augmented,United States federal judge,3.539613e-09,19.758533


## Feature Voting

In [76]:
%%time
feature_votes_file = os.path.join(temp_dir,feature_votes)
# Chain of tl sub-commands over the candidates file: smallest-qnode-number, then
# three string-similarity passes writing monge_elkan, des_cont_jaccard
# (kg_descriptions vs. context) and jaro_winkler, then feature-voting over the
# four columns named in its -c list. jaro_winkler is computed but is not part
# of that list.
!tl smallest-qnode-number $candidates_file \
    / string-similarity -i --method monge_elkan:tokenizer=word -o monge_elkan \
    / string-similarity -i --method jaccard:tokenizer=word -c kg_descriptions context -o des_cont_jaccard \
    / string-similarity -i --method jaro_winkler -o jaro_winkler \
    / feature-voting -c "pagerank,smallest_qnode_number,monge_elkan,des_cont_jaccard" > $feature_votes_file

CPU times: user 88.1 ms, sys: 38.5 ms, total: 127 ms
Wall time: 5.39 s


In [77]:
# Sanity check: show the first 10 rows, now including the similarity and votes columns.
pd.read_csv(feature_votes_file,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q98543829,F Keith Taylor,,fuzzy-augmented,UK election candidate,3.539613e-09,23.37343,0,0.95,0.0,0.906349,1
1,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q22087035,Keith Taylor,,fuzzy-augmented,British political scientist,3.539613e-09,23.102688,0,0.666667,0.0,0.96,0
2,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385100,Keith Taylor,,fuzzy-augmented,Canadian poet,3.539613e-09,22.614727,0,0.666667,0.0,0.96,0
3,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5079714,Charles Keith Taylor,,fuzzy-augmented,Canadian politician,3.539613e-09,22.286064,0,0.666667,0.0,0.605556,0
4,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q102164684,Keith Frederick Taylor,,fuzzy-augmented,Ph.D. University of Alberta 1975,1.120288e-08,22.128487,0,0.845679,0.6,0.871082,1
5,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q1738098,Keith Taylor,Keith Richard Taylor,fuzzy-augmented,British politician (born 1953),3.712462e-09,21.732487,0,0.666667,0.0,0.96,0
6,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385099,Keith Taylor,,fuzzy-augmented,American football player,3.539613e-09,21.732487,0,0.666667,0.0,0.96,0
7,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385101,Keith Taylor,Dennis More|Cadmus Evans,fuzzy-augmented,Australian science fiction and fantasy writer,3.539613e-09,20.802166,0,0.666667,0.0,0.96,0
8,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q53567269,Keith A. Taylor,,fuzzy-augmented,,3.539613e-09,19.758533,0,0.888889,0.0,0.944762,0
9,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5541266,George Keith Taylor,,fuzzy-augmented,United States federal judge,3.539613e-09,19.758533,0,0.666667,0.0,0.773369,0


## Compute Embedding Score using Column Vector Strategy

In [78]:
%%time
# NOTE(review): this points at fuzzy_augmented_graph_embeddings_transe.tsv, not at
# the merged graph_embeddings_transe.tsv written by the candidate-generation cell
# (nor at `embedding_file`) — confirm which embedding file is intended. The
# recorded run reports fewer qnodes found in the file than were looked up.
f_embedding_file = os.path.join(temp_dir,'fuzzy_augmented_graph_embeddings_transe.tsv')
score = os.path.join(temp_dir,score_file)
# Writes the graph-embedding-score column (-o) using the centroid-of-singletons
# column-vector strategy; output is redirected to `score`.
!tl score-using-embedding $feature_votes_file --column-vector-strategy centroid-of-singletons \
-o graph-embedding-score --embedding-file $f_embedding_file \
> $score

Qnodes to lookup: 2611
Qnodes from file: 2571
CPU times: user 24.7 ms, sys: 17.5 ms, total: 42.2 ms
Wall time: 1.6 s


In [79]:
# Sanity check: show the first 10 rows with the new graph-embedding-score column.
pd.read_csv(score,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q98543829,F Keith Taylor,,fuzzy-augmented,UK election candidate,3.539613e-09,23.37343,0,0.95,0.0,0.906349,1,0.178267
1,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q22087035,Keith Taylor,,fuzzy-augmented,British political scientist,3.539613e-09,23.102688,0,0.666667,0.0,0.96,0,0.357342
2,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385100,Keith Taylor,,fuzzy-augmented,Canadian poet,3.539613e-09,22.614727,0,0.666667,0.0,0.96,0,0.416312
3,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5079714,Charles Keith Taylor,,fuzzy-augmented,Canadian politician,3.539613e-09,22.286064,0,0.666667,0.0,0.605556,0,0.407771
4,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q102164684,Keith Frederick Taylor,,fuzzy-augmented,Ph.D. University of Alberta 1975,1.120288e-08,22.128487,0,0.845679,0.6,0.871082,1,0.266951
5,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q1738098,Keith Taylor,Keith Richard Taylor,fuzzy-augmented,British politician (born 1953),3.712462e-09,21.732487,0,0.666667,0.0,0.96,0,0.296455
6,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385099,Keith Taylor,,fuzzy-augmented,American football player,3.539613e-09,21.732487,0,0.666667,0.0,0.96,0,0.310881
7,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385101,Keith Taylor,Dennis More|Cadmus Evans,fuzzy-augmented,Australian science fiction and fantasy writer,3.539613e-09,20.802166,0,0.666667,0.0,0.96,0,0.33296
8,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q53567269,Keith A. Taylor,,fuzzy-augmented,,3.539613e-09,19.758533,0,0.888889,0.0,0.944762,0,0.312174
9,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5541266,George Keith Taylor,,fuzzy-augmented,United States federal judge,3.539613e-09,19.758533,0,0.666667,0.0,0.773369,0,0.37572


## Generate Additional Features required for Model Prediction

In [80]:
## TODO: Need to add these features as cli commands in Table Linker

def create_singleton_feature(df):
    """Flag candidates whose cell has exactly one exact-match candidate.

    A cell is identified by its (column, row) pair. Every candidate row of a
    cell that has exactly one row with ``method == 'exact-match'`` (and a
    non-null ``kg_id``) gets ``singleton = 1``; all other rows get ``0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate table with at least the columns ``column``, ``row``,
        ``method`` and ``kg_id``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with a new integer
        ``singleton`` column.
    """
    exact_matches = df[df['method'] == 'exact-match']
    exact_counts = exact_matches.groupby(['column', 'row'])['kg_id'].count()
    # A set gives O(1) membership tests; the previous list lookup inside
    # iterrows() scaled with rows x cells.
    singleton_cells = set(exact_counts[exact_counts == 1].index)
    df['singleton'] = [
        1 if cell in singleton_cells else 0
        for cell in zip(df['column'], df['row'])
    ]
    return df
        
def generate_reciprocal_rank(df):
    """Add a per-cell reciprocal rank of ``graph-embedding-score``.

    Rows are grouped by (row, column); within each group the score is ranked
    with pandas' default settings (ascending, ties averaged) and
    ``reciprocal_rank = 1 / rank`` is stored. Ascending means the LOWEST score
    in a cell receives 1.0.
    NOTE(review): that direction is preserved on purpose because the pickled
    model presumably expects the feature exactly as before — confirm before
    changing it.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate table with ``row``, ``column`` and ``graph-embedding-score``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df`` is not modified) ordered by (row, column) group
        with a fresh 0..n-1 index and an added ``reciprocal_rank`` column.
        An empty input yields an empty frame that still carries the columns.
    """
    # assign() works on a copy of each group, which avoids writing into the
    # groupby sub-frame and the row-by-row dict round-trip.
    ranked_cells = [
        cell.assign(reciprocal_rank=1 / cell['graph-embedding-score'].rank())
        for _, cell in df.groupby(['row', 'column'])
    ]
    if not ranked_cells:
        # pd.concat([]) raises; return an empty, correctly shaped frame instead.
        return df.iloc[0:0].assign(reciprocal_rank=pd.Series(dtype='float64'))
    return pd.concat(ranked_cells, ignore_index=True)

# Build the model's feature table from the scored candidates.
features_df = create_singleton_feature(pd.read_csv(score))

# Length features of the KG label; a missing label counts as 0.
kg_label_values = features_df['kg_labels']
features_df['num_char'] = [
    0 if pd.isna(kg_label) else len(kg_label) for kg_label in kg_label_values
]
features_df['num_tokens'] = [
    0 if pd.isna(kg_label) else len(kg_label.split()) for kg_label in kg_label_values
]

features_df = generate_reciprocal_rank(features_df)
features_df.head()

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q98543829,F Keith Taylor,,fuzzy-augmented,UK election candidate,...,0,0.95,0.0,0.906349,1,0.178267,0,14,3,0.125
1,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q22087035,Keith Taylor,,fuzzy-augmented,British political scientist,...,0,0.666667,0.0,0.96,0,0.357342,0,12,2,0.014706
2,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385100,Keith Taylor,,fuzzy-augmented,Canadian poet,...,0,0.666667,0.0,0.96,0,0.416312,0,12,2,0.011236
3,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5079714,Charles Keith Taylor,,fuzzy-augmented,Canadian politician,...,0,0.666667,0.0,0.605556,0,0.407771,0,20,3,0.011905
4,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q102164684,Keith Frederick Taylor,,fuzzy-augmented,Ph.D. University of Alberta 1975,...,0,0.845679,0.6,0.871082,1,0.266951,0,22,3,0.037037


## Final Ranking Score Predicted by Model

In [81]:
# Feature columns passed to the model, in this order.
features = ['pagerank','retrieval_score','monge_elkan',
            'des_cont_jaccard','jaro_winkler','graph-embedding-score',
            'singleton','num_char','num_tokens','reciprocal_rank']
final_score = os.path.join(temp_dir,'final_score.csv')
# SECURITY: unpickling executes arbitrary code, so only load a model file from
# a trusted source. The context manager closes the file handle, which the
# previous bare open() call leaked.
with open(model_name, 'rb') as model_file:
    model = pickle.load(model_file)
data = features_df[features]
predicted_score = model.predict(data)
# Store the prediction next to the features and persist it for `tl get-kg-links`.
features_df['model_prediction'] = predicted_score
features_df.to_csv(final_score,index=False)

In [82]:
# Sanity check: show the first 10 rows with the model_prediction column.
pd.read_csv(final_score,nrows=10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank,model_prediction
0,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q98543829,F Keith Taylor,,fuzzy-augmented,UK election candidate,...,0.95,0.0,0.906349,1,0.178267,0,14,3,0.125,-0.992
1,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q22087035,Keith Taylor,,fuzzy-augmented,British political scientist,...,0.666667,0.0,0.96,0,0.357342,0,12,2,0.014706,-0.387868
2,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385100,Keith Taylor,,fuzzy-augmented,Canadian poet,...,0.666667,0.0,0.96,0,0.416312,0,12,2,0.011236,-0.674692
3,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5079714,Charles Keith Taylor,,fuzzy-augmented,Canadian politician,...,0.666667,0.0,0.605556,0,0.407771,0,20,3,0.011905,-0.804083
4,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q102164684,Keith Frederick Taylor,,fuzzy-augmented,Ph.D. University of Alberta 1975,...,0.845679,0.6,0.871082,1,0.266951,0,22,3,0.037037,-0.874778
5,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q1738098,Keith Taylor,Keith Richard Taylor,fuzzy-augmented,British politician (born 1953),...,0.666667,0.0,0.96,0,0.296455,0,12,2,0.025641,-0.607381
6,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385099,Keith Taylor,,fuzzy-augmented,American football player,...,0.666667,0.0,0.96,0,0.310881,0,12,2,0.023256,-0.645166
7,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q6385101,Keith Taylor,Dennis More|Cadmus Evans,fuzzy-augmented,Australian science fiction and fantasy writer,...,0.666667,0.0,0.96,0,0.33296,0,12,2,0.017241,-0.649071
8,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q53567269,Keith A. Taylor,,fuzzy-augmented,,...,0.888889,0.0,0.944762,0,0.312174,0,15,3,0.022727,-0.828452
9,0,0,Keith F. Taylor,1975|University of Alberta|Anthony To-Ming Lau...,Keith F. Taylor,Q5541266,George Keith Taylor,,fuzzy-augmented,United States federal judge,...,0.666667,0.0,0.773369,0,0.37572,0,19,3,0.013889,-0.759337


## Get Top-5 KG Links

In [83]:
# Keep 5 candidates per cell (-k 5) selected on the model_prediction column;
# as the preview shows, ids, labels and scores come back '|'-joined with the
# highest score first.
linked_table_name = "topk-" + table_name
output_path = os.path.join(temp_dir,linked_table_name)
!tl get-kg-links -c model_prediction -l label -k 5 $final_score > $output_path
pd.read_csv(output_path, nrows = 10)

Unnamed: 0,column,row,label,kg_id,kg_label,ranking_score
0,0,0,Keith F. Taylor,Q22087035|Q1738098|Q6385099|Q100933635|Q6385101,Keith Taylor|Keith Taylor|Keith Taylor|Keith T...,-0.39|-0.61|-0.65|-0.65|-0.65
1,0,1,Anthony To-Ming Lau,Q93431753|Q5033813|Q15901790|Q102168953|Q24835859,Anthony To-Ming Lau|Canti Lau|Lau Wai Ming|Tin...,-0.01|-0.37|-0.62|-0.62|-0.63
2,0,10,Christian Ludwig Gerling,Q1080895|Q1080897|Q72995|Q17521996|Q27580437,Christian Ludwig Gerling|Christian Ludwig Gerl...,0.04|-0.54|-0.6|-0.7|-0.72
3,0,11,Carl Friedrich Gauss,Q6722|Q1036512|Q87280488|Q1036458|Q18911469,Carl Friedrich Gauss|Carl Friedrich Gauss Priz...,0.08|0.03|-0.03|-0.4|-0.46
4,0,12,Johann Friedrich Pfaff,Q77361|Q94917687|Q97264|Q123315|Q21223689,Johann Friedrich Pfaff|Johann Friedrich Pfaff|...,0.12|-0.15|-0.41|-0.59|-0.61
5,0,13,Abraham Gotthelf Kaestner,Q61813|Q24231370|Q4730637|Q94865315|Q24050378,"Abraham Gotthelf Kästner|Kaestner, Abraham Got...",0.51|0.2|-0.06|-0.67|-0.67
6,0,14,Christian August Hausen,Q1078821|Q21137947|Q1078815|Q4820676|Q214099,"Christian August Hausen|Hausen, Christian Augu...",0.1|-0.39|-0.54|-0.68|-0.7
7,0,15,Christian Andreas Siber,Q102233334|Q1078783|Q5109282|Q105604|Q102950252,Christian Andreas Siber|Christian Andreas Sibe...,0.73|0.08|-0.29|-0.35|-0.48
8,0,16,Rudolf Lipschitz,Q77322|Q105466755|Q1782725|Q105466757|Q105466754,Rudolf Lipschitz|Henriette Lipschitz|Konstanti...,0.82|-0.74|-0.8|-0.81|-0.81
9,0,17,Gustav Peter Lejeune Dirichlet,Q29193|Q2070240|Q975727|Q3750494|Q1254296,Johann Peter Gustav Lejeune Dirichlet|Florian ...,0.3|-0.12|-0.62|-0.65|-0.68


## Join to Produce the Final Result

In [84]:
# Join the top-k links back onto the original input table. The result is written
# as linked-<table_name> into `table` (not temp_dir), so it is not removed by the
# clean-up cell.
final_output = 'linked-' + table_name
path = os.path.join(table,final_output)
!tl join -f $table_path --csv -c ranking_score $output_path > $path

In [85]:
# Load the linked table; the bare `df` on the last line renders it as the cell output.
df = pd.read_csv(path)
df

Unnamed: 0,Mathematician,Year of PhD,Granting Institution,Supervisor,Thesis,kg_id,kg_label,score
0,Keith F. Taylor,1975,University of Alberta,Anthony To-Ming Lau,The Structure of the Regular Representation of...,Q22087035|Q1738098|Q6385099|Q100933635|Q6385101,Keith Taylor|Keith Taylor|Keith Taylor|Keith T...,-0.39|-0.61|-0.65|-0.65|-0.65
1,Anthony To-Ming Lau,1969,University of British Columbia,Edmond E. Granirer,Topological Semigroups,Q93431753|Q5033813|Q15901790|Q102168953|Q24835859,Anthony To-Ming Lau|Canti Lau|Lau Wai Ming|Tin...,-0.01|-0.37|-0.62|-0.62|-0.63
2,Edmond E. Granirer,1962,Hebrew University,Harry Kesten,On Amenable Semigroups with a Finite Dimension...,Q102110100|Q2070883|Q5338859|Q36846677|Q5234312,Edmond E. Granirer|Edmond Kramer|Edmond E. Cha...,0.71|-0.81|-0.82|-0.84|-0.84
3,Harry Kesten,1958,Cornell University,Mark Kac,Symmetric Random Walks on Groups,Q635373|Q1663338|Q66311454|Q64482048|Q5666887,Harry Kesten|Ingrid Kasten|Haim Kesten|Harry K...,-0.07|-0.72|-0.73|-0.74|-0.74
4,Mark Kac,1937,University of Lw?w,Hugo Steinhaus,unknown,Q736380|Q104806349|Q203354|Q11770406|Q103865156,Mark Kac|Mira Kac|Finnish markka|Markus Kac|Ka...,0.59|-0.3|-0.44|-0.52|-0.57
5,Hugo Steinhaus,1911,Georg-August-Universit?t G?ttingen,David Hilbert,Neue Anwendungen des Dirichlet'schen Prinzips,Q226072|Q95324688|Q12348999|Q4798943|Q95715292,Hugo Steinhaus|Hugo Steinhauser|Hugo Steinhaue...,0.89|-0.7|-0.71|-0.76|-0.82
6,David Hilbert,1885,Universit?t K?nigsberg,C. L. Ferdinand Lindemann,?ber invariante Eigenschaften specieller bin?r...,Q41585|Q94846426|Q17044081|Q5234127|Q60232478,David Hilbert|David Leberecht Fürchtegott Hilb...,0.52|-0.17|-0.36|-0.63|-0.66
7,C. L. Ferdinand Lindemann,1873,Friedrich-Alexander-Universit?t Erlangen-N?rnberg,C. Felix Klein,?ber unendlich kleine Bewegungen und ?ber Kraf...,Q77203|Q5541739|Q95700945|Q21297020|Q95319,Ferdinand von Lindemann|George Lindemann|Anna ...,-0.17|-0.76|-0.77|-0.78|-0.79
8,C. Felix Klein,1868,Rheinische Friedrich-Wilhelms-Universit?t Bonn,Julius Pl?cker and Rudolf Lipschitz,?ber die Transformation der allgemeinen Gleich...,Q76641|Q27526592|Q58181951|Q67207502|Q104489551,Felix Klein|Felix Klein|Felix Klein|C Klein|C ...,-0.61|-0.7|-0.71|-0.73|-0.73
9,Julius Pl?cker,1823,Philipps - Universit?t Marburg,Christian Gerling,Generalem analyeseos applicationem ad ea quae ...,Q57561|Q94787045|Q223433|Q37907|Q41503145,Julius Plücker|Julius Löcker|PL/I|.pl|Daniel J...,0.02|-0.84|-0.86|-0.86|-0.87


## Clean Up Temporary Files

In [36]:
# Remove the scratch directory and every intermediate file inside it.
# Guarded so that re-running this cell, or running it before the pipeline has
# created the directory, does not raise FileNotFoundError.
if os.path.isdir(temp_dir):
    shutil.rmtree(temp_dir)