In [27]:
import pandas as pd
import os
import glob
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import sklearn.metrics
from collections import defaultdict
import shutil
import pickle

# Reset every pandas option to its default value.
# NOTE: resetting 'all' also touches deprecated options, which is why pandas
# prints their deprecation notices in this cell's output.
pd.reset_option('all')

As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead.

: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.



In [28]:
# Input configuration for this run.
# `table` is the working directory (note the trailing slash), `table_name` is
# the CSV inside it, and `wikify_column_name` is the column handed to
# `tl canonicalize -c`.
table = "/Users/rijulvohra/Documents/work/Novartis-ISI/table_linker_pipeline/"
table_name = "cricketers.csv"
wikify_column_name = "cricketers"

In [29]:
# Intermediate files: bare file names, joined onto temp_dir by the cells that use them.
canonical = 'canonical.csv'
candidates = "candidates.csv"
feature_votes = "feature_votes.csv"
score_file = "scores.csv"
# Bare file name, so the pickled model is looked up relative to the current
# working directory when it is opened.
model_name = 'rf_tuned_ranking.pkl'

# Scratch directory under the table directory, and the full path of the input table.
temp_dir = os.path.join(table, 'temp')
table_path = os.path.join(table, table_name)
# exist_ok=True creates the directory when missing and is a no-op when it is
# already there, without a separate exists() check.
os.makedirs(temp_dir, exist_ok=True)
# Merged embedding file; written by the candidate-generation cell and read by
# `tl score-using-embedding`.
embedding_file = os.path.join(temp_dir, 'graph_embedding_complex.tsv')

## Canonicalize

In [30]:
# Path of the canonicalized table inside temp_dir.
canonical_file = os.path.join(temp_dir,canonical)
# Run `tl canonicalize` on the input table for the column named by
# wikify_column_name (with --add-context) and redirect its stdout into
# canonical_file. The preview in the next cell shows the resulting
# column / row / label / context layout.
!tl canonicalize -c "$wikify_column_name" --add-context $table_path > $canonical_file

In [31]:
# Preview: read only the first 10 rows of the canonicalized file.
pd.read_csv(canonical_file, nrows = 10)

Unnamed: 0,column,row,label,context
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88
1,0,1,Tendulkar,mumbai indians|137|24/04/1973
2,0,2,Dhoni,chennai super kings|154|7/7/81
3,0,3,Jasprit Bumrah,mumbai indians|154|6/12/93
4,0,4,Ajinkya Rahane,rajasthan royals|134|6/6/88
5,0,5,Rohit Sharma,mumbai indians|159|30/04/1987
6,0,6,Bhuvneshwar Kumar,deccan chargers|154|5/2/90
7,0,7,Ravindra Jadeja,chennai super kings|132|6/12/88
8,0,8,Rishabh Pant,delhi capitals|136|4/8/97
9,0,9,Shikhar Dhawan,delhi capitals|157|5/12/85


## Candidate Generation

In [32]:
%%time
# Candidate generation. The `tl` sub-commands below are chained with `/`:
#   1. clean: derive `label_clean` from `label` in the canonical file;
#   2. get-fuzzy-augmented-matches on `label_clean`;
#   3. get-exact-matches on `label_clean`.
# Both lookups use the index `wikidatadwd-augmented-01` at http://ckg06:9200 and
# are asked for the auxiliary field(s) in `aux_field`, written under temp_dir.
# The final stdout is redirected into candidates_file.
# NOTE(review): the paths are interpolated unquoted, so this presumably breaks
# if temp_dir or the table path contains spaces — confirm before reusing.
candidates_file = os.path.join(temp_dir,candidates)
aux_field = 'graph_embedding_complex'
!tl clean -c label -o label_clean $canonical_file \
        / --url http://ckg06:9200 --index wikidatadwd-augmented-01 get-fuzzy-augmented-matches -c label_clean \
        --auxiliary-fields $aux_field \
        --auxiliary-folder $temp_dir \
        / --url http://ckg06:9200 --index wikidatadwd-augmented-01 get-exact-matches \
        -c label_clean --auxiliary-fields $aux_field \
        --auxiliary-folder $temp_dir > $candidates_file
                
# Merge the auxiliary TSV files found in temp_dir into one de-duplicated file
# per auxiliary field, with the field's column renamed to 'embedding'.
for field in aux_field.split(','):
    merged_path = os.path.join(temp_dir, f'{field}.tsv')
    # Use the loop variable, not the whole comma-separated `aux_field`, so that
    # several auxiliary fields are each merged into their own file.
    # The glob also matches the merged output of a previous run; skip it so a
    # re-run does not fold the already-renamed 'embedding' column back in.
    part_files = [
        part for part in glob.glob(f'{temp_dir}/*{field}.tsv')
        if os.path.abspath(part) != os.path.abspath(merged_path)
    ]
    if not part_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise FileNotFoundError(
            f'No auxiliary files matching *{field}.tsv found in {temp_dir}'
        )
    # dtype=object keeps every value as read (no numeric parsing).
    aux_list = [pd.read_csv(part, sep='\t', dtype=object) for part in part_files]
    aux_df = (
        pd.concat(aux_list)
        .drop_duplicates(subset=['qnode'])  # keep one row per qnode
        .rename(columns={field: 'embedding'})
    )
    aux_df.to_csv(merged_path, sep='\t', index=False)

CPU times: user 936 ms, sys: 345 ms, total: 1.28 s
Wall time: 26.7 s


In [33]:
# Preview: read only the first 10 rows of the candidates file.
pd.read_csv(candidates_file,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Franziska Kohli|Fraenzi Maegert-Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628


## Feature Voting

In [34]:
%%time
# Feature computation and voting, chained with `/` on the candidates file:
#   - smallest-qnode-number
#   - string-similarity three times, writing the columns monge_elkan,
#     des_cont_jaccard (jaccard over kg_descriptions and context) and jaro_winkler
#   - feature-voting over pagerank, smallest_qnode_number, monge_elkan and
#     des_cont_jaccard (the preview below shows the resulting `votes` column).
# jaro_winkler is computed here but is not part of the voting list; it is used
# later as a model feature.
feature_votes_file = os.path.join(temp_dir,feature_votes)
!tl smallest-qnode-number $candidates_file \
    / string-similarity -i --method monge_elkan:tokenizer=word -o monge_elkan \
    / string-similarity -i --method jaccard:tokenizer=word -c kg_descriptions context -o des_cont_jaccard \
    / string-similarity -i --method jaro_winkler -o jaro_winkler \
    / feature-voting -c "pagerank,smallest_qnode_number,monge_elkan,des_cont_jaccard" > $feature_votes_file

CPU times: user 66.7 ms, sys: 31 ms, total: 97.7 ms
Wall time: 4.29 s


In [35]:
# Preview: read only the first 10 rows of the feature-votes file.
pd.read_csv(feature_votes_file,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385,0,1.0,0.0,1.0,1
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463,0,0.733333,0.0,0.569697,0
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463,0,0.5,0.0,0.55711,0
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134,0,0.5,0.0,0.890909,0
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416,0,0.483333,0.0,0.857576,0
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405,0,0.0,0.0,0.0,0
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748,0,0.5,0.0,0.0,0
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Franziska Kohli|Fraenzi Maegert-Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713,1,0.488889,0.0,0.365763,1
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628,0,0.685185,0.0,0.642677,0
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628,0,0.733333,0.0,0.865909,0


## Compute Embedding Score using Column Vector Strategy

In [36]:
%%time
# Score the voted candidates with `tl score-using-embedding`, using the
# centroid-of-singletons column-vector strategy and the merged embedding file.
# The score is written to the `graph-embedding-score` column and the command's
# stdout is redirected into `score` (temp_dir/scores.csv).
score = os.path.join(temp_dir,score_file)
!tl score-using-embedding $feature_votes_file --column-vector-strategy centroid-of-singletons \
-o graph-embedding-score --embedding-file $embedding_file \
> $score

Qnodes to lookup: 1260
Qnodes from file: 1241
CPU times: user 19.5 ms, sys: 16.2 ms, total: 35.7 ms
Wall time: 1.2 s


In [37]:
# Preview: read only the first 10 rows of the embedding-score file.
pd.read_csv(score,nrows = 10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385,0,1.0,0.0,1.0,1,0.886983
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463,0,0.733333,0.0,0.569697,0,0.469873
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463,0,0.5,0.0,0.55711,0,0.586232
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134,0,0.5,0.0,0.890909,0,0.0
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416,0,0.483333,0.0,0.857576,0,0.513397
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405,0,0.0,0.0,0.0,0,0.215469
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748,0,0.5,0.0,0.0,0,0.354691
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Franziska Kohli|Fraenzi Maegert-Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713,1,0.488889,0.0,0.365763,1,0.474148
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628,0,0.685185,0.0,0.642677,0,0.557991
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628,0,0.733333,0.0,0.865909,0,0.826771


## Generate Additional Features required for Model Prediction

In [38]:
## TODO: Need to add these features as cli commands in Table Linker

def create_singleton_feature(df):
    """Add a 0/1 ``singleton`` column marking cells with a single exact match.

    A cell is identified by its ``(column, row)`` pair. A cell counts as a
    singleton when exactly one of its rows with ``method == 'exact-match'`` has
    a non-null ``kg_id``. Every row of such a cell, whatever its own method,
    gets ``singleton = 1``; all other rows get ``0``.

    Args:
        df: candidate table with ``column``, ``row``, ``kg_id`` and ``method``
            columns.

    Returns:
        The same ``df`` object, with the ``singleton`` column added in place.
    """
    exact_counts = (
        df[df['method'] == 'exact-match']
        .groupby(['column', 'row'])['kg_id']
        .count()
    )
    # A set gives O(1) membership checks; the previous list lookup inside
    # iterrows() scanned every singleton cell for every candidate row.
    singleton_cells = set(exact_counts[exact_counts == 1].index)
    df['singleton'] = [
        int(cell in singleton_cells)
        for cell in zip(df['column'], df['row'])
    ]
    return df
        
def generate_reciprocal_rank(df):
    """Add ``reciprocal_rank`` = 1 / rank of ``graph-embedding-score`` per cell.

    Rows are grouped by ``(row, column)``. Within each group the score is ranked
    with pandas' default ``rank()`` (ascending, ties averaged) and the
    reciprocal of that rank is stored.

    NOTE(review): because the rank is ascending, the *highest* score in a cell
    gets the *smallest* reciprocal rank. This is kept unchanged because the
    pickled ranking model consumes the feature; confirm how the model was
    trained before flipping the direction.

    Args:
        df: candidate table with ``row``, ``column`` and
            ``graph-embedding-score`` columns. It is not modified.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, groups ordered by
        ``(row, column)`` and the original row order kept inside each group.
        Empty input yields an empty frame that still carries every column,
        including ``reciprocal_rank``.
    """
    # assign() builds a new frame per group instead of writing into the
    # groupby slice, and concat() avoids a round trip through per-row dicts.
    ranked_cells = [
        cell_df.assign(
            reciprocal_rank=(1 / cell_df['graph-embedding-score'].rank()).to_numpy()
        )
        for _, cell_df in df.groupby(['row', 'column'])
    ]
    if not ranked_cells:
        # Nothing to rank: keep the schema rather than returning a column-less frame.
        return df.iloc[0:0].assign(reciprocal_rank=pd.Series(dtype=float))
    return pd.concat(ranked_cells, ignore_index=True)

# Build the feature table for the model from the embedding-scored candidates.
# create_singleton_feature adds its column in place and returns the same frame.
features_df = create_singleton_feature(pd.read_csv(score))
# Length of the KG label in characters and in whitespace-separated tokens;
# a missing label counts as 0 for both.
features_df['num_char'] = features_df['kg_labels'].apply(
    lambda label: 0 if pd.isna(label) else len(label)
)
features_df['num_tokens'] = features_df['kg_labels'].apply(
    lambda label: 0 if pd.isna(label) else len(label.split())
)
features_df = generate_reciprocal_rank(features_df)
features_df.head()

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,...,0,1.0,0.0,1.0,1,0.886983,1,11,2,0.00995
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,...,0,0.733333,0.0,0.569697,0,0.469873,1,11,2,0.029412
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),...,0,0.5,0.0,0.55711,0,0.586232,1,13,2,0.015625
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,...,0,0.5,0.0,0.890909,0,0.0,1,5,1,0.666667
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,...,0,0.483333,0.0,0.857576,0,0.513397,1,6,1,0.02


## Final Ranking Score Predicted by Model

In [39]:
# Columns passed to the ranking model, in this order.
# NOTE(review): presumably this must match the column order used when the
# model was trained — confirm against the training code.
features = ['pagerank','retrieval_score','monge_elkan',
            'des_cont_jaccard','jaro_winkler','graph-embedding-score',
            'singleton','num_char','num_tokens','reciprocal_rank']
final_score = os.path.join(temp_dir,'final_score.csv')
# SECURITY: unpickling runs arbitrary code from the file, so only load a model
# file you trust. The `with` block closes the file handle, which the previous
# bare open() call left open.
with open(model_name, 'rb') as model_file:
    model = pickle.load(model_file)
data = features_df[features]
predicted_score = model.predict(data)
# Store the prediction next to the features and write the table for `tl get-kg-links`.
features_df['model_prediction'] = predicted_score
features_df.to_csv(final_score,index=False)

In [40]:
# Preview: read only the first 10 rows of the model-scored file.
pd.read_csv(final_score,nrows=10)

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank,model_prediction
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,...,1.0,0.0,1.0,1,0.886983,1,11,2,0.00995,0.986138
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,...,0.733333,0.0,0.569697,0,0.469873,1,11,2,0.029412,-0.67212
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),...,0.5,0.0,0.55711,0,0.586232,1,13,2,0.015625,-0.675805
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,...,0.5,0.0,0.890909,0,0.0,1,5,1,0.666667,-0.944242
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,...,0.483333,0.0,0.857576,0,0.513397,1,6,1,0.02,-0.75146
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,...,0.0,0.0,0.0,0,0.215469,1,0,0,0.333333,-1.0
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,...,0.5,0.0,0.0,0,0.354691,1,5,1,0.2,-1.0
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Franziska Kohli|Fraenzi Maegert-Kohli,fuzzy-augmented,Swiss snowboarder,...,0.488889,0.0,0.365763,1,0.474148,1,19,2,0.026316,-0.880722
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,...,0.685185,0.0,0.642677,0,0.557991,1,24,2,0.017241,-0.822762
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,...,0.733333,0.0,0.865909,0,0.826771,1,11,2,0.010204,-0.971697


## Get Top-5 KG Links

In [41]:
# The top-k result is written to temp_dir as "topk-<table_name>".
linked_table_name = "topk-" + table_name
output_path = os.path.join(temp_dir,linked_table_name)
# Run `tl get-kg-links` on the model-scored file using the model_prediction
# column, with -k 5. As the preview shows, each cell gets one row whose kg_id,
# kg_label and ranking_score values are '|'-joined lists of five entries.
!tl get-kg-links -c model_prediction -l label -k 5 $final_score > $output_path
# Preview: read only the first 10 rows of the top-k file.
pd.read_csv(output_path, nrows = 10)

Unnamed: 0,column,row,label,kg_id,kg_label,ranking_score
0,0,0,Virat Kohli,Q213854|Q64003869|Q55644101|Q65228712|Q87403146,Virat Kohli|Naman Kohli|Kohli|Sumit Kohli|Raji...,0.99|-1.0|-1.0|-1.0|-1.0
1,0,1,Tendulkar,Q9488|Q22327439|Q2672321|Q49737582|Q91748675,Sachin Tendulkar|Arjun Tendulkar|Diospyros mel...,0.13|0.06|-1.0|-0.99|-0.99
2,0,10,Cheteshwar Pujara,Q142613|Q102379497|Q31517550|Q27194688|Q23873081,Cheteshwar Pujara|Shishir Pujala|Cerro Pucara|...,0.95|-1.0|-1.0|-1.0|-1.0
3,0,11,Ishant Sharma,Q3522062|Q16012878|Q23945357|Q2717698|Q22713152,Ishant Sharma|Har Sharma|Ishani|Lphant|380607 ...,0.99|0.02|-1.0|-1.0|-1.0
4,0,12,Mohammad Shami,Q7487535|Q58176303|Q7496860|Q4519997|Q14134085,"Shami Hospital|Rashid Shami Suwaid|Shimi, Iran...",-1.0|-1.0|-1.0|-1.0|-1.0
5,0,2,Dhoni,Q5269736|Q5269735|Q101197030|Q8560098|Q8069508,Dhoni|Dhoni|Dhoni|Dionisio Rodríguez Martín|Ze...,0.26|0.22|0.07|-1.0|-1.0
6,0,3,Jasprit Bumrah,Q16227998|Q4940541|Q182344|Q12365097|Q9182019,Jasprit Bumrah|Bombay Burmah Trading Corporati...,0.99|-1.0|-1.0|-1.0|-1.0
7,0,4,Ajinkya Rahane,Q137669|Q100357731|Q7224757|Q23880018|Q7599139,Ajinkya Rahane|Rabahne|Raphanus sativus subsp....,0.97|-1.0|-1.0|-1.0|-1.0
8,0,5,Rohit Sharma,Q21622845|Q26436595|Q21622847|Q26837282|Q65233196,Rohit Sharma|Rohit Sharma|Rohit Sharma|Rohit S...,0.75|0.75|0.73|0.63|0.61
9,0,6,Bhuvneshwar Kumar,Q2003153|Q613943|Q7144196|Q16278100|Q477265,Bhuvneshwar Kumar|Deyr el-Qamar|Patal Bhuvanes...,0.92|-1.0|-1.0|-1.0|-1.0


## Join to Produce Final Result

In [42]:
# The final result is written next to the input table (in `table`, not in
# temp_dir) as "linked-<table_name>".
final_output = 'linked-' + table_name
path = os.path.join(table,final_output)
# Run `tl join` with the original table (-f) and the top-k links file, and
# redirect its stdout into `path`.
!tl join -f $table_path --csv -c ranking_score $output_path > $path

In [43]:
# Load the joined result and display it in full (the whole frame, not a preview).
df = pd.read_csv(path)
df

Unnamed: 0,cricketers,teams,weight,dob,kg_id,kg_label,score
0,Virat Kohli,royal challengers bangalore,152,5/11/88,Q213854|Q64003869|Q55644101|Q65228712|Q87403146,Virat Kohli|Naman Kohli|Kohli|Sumit Kohli|Raji...,0.99|-1.0|-1.0|-1.0|-1.0
1,Tendulkar,mumbai indians,137,24/04/1973,Q9488|Q22327439|Q2672321|Q49737582|Q91748675,Sachin Tendulkar|Arjun Tendulkar|Diospyros mel...,0.13|0.06|-1.0|-0.99|-0.99
2,Dhoni,chennai super kings,154,7/7/81,Q5269736|Q5269735|Q101197030|Q8560098|Q8069508,Dhoni|Dhoni|Dhoni|Dionisio Rodríguez Martín|Ze...,0.26|0.22|0.07|-1.0|-1.0
3,Jasprit Bumrah,mumbai indians,154,6/12/93,Q16227998|Q4940541|Q182344|Q12365097|Q9182019,Jasprit Bumrah|Bombay Burmah Trading Corporati...,0.99|-1.0|-1.0|-1.0|-1.0
4,Ajinkya Rahane,rajasthan royals,134,6/6/88,Q137669|Q100357731|Q7224757|Q23880018|Q7599139,Ajinkya Rahane|Rabahne|Raphanus sativus subsp....,0.97|-1.0|-1.0|-1.0|-1.0
5,Rohit Sharma,mumbai indians,159,30/04/1987,Q21622845|Q26436595|Q21622847|Q26837282|Q65233196,Rohit Sharma|Rohit Sharma|Rohit Sharma|Rohit S...,0.75|0.75|0.73|0.63|0.61
6,Bhuvneshwar Kumar,deccan chargers,154,5/2/90,Q2003153|Q613943|Q7144196|Q16278100|Q477265,Bhuvneshwar Kumar|Deyr el-Qamar|Patal Bhuvanes...,0.92|-1.0|-1.0|-1.0|-1.0
7,Ravindra Jadeja,chennai super kings,132,6/12/88,Q2721457|Q3350524|Q37031669|Q7296726|Q21366217,Ravindra Jadeja|Rabindra Bharati University|Ra...,0.97|-1.0|-1.0|-1.0|-1.0
8,Rishabh Pant,delhi capitals,136,4/8/97,Q21622311|Q20685078|Q2049824|Q21622313|Q7336016,Rishabh Pant|Nilamber Pant|Pant|Tanay Pant|Ris...,0.99|-1.0|-1.0|-1.0|-1.0
9,Shikhar Dhawan,delhi capitals,157,5/12/85,Q7487024|Q4764634|Q16196258|Q28135727|Q90450762,Shikhar Dhawan|Anil Dhawan|Arjun Shekhar|The D...,0.99|-1.0|-1.0|-1.0|-1.0


## Clean Up Temporary Files

In [44]:
# Delete temp_dir and everything in it: the intermediate CSVs, the merged
# embedding file and the "topk-" file. The "linked-" result was written to
# `table`, outside temp_dir, so it is kept.
shutil.rmtree(temp_dir)