In [37]:
import pandas as pd
import os
import glob
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import sklearn.metrics
from collections import defaultdict
import shutil
import pickle

# Restore all pandas options to their library defaults.
pd.reset_option('all')


: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.



In [41]:
# CSV table to wikify; it is previewed with pandas and passed to `tl canonicalize` below.
input_file_path = '/Users/amandeep/Github/wikidata-wikifier/wikifier/sample_files/cricketers.csv'
# Column of the input table handed to `tl canonicalize -c`.
wikify_column_name = "cricketers"
# Directory that receives the final linked table (and the temp folder nested inside it).
output_path = '/Users/amandeep/Github/wikidata-wikifier/wikifier/sample_files/output'
# Values passed as `--index` / `--url` to the candidate-generation commands
# (presumably an Elasticsearch index and endpoint — TODO confirm).
es_index = 'wikidatadwd-augmented-02'
es_url = 'http://ckg06:9200'

In [42]:
# Scratch directory for intermediate pipeline files, nested under the output directory.
temp_dir = '{}/temp'.format(output_path)

In [48]:
# Create the output and scratch directories in pure Python instead of an
# unquoted `!mkdir -p $var`, which breaks on paths containing spaces or shell
# metacharacters. `exist_ok=True` keeps the cell safe to re-run.
os.makedirs(output_path, exist_ok=True)
os.makedirs(temp_dir, exist_ok=True)

In [49]:
# Intermediate files
# `table_name` was never defined anywhere in this notebook, so this cell raised
# NameError on a fresh kernel. Derive it from the input file so the top-k and
# final outputs are named after the table being linked.
table_name = os.path.basename(input_file_path)

canonical = f'{temp_dir}/canonical.csv'
candidates = f"{temp_dir}/candidates.csv"
feature_votes = f"{temp_dir}/feature_votes.csv"
score_file = f"{temp_dir}/scores.csv"
# Pickled ranking model, resolved relative to the current working directory.
model_name = 'rf_tuned_ranking.pkl'

# Auxiliary field requested during candidate generation; the merged embedding
# file is written as `<temp_dir>/<aux_field>.tsv`, so build the path from the
# same name instead of repeating the literal.
aux_field = 'graph_embedding_complex'
embedding_file = f'{temp_dir}/{aux_field}.tsv'
final_score = f'{temp_dir}/final_score.csv'
top_k_file = f"{temp_dir}/topk-{table_name}"
# The final linked table lives in output_path, outside the temp directory.
final_output = f"{output_path}/linked-{table_name}"

## Peek at the input file

In [50]:
# Display the raw input table.
pd.read_csv(input_file_path)

Unnamed: 0,cricketers,teams,weight,dob
0,Virat Kohli,royal challengers bangalore,152,5/11/88
1,Tendulkar,mumbai indians,137,24/04/1973
2,Dhoni,chennai super kings,154,7/7/81
3,Jasprit Bumrah,mumbai indians,154,6/12/93
4,Ajinkya Rahane,rajasthan royals,134,6/6/88
5,Rohit Sharma,mumbai indians,159,30/04/1987
6,Bhuvneshwar Kumar,deccan chargers,154,5/2/90
7,Ravindra Jadeja,chennai super kings,132,6/12/88
8,Rishabh Pant,delhi capitals,136,4/8/97
9,Shikhar Dhawan,delhi capitals,157,5/12/85


## Canonicalize

In [51]:
# Run `tl canonicalize` on the column named by wikify_column_name (with
# --add-context) and redirect the result to the `canonical` CSV.
!tl canonicalize \
-c "$wikify_column_name" \
--add-context \
{input_file_path} > {canonical}

In [52]:
# Preview the first 10 rows of the canonicalized table.
pd.read_csv(canonical, nrows = 10)

Unnamed: 0,column,row,label,context
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88
1,0,1,Tendulkar,mumbai indians|137|24/04/1973
2,0,2,Dhoni,chennai super kings|154|7/7/81
3,0,3,Jasprit Bumrah,mumbai indians|154|6/12/93
4,0,4,Ajinkya Rahane,rajasthan royals|134|6/6/88
5,0,5,Rohit Sharma,mumbai indians|159|30/04/1987
6,0,6,Bhuvneshwar Kumar,deccan chargers|154|5/2/90
7,0,7,Ravindra Jadeja,chennai super kings|132|6/12/88
8,0,8,Rishabh Pant,delhi capitals|136|4/8/97
9,0,9,Shikhar Dhawan,delhi capitals|157|5/12/85


## Candidate Generation

In [53]:
%%time
# Three-stage `tl` pipeline (stages are separated by `/`):
#   1. clean: reads `label`, writes the result to `label_clean`
#   2. get-fuzzy-augmented-matches on `label_clean`
#   3. get-exact-matches on `label_clean`
# Both match stages are given the same --url/--index and the same
# --auxiliary-fields/--auxiliary-folder (temp_dir); the next cell merges the
# per-field .tsv files found in temp_dir. Output is redirected to `candidates`.
!tl clean -c label -o label_clean {canonical} / \
--url $es_url --index $es_index \
get-fuzzy-augmented-matches -c label_clean \
--auxiliary-fields {aux_field} \
--auxiliary-folder $temp_dir / \
--url $es_url --index $es_index \
get-exact-matches -c label_clean \
--auxiliary-fields {aux_field} \
--auxiliary-folder {temp_dir} > {candidates}

CPU times: user 455 ms, sys: 154 ms, total: 609 ms
Wall time: 22 s


In [54]:
# Merge the auxiliary files written during candidate generation into one TSV
# per auxiliary field, de-duplicated on `qnode`, with the field column renamed
# to `embedding`.
for field in aux_field.split(','):
    # Use the per-iteration `field`, not the whole comma-separated `aux_field`
    # string; otherwise the glob, rename and output path are wrong as soon as
    # more than one auxiliary field is requested.
    # Sorting makes the "first occurrence wins" de-duplication deterministic.
    # NOTE(review): on a re-run the merged `<field>.tsv` written below also
    # matches this pattern — confirm whether it should be excluded.
    aux_paths = sorted(glob.glob(f'{temp_dir}/*{field}.tsv'))
    if not aux_paths:
        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        raise FileNotFoundError(
            f'no auxiliary files matching *{field}.tsv found in {temp_dir}'
        )
    # dtype=object keeps every value exactly as it appears in the file.
    aux_list = [pd.read_csv(path, sep='\t', dtype=object) for path in aux_paths]
    aux_df = (
        pd.concat(aux_list)
        .drop_duplicates(subset=['qnode'])
        .rename(columns={field: 'embedding'})
    )
    aux_df.to_csv(f'{temp_dir}/{field}.tsv', sep='\t', index=False)

In [55]:
# Preview the first 10 candidate rows; missing values are shown as empty strings.
pd.read_csv(candidates, nrows = 10).fillna("")

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Fraenzi Maegert-Kohli|Franziska Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628


## Feature Voting

In [56]:
%%time
# Feature pipeline over the candidates file (stages are separated by `/`):
#   smallest-qnode-number, then three string-similarity stages writing
#   `monge_elkan`, `des_cont_jaccard` (kg_descriptions vs. context) and
#   `jaro_winkler`, then feature-voting over the listed columns.
# Note: `jaro_winkler` is computed here but is not in the feature-voting list.
# Output is redirected to `feature_votes`.
!tl smallest-qnode-number {candidates} \
    / string-similarity -i --method monge_elkan:tokenizer=word -o monge_elkan \
    / string-similarity -i --method jaccard:tokenizer=word -c kg_descriptions context -o des_cont_jaccard \
    / string-similarity -i --method jaro_winkler -o jaro_winkler \
    / feature-voting -c "pagerank,smallest_qnode_number,monge_elkan,des_cont_jaccard" > {feature_votes}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['smallest_qnode_number'] = tmp_df['smallest_qnode_number']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group['votes'] = tmp_df.loc[:, [f'vote_{ft}' for ft in feature_col_names]].sum(axis=1)
CPU times: user 67.9 ms, sys: 37.1 ms, total: 105 ms
Wall time: 4.61 s


In [57]:
# Preview the first 10 rows with the added similarity features and votes.
pd.read_csv(feature_votes, nrows = 10).fillna("")

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385,0,1.0,0.0,1.0,1
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463,0,0.733333,0.0,0.569697,0
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463,0,0.5,0.0,0.55711,0
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134,0,0.5,0.0,0.890909,0
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416,0,0.483333,0.0,0.857576,0
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405,0,0.0,0.0,0.0,0
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748,0,0.5,0.0,0.0,0
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Fraenzi Maegert-Kohli|Franziska Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713,1,0.488889,0.0,0.365763,1
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628,0,0.685185,0.0,0.642677,0
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628,0,0.733333,0.0,0.865909,0


## Compute Embedding Score using Column Vector Strategy

In [58]:
# Run `tl score-using-embedding` on the feature-votes file with the
# centroid-of-singletons column-vector strategy and the merged embedding file;
# the result (with a `graph-embedding-score` column) is redirected to score_file.
!tl score-using-embedding $feature_votes \
--column-vector-strategy centroid-of-singletons \
-o graph-embedding-score --embedding-file $embedding_file \
> $score_file

Qnodes to lookup: 1260
Qnodes from file: 1241


In [59]:
# Preview the first 10 rows including the graph-embedding score.
pd.read_csv(score_file, nrows = 10).fillna("")

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,pagerank,retrieval_score,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,3.983031e-09,36.39385,0,1.0,0.0,1.0,1,0.886983
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,5.918546e-09,23.48463,0,0.733333,0.0,0.569697,0,0.469873
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),3.740191e-09,23.48463,0,0.5,0.0,0.55711,0,0.586232
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,0.0,20.582134,0,0.5,0.0,0.890909,0,0.0
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,6.890132e-09,20.520416,0,0.483333,0.0,0.857576,0,0.513397
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,3.539613e-09,19.623405,0,0.0,0.0,0.0,0,0.215469
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,3.539613e-09,19.601748,0,0.5,0.0,0.0,0,0.354691
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Fraenzi Maegert-Kohli|Franziska Kohli,fuzzy-augmented,Swiss snowboarder,3.539613e-09,19.233713,1,0.488889,0.0,0.365763,1,0.474148
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,3.539613e-09,19.010628,0,0.685185,0.0,0.642677,0,0.557991
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,3.539613e-09,19.010628,0,0.733333,0.0,0.865909,0,0.826771


## Generate Additional Features required for Model Prediction

In [60]:
## TODO: Need to add these features as cli commands in Table Linker

def create_singleton_feature(df):
    """Flag candidates whose cell has exactly one exact-match candidate.

    A cell is a ``(column, row)`` pair. Rows with ``method == 'exact-match'``
    are counted per cell (non-null ``kg_id`` values only). Every row belonging
    to a cell whose count is exactly 1 gets ``singleton = 1``, regardless of
    that row's own method; all other rows get 0.

    Args:
        df: candidate table with ``column``, ``row``, ``method`` and
            ``kg_id`` columns.

    Returns:
        The same DataFrame object, with the ``singleton`` column added in place.
    """
    exact_matches = df[df['method'] == 'exact-match']
    exact_counts = exact_matches.groupby(['column', 'row'])['kg_id'].count()
    # A set gives O(1) membership tests; the previous list lookup inside
    # iterrows() was quadratic in the number of candidates.
    singleton_cells = set(exact_counts[exact_counts == 1].index)
    df['singleton'] = [
        1 if cell in singleton_cells else 0
        for cell in zip(df['column'], df['row'])
    ]
    return df
        
def generate_reciprocal_rank(df):
    """Add a per-cell ``reciprocal_rank`` feature based on the embedding score.

    Candidates are grouped by ``(row, column)``. Within each group,
    ``reciprocal_rank`` is ``1 / rank`` of ``graph-embedding-score`` using
    pandas' default ranking (ascending, ties share their average rank).

    NOTE(review): with ascending ranks the highest-scoring candidate gets the
    smallest reciprocal rank. The direction is left unchanged because this
    column is fed to the saved ranking model — confirm how the feature was
    computed at training time before changing it.

    Args:
        df: candidate table with ``row``, ``column`` and
            ``graph-embedding-score`` columns. It is not modified.

    Returns:
        A new DataFrame rebuilt from the grouped records (rows ordered by
        group key, fresh 0..n-1 index) with the ``reciprocal_rank`` column.
    """
    records = []
    for _, cell_df in df.groupby(['row', 'column']):
        # assign() returns a new frame, so nothing is written onto the groupby
        # slice and pandas no longer emits SettingWithCopyWarning.
        ranked = cell_df.assign(
            reciprocal_rank=list(1 / cell_df['graph-embedding-score'].rank())
        )
        records.extend(ranked.to_dict(orient='records'))
    return pd.DataFrame(records)

# Load the embedding-scored candidates and attach the extra model features:
# the singleton flag, the label length in characters and in whitespace tokens
# (0 when the label is missing), and the per-cell reciprocal rank.
features_df = create_singleton_feature(pd.read_csv(score_file))
features_df = features_df.assign(
    num_char=features_df['kg_labels'].map(
        lambda label: 0 if pd.isna(label) else len(label)
    ),
    num_tokens=features_df['kg_labels'].map(
        lambda label: 0 if pd.isna(label) else len(label.split())
    ),
)
features_df = generate_reciprocal_rank(features_df)
features_df.head().fillna("")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,smallest_qnode_number,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,...,0,1.0,0.0,1.0,1,0.886983,1,11,2,0.00995
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,...,0,0.733333,0.0,0.569697,0,0.469873,1,11,2,0.030303
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),...,0,0.5,0.0,0.55711,0,0.586232,1,13,2,0.015873
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,...,0,0.5,0.0,0.890909,0,0.0,1,5,1,0.666667
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,...,0,0.483333,0.0,0.857576,0,0.513397,1,6,1,0.020833


## Final Ranking Score Predicted by Model

In [61]:
# Feature columns passed to the ranking model.
# NOTE(review): presumably this order must match the order used at training
# time — confirm against the training code.
features = ['pagerank','retrieval_score','monge_elkan',
            'des_cont_jaccard','jaro_winkler','graph-embedding-score',
            'singleton','num_char','num_tokens','reciprocal_rank']

# SECURITY: pickle.load can execute arbitrary code; only load model files that
# come from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open(model_name, 'rb') as model_file:
    model = pickle.load(model_file)

data = features_df[features]
predicted_score = model.predict(data)
# Store the prediction alongside the features and persist the scored table.
features_df['model_prediction'] = predicted_score
features_df.to_csv(final_score,index=False)



In [62]:
# Preview the first 10 scored rows, including the model prediction.
pd.read_csv(final_score, nrows=10).fillna("")

Unnamed: 0,column,row,label,context,label_clean,kg_id,kg_labels,kg_aliases,method,kg_descriptions,...,monge_elkan,des_cont_jaccard,jaro_winkler,votes,graph-embedding-score,singleton,num_char,num_tokens,reciprocal_rank,model_prediction
0,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q213854,Virat Kohli,Cheeku,fuzzy-augmented,Indian cricket player,...,1.0,0.0,1.0,1,0.886983,1,11,2,0.00995,0.986138
1,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q102354285,Marie Virat,,fuzzy-augmented,Ph. D. 2009,...,0.733333,0.0,0.569697,0,0.469873,1,11,2,0.030303,-0.67012
2,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16027751,Bernard Virat,,fuzzy-augmented,French biologist (1921-2003),...,0.5,0.0,0.55711,0,0.586232,1,13,2,0.015873,-0.675805
3,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q7907059,VIRAT,,fuzzy-augmented,,...,0.5,0.0,0.890909,0,0.0,1,5,1,0.666667,-0.944242
4,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q2978459,Virata,Virat,fuzzy-augmented,character from the epic Mahabharata,...,0.483333,0.0,0.857576,0,0.513397,1,6,1,0.020833,-0.753794
5,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16682735,,,fuzzy-augmented,,...,0.0,0.0,0.0,0,0.215469,1,0,0,0.333333,-1.0
6,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q6426050,Kohli,,fuzzy-augmented,,...,0.5,0.0,0.0,0,0.354691,1,5,1,0.2,-1.0
7,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q46251,Fränzi Mägert-Kohli,Fraenzi Maegert-Kohli|Franziska Kohli,fuzzy-augmented,Swiss snowboarder,...,0.488889,0.0,0.365763,1,0.474148,1,19,2,0.027778,-0.880722
8,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q16434086,Wirat Wachirarattanawong,,fuzzy-augmented,,...,0.685185,0.0,0.642677,0,0.557991,1,24,2,0.017544,-0.822762
9,0,0,Virat Kohli,royal challengers bangalore|152|5/11/88,Virat Kohli,Q19899153,Virat Singh,,fuzzy-augmented,Indian cricketer,...,0.733333,0.0,0.865909,0,0.826771,1,11,2,0.010204,-0.971697


## Get Top 3 KG Links

In [63]:
# Select the top 3 candidates per cell (-k 3) using the `model_prediction`
# column and redirect the result to top_k_file.
!tl get-kg-links -c model_prediction -l label -k 3 $final_score > $top_k_file

In [64]:
# Preview the first 10 rows of the top-k links file.
pd.read_csv(top_k_file, nrows = 10)

Unnamed: 0,column,row,label,kg_id,kg_label,ranking_score
0,0,0,Virat Kohli,Q213854|Q7260793|Q17590127,Virat Kohli|Purab Kohli|Vishal Anand,0.99|-0.5|-0.54
1,0,1,Tendulkar,Q9488|Q22327439|Q24906003,Sachin Tendulkar|Arjun Tendulkar|Ramesh Tendulkar,0.13|0.06|-0.24
2,0,10,Cheteshwar Pujara,Q142613|Q105044382|Q87122863,Cheteshwar Pujara|List of international cricke...,0.95|-0.16|-0.82
3,0,11,Ishant Sharma,Q3522062|Q16012878|Q3527610,Ishant Sharma|Har Sharma|Gopal Sharma,0.99|0.02|-0.23
4,0,12,Mohammad Shami,Q7487531|Q101423667|Q59530987,Mohammed Shami|Mohammed Hassan Shami|Ghazi Shami,-0.05|-0.24|-0.43
5,0,2,Dhoni,Q5269736|Q5269735|Q101197030,Dhoni|Dhoni|Dhoni,0.26|0.22|0.07
6,0,3,Jasprit Bumrah,Q16227998|Q47495210|Q65233904,Jasprit Bumrah|Manjit Bumrah|Jaikrit Singh Rawat,0.99|-0.64|-0.72
7,0,4,Ajinkya Rahane,Q137669|Q82298759|Q100746902,Ajinkya Rahane|Ajinkya A Rane|Nene Ajinkya,0.97|-0.68|-0.74
8,0,5,Rohit Sharma,Q21622845|Q26436595|Q21622847,Rohit Sharma|Rohit Sharma|Rohit Sharma,0.75|0.75|0.73
9,0,6,Bhuvneshwar Kumar,Q2003153|Q4902309|Q23883912,Bhuvneshwar Kumar|Bhuvneshwari Kumari|Nishu Kumar,0.92|-0.19|-0.45


## Join to Produce final result

In [65]:
# Join the top-k links back onto the original input table and redirect the
# linked table to final_output.
!tl join -f $input_file_path --csv -c ranking_score $top_k_file > $final_output

In [66]:
# Display the final linked table; missing values are shown as empty strings.
pd.read_csv(final_output).fillna("")

Unnamed: 0,cricketers,teams,weight,dob,kg_id,kg_label,score
0,Virat Kohli,royal challengers bangalore,152,5/11/88,Q213854|Q7260793|Q17590127,Virat Kohli|Purab Kohli|Vishal Anand,0.99|-0.5|-0.54
1,Tendulkar,mumbai indians,137,24/04/1973,Q9488|Q22327439|Q24906003,Sachin Tendulkar|Arjun Tendulkar|Ramesh Tendulkar,0.13|0.06|-0.24
2,Dhoni,chennai super kings,154,7/7/81,Q5269736|Q5269735|Q101197030,Dhoni|Dhoni|Dhoni,0.26|0.22|0.07
3,Jasprit Bumrah,mumbai indians,154,6/12/93,Q16227998|Q47495210|Q65233904,Jasprit Bumrah|Manjit Bumrah|Jaikrit Singh Rawat,0.99|-0.64|-0.72
4,Ajinkya Rahane,rajasthan royals,134,6/6/88,Q137669|Q82298759|Q100746902,Ajinkya Rahane|Ajinkya A Rane|Nene Ajinkya,0.97|-0.68|-0.74
5,Rohit Sharma,mumbai indians,159,30/04/1987,Q21622845|Q26436595|Q21622847,Rohit Sharma|Rohit Sharma|Rohit Sharma,0.75|0.75|0.73
6,Bhuvneshwar Kumar,deccan chargers,154,5/2/90,Q2003153|Q4902309|Q23883912,Bhuvneshwar Kumar|Bhuvneshwari Kumari|Nishu Kumar,0.92|-0.19|-0.45
7,Ravindra Jadeja,chennai super kings,132,6/12/88,Q2721457|Q7286041|Q95653584,Ravindra Jadeja|Rajendrasinhji Jadeja|Ravindra...,0.97|-0.39|-0.39
8,Rishabh Pant,delhi capitals,136,4/8/97,Q21622311|Q8046520|Q24572636,Rishabh Pant|Yadav Pant|Prakash Pant,0.99|-0.47|-0.55
9,Shikhar Dhawan,delhi capitals,157,5/12/85,Q7487024|Q53500200|Q48359296,Shikhar Dhawan|Anuj Dhawan|Vijay Dhawan,0.99|-0.54|-0.56


## Clean up temporary files

In [67]:
# Delete the scratch directory and every intermediate file in it (including
# the top-k file); the final linked table lives in output_path and is kept.
# Raises if temp_dir does not exist, e.g. when this cell is run twice.
shutil.rmtree(temp_dir)