In [183]:
import pandas as pd
import spacy

from tqdm import tqdm

Models: https://spacy.io/models/en#en_core_web_md

Entities: https://spacy.io/api/annotation#section-named-entities

In [2]:
nlp = spacy.load('en_core_web_md')

In [179]:
NE = {
    "B-EVENT": "B-event",
    "I-EVENT": "I-event",
    "B-LOC": "B-geo",
    "I-LOC": "I-geo",
    "B-GPE": "B-geo",
    "I-GPE": "I-geo",
    "B-PRODUCT": "B-obj",
    "I-PRODUCT": "I-obj",
    "B-WORK_OF_ART": "B-obj",
    "I-WORK_OF_ART": "I-obj",
    "B-ORG": "B-org",
    "I-ORG": "I-org",
    "B-PERSON": "B-per",
    "I-PERSON": "I-per",
    "B-TIME": "B-time",
    "I-TIME": "I-time",
    "B-DATE": "B-time",
    "I-DATE": "O",
    "B-NORP": "B-gpe",
    "I-NORP": "I-gpe",
    "B-FAC": "O",
    "I-FAC": "O",
    "B-LAW": "O",
    "I-LAW": "O",
    "B-LANGUAGE": "O",
    "I-LANGUAGE": "O",
    "B-PERCENT": "O",
    "I-PERCENT": "O",
    "B-MONEY": "O",
    "I-MONEY": "O",
    "B-QUANTITY": "O",
    "I-QUANTITY": "O",
    "B-ORDINAL": "O",
    "I-ORDINAL": "O",
    "B-CARDINAL": "O",
    "I-CARDINAL": "O",
    "O": "O"
}

In [4]:
df = pd.read_csv("../datasets/TrainNER.csv", sep=';',encoding='cp1250', names=["Sentence","Word","Tag","Category"])

In [5]:
df.head(10)

Unnamed: 0,Sentence,Word,Tag,Category
0,Sentence: 1,President,NNP,B-per
1,,Karzai,NNP,I-per
2,,thanked,VBD,O
3,,his,PRP$,O
4,,allies,NNS,O
5,,for,IN,O
6,,their,PRP$,O
7,,help,NN,O
8,,in,IN,O
9,,battling,VBG,O


In [6]:
df['OriginalPrediction'] = "O"

In [27]:
indexes = df[df['Sentence'].isnull() == False].index.values.tolist()
indexes.append(df.shape[0]-1)
print(len(indexes))

9001


In [28]:
for i in tqdm(range(len(indexes)-1)):
    start = indexes[i]
    end = indexes[i+1]
    init_tokens = df[(df.index >= start) & (df.index < end)]['Word'].values.tolist()
    s = ' '.join(init_tokens)
    doc = nlp(s)
    ents = []
    for ent in doc.ents:
        l = ent.text.split(' ')
        if len(l) == 1:
            ents.append([ent.text, 'B-' + ent.label_])
        else:
            ents.append([l[0], 'B-' + ent.label_])
            for i in range(1,len(l)):
                ents.append([l[i], 'I-' + ent.label_])
    i = 0
    for j in range(len(init_tokens)):
        if i < len(ents) and init_tokens[j] == ents[i][0]:
            df.loc[df.index == start+j, 'OriginalPrediction'] = ents[i][1]
            i += 1
    df[(df.index >= start) & (df.index < end)]



  0%|          | 0/5 [00:00<?, ?it/s][A[A

 20%|██        | 1/5 [00:00<00:00,  6.97it/s][A[A

 60%|██████    | 3/5 [00:00<00:00,  6.79it/s][A[A

100%|██████████| 5/5 [00:00<00:00,  8.15it/s][A[A

[A[A

In [30]:
df.tail()

Unnamed: 0,Sentence,Word,Tag,Category,OriginalPrediction
196641,,and,CC,O,O
196642,,foreign,JJ,O,O
196643,,investment,NN,O,O
196644,,.,.,O,O
196645,,,,,O


In [77]:
df.to_csv("../datasets/TrainNER_Pred", sep=';', encoding='utf-8')

In [180]:
df['Prediction'] = df.apply (lambda row: NE[row['OriginalPrediction']],axis=1)

In [181]:
results = {
    "Size": df.shape[0],
    "CorrectPredictionsWithO": df.loc[df['Category'] == df['Prediction']].shape[0],
    "CorrectEntity": df.loc[(df['Category'] != "O") & (df['Category'] == df['Prediction'])].shape[0],
    "AllEntities": df.loc[df['Category'] != "O"].shape[0],
    "AllPredictions": df.loc[df['Prediction'] != "O"].shape[0]
}
results

{'Size': 196646,
 'CorrectPredictionsWithO': 180117,
 'CorrectEntity': 17973,
 'AllEntities': 30036,
 'AllPredictions': 29225}

In [182]:
print("Absoulte accuracy: {0:.2f}%".format(results["CorrectPredictionsWithO"] / results["Size"] * 100))
print("Entity based accuracy: {0:.2f}%".format(results["CorrectEntity"] / results["AllEntities"] * 100))
print("Predicted entity based accuracy: {0:.2f}%".format(results["CorrectEntity"] / results["AllPredictions"] * 100))

Absoulte accuracy: 91.59%
Entity based accuracy: 59.84%
Predicted entity based accuracy: 61.50%


In [113]:
results["CorrectPredictionsWithO"] / results["Size"]

0.8725323678081426

In [76]:
df.loc[(df['Prediction'] != "O") & (df['Category'] != "O") & (df['Category'] != df['Prediction'])]

Unnamed: 0,Sentence,Word,Tag,Category,OriginalPrediction,Prediction,SentenceID
1,,Karzai,NNP,I-per,B-GPE,B-gpe,0
48,,Afghanistan,NNP,I-geo,B-GPE,B-gpe,2
87,Sentence: 5,Burma,NNP,B-geo,B-GPE,B-gpe,4
96,,Khin,NNP,I-per,B-PERSON,B-per,4
131,,Thura,NNP,I-org,B-PERSON,B-per,5
132,,Shwe,NNP,I-org,I-PERSON,I-per,5
133,,Mahn,NNP,I-org,I-PERSON,I-per,5
155,Sentence: 7,Thura,NNP,B-org,B-PERSON,B-per,6
156,,Shwe,NNP,I-org,I-PERSON,I-per,6
157,,Mahn,NNP,I-org,I-PERSON,I-per,6


In [75]:
for i in tqdm(range(len(indexes)-1)):
    start = indexes[i]
    end = indexes[i+1]
    for index in range(start,end):
        df.loc[df.index == index, 'SentenceID'] = i






  0%|          | 0/9000 [00:00<?, ?it/s][A[A[A[A[A




  0%|          | 1/9000 [00:00<17:38,  8.50it/s][A[A[A[A[A




  0%|          | 2/9000 [00:00<21:46,  6.89it/s][A[A[A[A[A




  0%|          | 3/9000 [00:00<25:37,  5.85it/s][A[A[A[A[A




  0%|          | 4/9000 [00:00<22:38,  6.62it/s][A[A[A[A[A




  0%|          | 5/9000 [00:00<25:25,  5.89it/s][A[A[A[A[A




  0%|          | 6/9000 [00:01<29:44,  5.04it/s][A[A[A[A[A




  0%|          | 7/9000 [00:01<28:04,  5.34it/s][A[A[A[A[A




  0%|          | 8/9000 [00:01<25:33,  5.87it/s][A[A[A[A[A




  0%|          | 9/9000 [00:01<22:47,  6.57it/s][A[A[A[A[A




  0%|          | 10/9000 [00:01<21:15,  7.05it/s][A[A[A[A[A




  0%|          | 11/9000 [00:01<20:27,  7.33it/s][A[A[A[A[A




  0%|          | 12/9000 [00:01<22:11,  6.75it/s][A[A[A[A[A




  0%|          | 13/9000 [00:02<22:31,  6.65it/s][A[A[A[A[A




  0%|          | 14/9000 [00:02<21:15,  7.05it/

  3%|▎         | 271/9000 [00:41<22:57,  6.34it/s][A[A[A[A[A




  3%|▎         | 272/9000 [00:42<23:18,  6.24it/s][A[A[A[A[A




  3%|▎         | 273/9000 [00:42<21:34,  6.74it/s][A[A[A[A[A




  3%|▎         | 274/9000 [00:42<20:01,  7.26it/s][A[A[A[A[A




  3%|▎         | 275/9000 [00:42<20:27,  7.11it/s][A[A[A[A[A




  3%|▎         | 276/9000 [00:42<19:09,  7.59it/s][A[A[A[A[A




  3%|▎         | 277/9000 [00:42<21:18,  6.82it/s][A[A[A[A[A




  3%|▎         | 278/9000 [00:42<22:42,  6.40it/s][A[A[A[A[A




  3%|▎         | 279/9000 [00:43<22:39,  6.41it/s][A[A[A[A[A




  3%|▎         | 280/9000 [00:43<22:26,  6.48it/s][A[A[A[A[A




  3%|▎         | 281/9000 [00:43<22:32,  6.45it/s][A[A[A[A[A




  3%|▎         | 282/9000 [00:43<22:32,  6.45it/s][A[A[A[A[A




  3%|▎         | 284/9000 [00:43<21:13,  6.85it/s][A[A[A[A[A




  3%|▎         | 285/9000 [00:43<21:21,  6.80it/s][A[A[A[A[A




  3%|▎         | 286

  6%|▌         | 537/9000 [01:21<22:41,  6.21it/s][A[A[A[A[A




  6%|▌         | 538/9000 [01:22<22:36,  6.24it/s][A[A[A[A[A




  6%|▌         | 539/9000 [01:22<22:47,  6.19it/s][A[A[A[A[A




  6%|▌         | 540/9000 [01:22<24:10,  5.83it/s][A[A[A[A[A




  6%|▌         | 541/9000 [01:22<24:53,  5.66it/s][A[A[A[A[A




  6%|▌         | 542/9000 [01:22<23:18,  6.05it/s][A[A[A[A[A




  6%|▌         | 544/9000 [01:22<20:52,  6.75it/s][A[A[A[A[A




  6%|▌         | 545/9000 [01:23<20:31,  6.86it/s][A[A[A[A[A




  6%|▌         | 546/9000 [01:23<23:54,  5.90it/s][A[A[A[A[A




  6%|▌         | 547/9000 [01:23<26:25,  5.33it/s][A[A[A[A[A




  6%|▌         | 548/9000 [01:23<28:01,  5.03it/s][A[A[A[A[A




  6%|▌         | 549/9000 [01:23<24:17,  5.80it/s][A[A[A[A[A




  6%|▌         | 550/9000 [01:24<23:00,  6.12it/s][A[A[A[A[A




  6%|▌         | 551/9000 [01:24<24:16,  5.80it/s][A[A[A[A[A




  6%|▌         | 552

  9%|▉         | 807/9000 [02:06<26:57,  5.07it/s][A[A[A[A[A




  9%|▉         | 808/9000 [02:06<29:20,  4.65it/s][A[A[A[A[A




  9%|▉         | 809/9000 [02:06<26:07,  5.23it/s][A[A[A[A[A




  9%|▉         | 810/9000 [02:06<26:27,  5.16it/s][A[A[A[A[A




  9%|▉         | 811/9000 [02:06<24:12,  5.64it/s][A[A[A[A[A




  9%|▉         | 812/9000 [02:06<22:36,  6.03it/s][A[A[A[A[A




  9%|▉         | 813/9000 [02:07<21:09,  6.45it/s][A[A[A[A[A




  9%|▉         | 815/9000 [02:07<19:02,  7.16it/s][A[A[A[A[A




  9%|▉         | 816/9000 [02:07<20:07,  6.78it/s][A[A[A[A[A




  9%|▉         | 817/9000 [02:07<23:28,  5.81it/s][A[A[A[A[A




  9%|▉         | 818/9000 [02:07<23:05,  5.91it/s][A[A[A[A[A




  9%|▉         | 819/9000 [02:07<21:33,  6.33it/s][A[A[A[A[A




  9%|▉         | 820/9000 [02:08<23:26,  5.82it/s][A[A[A[A[A




  9%|▉         | 822/9000 [02:08<22:22,  6.09it/s][A[A[A[A[A




  9%|▉         | 824

 12%|█▏        | 1070/9000 [02:47<22:06,  5.98it/s][A[A[A[A[A




 12%|█▏        | 1071/9000 [02:47<20:40,  6.39it/s][A[A[A[A[A




 12%|█▏        | 1073/9000 [02:47<20:19,  6.50it/s][A[A[A[A[A




 12%|█▏        | 1074/9000 [02:48<21:12,  6.23it/s][A[A[A[A[A




 12%|█▏        | 1075/9000 [02:48<21:52,  6.04it/s][A[A[A[A[A




 12%|█▏        | 1076/9000 [02:48<21:55,  6.02it/s][A[A[A[A[A




 12%|█▏        | 1077/9000 [02:48<21:32,  6.13it/s][A[A[A[A[A




 12%|█▏        | 1078/9000 [02:48<19:46,  6.67it/s][A[A[A[A[A




 12%|█▏        | 1079/9000 [02:48<20:57,  6.30it/s][A[A[A[A[A




 12%|█▏        | 1080/9000 [02:49<20:43,  6.37it/s][A[A[A[A[A




 12%|█▏        | 1081/9000 [02:49<23:19,  5.66it/s][A[A[A[A[A




 12%|█▏        | 1082/9000 [02:49<21:25,  6.16it/s][A[A[A[A[A




 12%|█▏        | 1083/9000 [02:49<20:29,  6.44it/s][A[A[A[A[A




 12%|█▏        | 1084/9000 [02:49<21:51,  6.03it/s][A[A[A[A[A




 12%|█

 15%|█▍        | 1323/9000 [03:31<26:57,  4.75it/s][A[A[A[A[A




 15%|█▍        | 1324/9000 [03:31<23:38,  5.41it/s][A[A[A[A[A




 15%|█▍        | 1325/9000 [03:31<23:05,  5.54it/s][A[A[A[A[A




 15%|█▍        | 1326/9000 [03:31<22:15,  5.75it/s][A[A[A[A[A




 15%|█▍        | 1328/9000 [03:31<19:01,  6.72it/s][A[A[A[A[A




 15%|█▍        | 1329/9000 [03:31<19:38,  6.51it/s][A[A[A[A[A




 15%|█▍        | 1331/9000 [03:32<18:52,  6.77it/s][A[A[A[A[A




 15%|█▍        | 1332/9000 [03:32<21:08,  6.04it/s][A[A[A[A[A




 15%|█▍        | 1333/9000 [03:32<19:44,  6.47it/s][A[A[A[A[A




 15%|█▍        | 1334/9000 [03:32<21:46,  5.87it/s][A[A[A[A[A




 15%|█▍        | 1335/9000 [03:32<20:26,  6.25it/s][A[A[A[A[A




 15%|█▍        | 1336/9000 [03:33<21:43,  5.88it/s][A[A[A[A[A




 15%|█▍        | 1338/9000 [03:33<20:23,  6.26it/s][A[A[A[A[A




 15%|█▍        | 1339/9000 [03:33<23:48,  5.36it/s][A[A[A[A[A




 15%|█

 18%|█▊        | 1578/9000 [04:14<22:11,  5.57it/s][A[A[A[A[A




 18%|█▊        | 1579/9000 [04:14<19:37,  6.30it/s][A[A[A[A[A




 18%|█▊        | 1581/9000 [04:14<19:18,  6.40it/s][A[A[A[A[A




 18%|█▊        | 1582/9000 [04:14<19:26,  6.36it/s][A[A[A[A[A




 18%|█▊        | 1583/9000 [04:15<21:13,  5.82it/s][A[A[A[A[A




 18%|█▊        | 1584/9000 [04:15<21:40,  5.70it/s][A[A[A[A[A




 18%|█▊        | 1585/9000 [04:15<18:57,  6.52it/s][A[A[A[A[A




 18%|█▊        | 1586/9000 [04:15<20:43,  5.96it/s][A[A[A[A[A




 18%|█▊        | 1587/9000 [04:15<19:40,  6.28it/s][A[A[A[A[A




 18%|█▊        | 1589/9000 [04:16<19:43,  6.26it/s][A[A[A[A[A




 18%|█▊        | 1590/9000 [04:16<20:27,  6.03it/s][A[A[A[A[A




 18%|█▊        | 1592/9000 [04:16<18:24,  6.71it/s][A[A[A[A[A




 18%|█▊        | 1593/9000 [04:16<16:36,  7.44it/s][A[A[A[A[A




 18%|█▊        | 1595/9000 [04:16<13:29,  9.15it/s][A[A[A[A[A




 18%|█

 20%|██        | 1833/9000 [04:57<23:34,  5.07it/s][A[A[A[A[A




 20%|██        | 1834/9000 [04:57<22:26,  5.32it/s][A[A[A[A[A




 20%|██        | 1836/9000 [04:58<20:27,  5.83it/s][A[A[A[A[A




 20%|██        | 1837/9000 [04:58<22:36,  5.28it/s][A[A[A[A[A




 20%|██        | 1838/9000 [04:58<23:13,  5.14it/s][A[A[A[A[A




 20%|██        | 1839/9000 [04:58<20:11,  5.91it/s][A[A[A[A[A




 20%|██        | 1840/9000 [04:58<23:41,  5.04it/s][A[A[A[A[A




 20%|██        | 1841/9000 [04:58<21:08,  5.64it/s][A[A[A[A[A




 20%|██        | 1843/9000 [04:59<18:24,  6.48it/s][A[A[A[A[A




 20%|██        | 1844/9000 [04:59<18:49,  6.34it/s][A[A[A[A[A




 20%|██        | 1845/9000 [04:59<19:10,  6.22it/s][A[A[A[A[A




 21%|██        | 1846/9000 [04:59<18:42,  6.37it/s][A[A[A[A[A




 21%|██        | 1847/9000 [04:59<22:28,  5.31it/s][A[A[A[A[A




 21%|██        | 1848/9000 [05:00<28:59,  4.11it/s][A[A[A[A[A




 21%|█

 23%|██▎       | 2090/9000 [05:41<20:48,  5.54it/s][A[A[A[A[A




 23%|██▎       | 2092/9000 [05:41<20:03,  5.74it/s][A[A[A[A[A




 23%|██▎       | 2093/9000 [05:42<20:58,  5.49it/s][A[A[A[A[A




 23%|██▎       | 2094/9000 [05:42<19:59,  5.76it/s][A[A[A[A[A




 23%|██▎       | 2095/9000 [05:42<21:26,  5.37it/s][A[A[A[A[A




 23%|██▎       | 2097/9000 [05:42<18:23,  6.25it/s][A[A[A[A[A




 23%|██▎       | 2098/9000 [05:42<18:12,  6.32it/s][A[A[A[A[A




 23%|██▎       | 2099/9000 [05:43<16:49,  6.84it/s][A[A[A[A[A




 23%|██▎       | 2100/9000 [05:43<15:38,  7.35it/s][A[A[A[A[A




 23%|██▎       | 2101/9000 [05:43<16:32,  6.95it/s][A[A[A[A[A




 23%|██▎       | 2103/9000 [05:43<16:19,  7.04it/s][A[A[A[A[A




 23%|██▎       | 2104/9000 [05:43<16:44,  6.87it/s][A[A[A[A[A




 23%|██▎       | 2105/9000 [05:43<20:10,  5.70it/s][A[A[A[A[A




 23%|██▎       | 2106/9000 [05:44<19:04,  6.03it/s][A[A[A[A[A




 23%|█

 26%|██▌       | 2347/9000 [06:27<20:55,  5.30it/s][A[A[A[A[A




 26%|██▌       | 2348/9000 [06:27<22:06,  5.02it/s][A[A[A[A[A




 26%|██▌       | 2349/9000 [06:27<19:34,  5.66it/s][A[A[A[A[A




 26%|██▌       | 2350/9000 [06:27<18:59,  5.84it/s][A[A[A[A[A




 26%|██▌       | 2351/9000 [06:27<19:44,  5.61it/s][A[A[A[A[A




 26%|██▌       | 2352/9000 [06:27<18:41,  5.93it/s][A[A[A[A[A




 26%|██▌       | 2353/9000 [06:28<22:07,  5.01it/s][A[A[A[A[A




 26%|██▌       | 2354/9000 [06:28<24:09,  4.58it/s][A[A[A[A[A




 26%|██▌       | 2355/9000 [06:28<23:03,  4.80it/s][A[A[A[A[A




 26%|██▌       | 2357/9000 [06:28<20:37,  5.37it/s][A[A[A[A[A




 26%|██▌       | 2358/9000 [06:29<19:02,  5.81it/s][A[A[A[A[A




 26%|██▌       | 2359/9000 [06:29<18:28,  5.99it/s][A[A[A[A[A




 26%|██▌       | 2360/9000 [06:29<18:55,  5.85it/s][A[A[A[A[A




 26%|██▌       | 2361/9000 [06:29<18:32,  5.97it/s][A[A[A[A[A




 26%|█

 29%|██▉       | 2594/9000 [07:12<24:07,  4.43it/s][A[A[A[A[A




 29%|██▉       | 2595/9000 [07:12<22:00,  4.85it/s][A[A[A[A[A




 29%|██▉       | 2596/9000 [07:13<19:31,  5.47it/s][A[A[A[A[A




 29%|██▉       | 2597/9000 [07:13<17:15,  6.18it/s][A[A[A[A[A




 29%|██▉       | 2598/9000 [07:13<18:54,  5.64it/s][A[A[A[A[A




 29%|██▉       | 2599/9000 [07:13<21:45,  4.90it/s][A[A[A[A[A




 29%|██▉       | 2600/9000 [07:13<21:27,  4.97it/s][A[A[A[A[A




 29%|██▉       | 2601/9000 [07:14<21:37,  4.93it/s][A[A[A[A[A




 29%|██▉       | 2603/9000 [07:14<18:48,  5.67it/s][A[A[A[A[A




 29%|██▉       | 2604/9000 [07:14<19:27,  5.48it/s][A[A[A[A[A




 29%|██▉       | 2605/9000 [07:14<19:42,  5.41it/s][A[A[A[A[A




 29%|██▉       | 2606/9000 [07:14<20:17,  5.25it/s][A[A[A[A[A




 29%|██▉       | 2607/9000 [07:14<18:01,  5.91it/s][A[A[A[A[A




 29%|██▉       | 2608/9000 [07:15<19:46,  5.39it/s][A[A[A[A[A




 29%|█

 32%|███▏      | 2843/9000 [07:59<18:26,  5.57it/s][A[A[A[A[A




 32%|███▏      | 2844/9000 [07:59<18:34,  5.52it/s][A[A[A[A[A




 32%|███▏      | 2845/9000 [07:59<17:22,  5.90it/s][A[A[A[A[A




 32%|███▏      | 2846/9000 [07:59<16:19,  6.28it/s][A[A[A[A[A




 32%|███▏      | 2847/9000 [07:59<16:09,  6.35it/s][A[A[A[A[A




 32%|███▏      | 2848/9000 [07:59<16:39,  6.15it/s][A[A[A[A[A




 32%|███▏      | 2849/9000 [08:00<23:40,  4.33it/s][A[A[A[A[A




 32%|███▏      | 2851/9000 [08:00<20:13,  5.07it/s][A[A[A[A[A




 32%|███▏      | 2852/9000 [08:00<22:20,  4.59it/s][A[A[A[A[A




 32%|███▏      | 2853/9000 [08:01<23:25,  4.37it/s][A[A[A[A[A




 32%|███▏      | 2854/9000 [08:01<21:13,  4.82it/s][A[A[A[A[A




 32%|███▏      | 2855/9000 [08:01<18:43,  5.47it/s][A[A[A[A[A




 32%|███▏      | 2856/9000 [08:01<16:51,  6.07it/s][A[A[A[A[A




 32%|███▏      | 2857/9000 [08:01<16:51,  6.07it/s][A[A[A[A[A




 32%|█

 34%|███▍      | 3098/9000 [08:43<18:10,  5.41it/s][A[A[A[A[A




 34%|███▍      | 3099/9000 [08:43<18:19,  5.37it/s][A[A[A[A[A




 34%|███▍      | 3100/9000 [08:43<18:51,  5.22it/s][A[A[A[A[A




 34%|███▍      | 3101/9000 [08:43<18:08,  5.42it/s][A[A[A[A[A




 34%|███▍      | 3102/9000 [08:43<17:44,  5.54it/s][A[A[A[A[A




 34%|███▍      | 3103/9000 [08:44<17:33,  5.60it/s][A[A[A[A[A




 34%|███▍      | 3104/9000 [08:44<16:18,  6.03it/s][A[A[A[A[A




 34%|███▍      | 3105/9000 [08:44<15:09,  6.48it/s][A[A[A[A[A




 35%|███▍      | 3106/9000 [08:44<17:41,  5.55it/s][A[A[A[A[A




 35%|███▍      | 3107/9000 [08:44<19:36,  5.01it/s][A[A[A[A[A




 35%|███▍      | 3108/9000 [08:45<20:29,  4.79it/s][A[A[A[A[A




 35%|███▍      | 3109/9000 [08:45<20:40,  4.75it/s][A[A[A[A[A




 35%|███▍      | 3110/9000 [08:45<24:32,  4.00it/s][A[A[A[A[A




 35%|███▍      | 3111/9000 [08:45<21:48,  4.50it/s][A[A[A[A[A




 35%|█

 37%|███▋      | 3342/9000 [09:30<15:22,  6.13it/s][A[A[A[A[A




 37%|███▋      | 3343/9000 [09:30<15:22,  6.13it/s][A[A[A[A[A




 37%|███▋      | 3344/9000 [09:30<15:09,  6.22it/s][A[A[A[A[A




 37%|███▋      | 3346/9000 [09:31<13:57,  6.75it/s][A[A[A[A[A




 37%|███▋      | 3347/9000 [09:31<14:38,  6.43it/s][A[A[A[A[A




 37%|███▋      | 3348/9000 [09:31<14:07,  6.67it/s][A[A[A[A[A




 37%|███▋      | 3350/9000 [09:31<12:51,  7.32it/s][A[A[A[A[A




 37%|███▋      | 3351/9000 [09:31<16:04,  5.86it/s][A[A[A[A[A




 37%|███▋      | 3352/9000 [09:32<19:14,  4.89it/s][A[A[A[A[A




 37%|███▋      | 3353/9000 [09:32<17:49,  5.28it/s][A[A[A[A[A




 37%|███▋      | 3354/9000 [09:32<18:58,  4.96it/s][A[A[A[A[A




 37%|███▋      | 3355/9000 [09:32<16:32,  5.69it/s][A[A[A[A[A




 37%|███▋      | 3356/9000 [09:32<15:58,  5.89it/s][A[A[A[A[A




 37%|███▋      | 3357/9000 [09:33<17:59,  5.23it/s][A[A[A[A[A




 37%|█

 40%|███▉      | 3589/9000 [10:16<18:01,  5.00it/s][A[A[A[A[A




 40%|███▉      | 3590/9000 [10:17<20:22,  4.43it/s][A[A[A[A[A




 40%|███▉      | 3591/9000 [10:17<21:39,  4.16it/s][A[A[A[A[A




 40%|███▉      | 3592/9000 [10:17<21:54,  4.11it/s][A[A[A[A[A




 40%|███▉      | 3593/9000 [10:17<18:04,  4.99it/s][A[A[A[A[A




 40%|███▉      | 3594/9000 [10:18<19:07,  4.71it/s][A[A[A[A[A




 40%|███▉      | 3595/9000 [10:18<16:22,  5.50it/s][A[A[A[A[A




 40%|███▉      | 3597/9000 [10:18<14:04,  6.40it/s][A[A[A[A[A




 40%|███▉      | 3598/9000 [10:18<13:26,  6.70it/s][A[A[A[A[A




 40%|███▉      | 3599/9000 [10:18<15:29,  5.81it/s][A[A[A[A[A




 40%|████      | 3600/9000 [10:18<16:06,  5.59it/s][A[A[A[A[A




 40%|████      | 3601/9000 [10:19<15:01,  5.99it/s][A[A[A[A[A




 40%|████      | 3602/9000 [10:19<17:42,  5.08it/s][A[A[A[A[A




 40%|████      | 3603/9000 [10:19<20:49,  4.32it/s][A[A[A[A[A




 40%|█

 43%|████▎     | 3841/9000 [11:04<17:13,  4.99it/s][A[A[A[A[A




 43%|████▎     | 3842/9000 [11:04<15:44,  5.46it/s][A[A[A[A[A




 43%|████▎     | 3843/9000 [11:04<15:55,  5.40it/s][A[A[A[A[A




 43%|████▎     | 3844/9000 [11:05<15:31,  5.54it/s][A[A[A[A[A




 43%|████▎     | 3846/9000 [11:05<13:45,  6.25it/s][A[A[A[A[A




 43%|████▎     | 3847/9000 [11:05<13:57,  6.15it/s][A[A[A[A[A




 43%|████▎     | 3848/9000 [11:05<13:26,  6.39it/s][A[A[A[A[A




 43%|████▎     | 3849/9000 [11:05<16:40,  5.15it/s][A[A[A[A[A




 43%|████▎     | 3851/9000 [11:06<15:23,  5.58it/s][A[A[A[A[A




 43%|████▎     | 3852/9000 [11:06<18:02,  4.75it/s][A[A[A[A[A




 43%|████▎     | 3853/9000 [11:06<15:49,  5.42it/s][A[A[A[A[A




 43%|████▎     | 3854/9000 [11:06<17:23,  4.93it/s][A[A[A[A[A




 43%|████▎     | 3855/9000 [11:07<18:08,  4.73it/s][A[A[A[A[A




 43%|████▎     | 3856/9000 [11:07<20:04,  4.27it/s][A[A[A[A[A




 43%|█

 45%|████▌     | 4087/9000 [11:51<18:47,  4.36it/s][A[A[A[A[A




 45%|████▌     | 4088/9000 [11:51<18:00,  4.55it/s][A[A[A[A[A




 45%|████▌     | 4089/9000 [11:52<17:11,  4.76it/s][A[A[A[A[A




 45%|████▌     | 4090/9000 [11:52<16:37,  4.92it/s][A[A[A[A[A




 45%|████▌     | 4091/9000 [11:52<15:23,  5.32it/s][A[A[A[A[A




 45%|████▌     | 4092/9000 [11:52<15:18,  5.34it/s][A[A[A[A[A




 45%|████▌     | 4094/9000 [11:52<14:31,  5.63it/s][A[A[A[A[A




 46%|████▌     | 4095/9000 [11:53<15:43,  5.20it/s][A[A[A[A[A




 46%|████▌     | 4096/9000 [11:53<16:51,  4.85it/s][A[A[A[A[A




 46%|████▌     | 4097/9000 [11:53<16:42,  4.89it/s][A[A[A[A[A




 46%|████▌     | 4098/9000 [11:53<15:46,  5.18it/s][A[A[A[A[A




 46%|████▌     | 4100/9000 [11:54<13:30,  6.04it/s][A[A[A[A[A




 46%|████▌     | 4101/9000 [11:54<13:21,  6.11it/s][A[A[A[A[A




 46%|████▌     | 4102/9000 [11:54<14:51,  5.49it/s][A[A[A[A[A




 46%|█

 48%|████▊     | 4331/9000 [12:39<14:32,  5.35it/s][A[A[A[A[A




 48%|████▊     | 4332/9000 [12:39<16:09,  4.81it/s][A[A[A[A[A




 48%|████▊     | 4333/9000 [12:40<19:08,  4.06it/s][A[A[A[A[A




 48%|████▊     | 4334/9000 [12:40<16:30,  4.71it/s][A[A[A[A[A




 48%|████▊     | 4335/9000 [12:40<15:16,  5.09it/s][A[A[A[A[A




 48%|████▊     | 4336/9000 [12:40<15:34,  4.99it/s][A[A[A[A[A




 48%|████▊     | 4337/9000 [12:41<16:36,  4.68it/s][A[A[A[A[A




 48%|████▊     | 4338/9000 [12:41<16:06,  4.83it/s][A[A[A[A[A




 48%|████▊     | 4339/9000 [12:41<14:28,  5.37it/s][A[A[A[A[A




 48%|████▊     | 4340/9000 [12:41<16:49,  4.62it/s][A[A[A[A[A




 48%|████▊     | 4341/9000 [12:41<14:46,  5.26it/s][A[A[A[A[A




 48%|████▊     | 4342/9000 [12:42<17:40,  4.39it/s][A[A[A[A[A




 48%|████▊     | 4343/9000 [12:42<17:12,  4.51it/s][A[A[A[A[A




 48%|████▊     | 4344/9000 [12:42<16:00,  4.85it/s][A[A[A[A[A




 48%|█

 51%|█████     | 4567/9000 [13:29<13:05,  5.64it/s][A[A[A[A[A




 51%|█████     | 4568/9000 [13:29<13:17,  5.55it/s][A[A[A[A[A




 51%|█████     | 4569/9000 [13:29<18:07,  4.08it/s][A[A[A[A[A




 51%|█████     | 4570/9000 [13:29<16:42,  4.42it/s][A[A[A[A[A




 51%|█████     | 4571/9000 [13:30<16:51,  4.38it/s][A[A[A[A[A




 51%|█████     | 4572/9000 [13:30<15:06,  4.89it/s][A[A[A[A[A




 51%|█████     | 4573/9000 [13:30<15:44,  4.69it/s][A[A[A[A[A




 51%|█████     | 4575/9000 [13:30<14:37,  5.04it/s][A[A[A[A[A




 51%|█████     | 4577/9000 [13:31<13:04,  5.64it/s][A[A[A[A[A




 51%|█████     | 4578/9000 [13:31<14:54,  4.94it/s][A[A[A[A[A




 51%|█████     | 4579/9000 [13:31<15:34,  4.73it/s][A[A[A[A[A




 51%|█████     | 4580/9000 [13:31<16:09,  4.56it/s][A[A[A[A[A




 51%|█████     | 4581/9000 [13:31<15:30,  4.75it/s][A[A[A[A[A




 51%|█████     | 4582/9000 [13:32<14:49,  4.97it/s][A[A[A[A[A




 51%|█

 54%|█████▎    | 4815/9000 [14:21<14:55,  4.67it/s][A[A[A[A[A




 54%|█████▎    | 4816/9000 [14:21<13:18,  5.24it/s][A[A[A[A[A




 54%|█████▎    | 4817/9000 [14:21<14:06,  4.94it/s][A[A[A[A[A




 54%|█████▎    | 4818/9000 [14:22<15:35,  4.47it/s][A[A[A[A[A




 54%|█████▎    | 4819/9000 [14:22<14:58,  4.65it/s][A[A[A[A[A




 54%|█████▎    | 4820/9000 [14:22<16:09,  4.31it/s][A[A[A[A[A




 54%|█████▎    | 4821/9000 [14:22<16:01,  4.35it/s][A[A[A[A[A




 54%|█████▎    | 4822/9000 [14:22<13:58,  4.98it/s][A[A[A[A[A




 54%|█████▎    | 4823/9000 [14:23<13:11,  5.28it/s][A[A[A[A[A




 54%|█████▎    | 4824/9000 [14:23<12:04,  5.77it/s][A[A[A[A[A




 54%|█████▎    | 4825/9000 [14:23<10:35,  6.57it/s][A[A[A[A[A




 54%|█████▎    | 4826/9000 [14:23<10:28,  6.65it/s][A[A[A[A[A




 54%|█████▎    | 4827/9000 [14:23<09:49,  7.08it/s][A[A[A[A[A




 54%|█████▎    | 4828/9000 [14:23<09:02,  7.68it/s][A[A[A[A[A




 54%|█

 56%|█████▌    | 5060/9000 [15:08<12:28,  5.26it/s][A[A[A[A[A




 56%|█████▌    | 5061/9000 [15:09<13:18,  4.93it/s][A[A[A[A[A




 56%|█████▌    | 5062/9000 [15:09<12:33,  5.23it/s][A[A[A[A[A




 56%|█████▋    | 5063/9000 [15:09<14:36,  4.49it/s][A[A[A[A[A




 56%|█████▋    | 5064/9000 [15:09<16:47,  3.91it/s][A[A[A[A[A




 56%|█████▋    | 5065/9000 [15:10<15:13,  4.31it/s][A[A[A[A[A




 56%|█████▋    | 5066/9000 [15:10<15:23,  4.26it/s][A[A[A[A[A




 56%|█████▋    | 5067/9000 [15:10<14:37,  4.48it/s][A[A[A[A[A




 56%|█████▋    | 5068/9000 [15:10<14:18,  4.58it/s][A[A[A[A[A




 56%|█████▋    | 5069/9000 [15:10<15:06,  4.34it/s][A[A[A[A[A




 56%|█████▋    | 5070/9000 [15:11<14:09,  4.63it/s][A[A[A[A[A




 56%|█████▋    | 5071/9000 [15:11<15:10,  4.32it/s][A[A[A[A[A




 56%|█████▋    | 5072/9000 [15:11<15:02,  4.35it/s][A[A[A[A[A




 56%|█████▋    | 5074/9000 [15:11<13:05,  5.00it/s][A[A[A[A[A




 56%|█

 59%|█████▉    | 5299/9000 [15:58<12:52,  4.79it/s][A[A[A[A[A




 59%|█████▉    | 5300/9000 [15:58<12:27,  4.95it/s][A[A[A[A[A




 59%|█████▉    | 5301/9000 [15:58<13:17,  4.64it/s][A[A[A[A[A




 59%|█████▉    | 5302/9000 [15:59<13:14,  4.66it/s][A[A[A[A[A




 59%|█████▉    | 5303/9000 [15:59<12:40,  4.86it/s][A[A[A[A[A




 59%|█████▉    | 5304/9000 [15:59<11:47,  5.22it/s][A[A[A[A[A




 59%|█████▉    | 5305/9000 [15:59<11:56,  5.16it/s][A[A[A[A[A




 59%|█████▉    | 5306/9000 [15:59<11:06,  5.55it/s][A[A[A[A[A




 59%|█████▉    | 5307/9000 [15:59<10:09,  6.06it/s][A[A[A[A[A




 59%|█████▉    | 5308/9000 [16:00<10:24,  5.91it/s][A[A[A[A[A




 59%|█████▉    | 5309/9000 [16:00<09:46,  6.30it/s][A[A[A[A[A




 59%|█████▉    | 5310/9000 [16:00<09:25,  6.52it/s][A[A[A[A[A




 59%|█████▉    | 5311/9000 [16:00<10:26,  5.89it/s][A[A[A[A[A




 59%|█████▉    | 5312/9000 [16:00<09:54,  6.20it/s][A[A[A[A[A




 59%|█

 62%|██████▏   | 5548/9000 [16:47<12:42,  4.53it/s][A[A[A[A[A




 62%|██████▏   | 5549/9000 [16:47<13:52,  4.15it/s][A[A[A[A[A




 62%|██████▏   | 5550/9000 [16:48<13:57,  4.12it/s][A[A[A[A[A




 62%|██████▏   | 5551/9000 [16:48<13:33,  4.24it/s][A[A[A[A[A




 62%|██████▏   | 5552/9000 [16:48<13:17,  4.32it/s][A[A[A[A[A




 62%|██████▏   | 5553/9000 [16:48<12:24,  4.63it/s][A[A[A[A[A




 62%|██████▏   | 5554/9000 [16:48<12:21,  4.65it/s][A[A[A[A[A




 62%|██████▏   | 5555/9000 [16:49<13:40,  4.20it/s][A[A[A[A[A




 62%|██████▏   | 5556/9000 [16:49<14:03,  4.08it/s][A[A[A[A[A




 62%|██████▏   | 5557/9000 [16:49<12:17,  4.67it/s][A[A[A[A[A




 62%|██████▏   | 5558/9000 [16:49<11:28,  5.00it/s][A[A[A[A[A




 62%|██████▏   | 5559/9000 [16:50<11:57,  4.80it/s][A[A[A[A[A




 62%|██████▏   | 5560/9000 [16:50<11:03,  5.18it/s][A[A[A[A[A




 62%|██████▏   | 5561/9000 [16:50<09:59,  5.74it/s][A[A[A[A[A




 62%|█

 64%|██████▍   | 5793/9000 [17:36<10:04,  5.30it/s][A[A[A[A[A




 64%|██████▍   | 5794/9000 [17:36<12:47,  4.18it/s][A[A[A[A[A




 64%|██████▍   | 5795/9000 [17:36<12:40,  4.21it/s][A[A[A[A[A




 64%|██████▍   | 5796/9000 [17:36<11:55,  4.48it/s][A[A[A[A[A




 64%|██████▍   | 5797/9000 [17:37<11:46,  4.53it/s][A[A[A[A[A




 64%|██████▍   | 5798/9000 [17:37<11:01,  4.84it/s][A[A[A[A[A




 64%|██████▍   | 5799/9000 [17:37<11:41,  4.57it/s][A[A[A[A[A




 64%|██████▍   | 5800/9000 [17:37<10:20,  5.16it/s][A[A[A[A[A




 64%|██████▍   | 5801/9000 [17:37<12:49,  4.16it/s][A[A[A[A[A




 64%|██████▍   | 5802/9000 [17:38<12:10,  4.38it/s][A[A[A[A[A




 64%|██████▍   | 5803/9000 [17:38<13:10,  4.04it/s][A[A[A[A[A




 64%|██████▍   | 5804/9000 [17:38<14:04,  3.79it/s][A[A[A[A[A




 64%|██████▍   | 5805/9000 [17:39<18:38,  2.86it/s][A[A[A[A[A




 65%|██████▍   | 5806/9000 [17:39<17:30,  3.04it/s][A[A[A[A[A




 65%|█

 67%|██████▋   | 6035/9000 [18:26<12:09,  4.06it/s][A[A[A[A[A




 67%|██████▋   | 6036/9000 [18:26<09:59,  4.94it/s][A[A[A[A[A




 67%|██████▋   | 6037/9000 [18:27<11:15,  4.39it/s][A[A[A[A[A




 67%|██████▋   | 6038/9000 [18:27<11:49,  4.17it/s][A[A[A[A[A




 67%|██████▋   | 6039/9000 [18:27<11:44,  4.20it/s][A[A[A[A[A




 67%|██████▋   | 6040/9000 [18:27<12:12,  4.04it/s][A[A[A[A[A




 67%|██████▋   | 6041/9000 [18:28<10:49,  4.55it/s][A[A[A[A[A




 67%|██████▋   | 6042/9000 [18:28<10:08,  4.86it/s][A[A[A[A[A




 67%|██████▋   | 6043/9000 [18:28<09:40,  5.10it/s][A[A[A[A[A




 67%|██████▋   | 6044/9000 [18:28<11:30,  4.28it/s][A[A[A[A[A




 67%|██████▋   | 6045/9000 [18:28<10:12,  4.82it/s][A[A[A[A[A




 67%|██████▋   | 6046/9000 [18:29<10:09,  4.85it/s][A[A[A[A[A




 67%|██████▋   | 6047/9000 [18:29<11:34,  4.25it/s][A[A[A[A[A




 67%|██████▋   | 6048/9000 [18:29<13:19,  3.69it/s][A[A[A[A[A




 67%|█

 70%|██████▉   | 6271/9000 [19:17<09:37,  4.72it/s][A[A[A[A[A




 70%|██████▉   | 6272/9000 [19:18<09:28,  4.80it/s][A[A[A[A[A




 70%|██████▉   | 6273/9000 [19:18<09:04,  5.01it/s][A[A[A[A[A




 70%|██████▉   | 6274/9000 [19:18<10:13,  4.45it/s][A[A[A[A[A




 70%|██████▉   | 6275/9000 [19:18<09:22,  4.84it/s][A[A[A[A[A




 70%|██████▉   | 6276/9000 [19:18<08:11,  5.54it/s][A[A[A[A[A




 70%|██████▉   | 6277/9000 [19:19<09:40,  4.69it/s][A[A[A[A[A




 70%|██████▉   | 6278/9000 [19:19<09:01,  5.02it/s][A[A[A[A[A




 70%|██████▉   | 6279/9000 [19:19<09:43,  4.66it/s][A[A[A[A[A




 70%|██████▉   | 6280/9000 [19:19<08:46,  5.17it/s][A[A[A[A[A




 70%|██████▉   | 6281/9000 [19:19<09:01,  5.02it/s][A[A[A[A[A




 70%|██████▉   | 6282/9000 [19:19<08:19,  5.44it/s][A[A[A[A[A




 70%|██████▉   | 6283/9000 [19:20<07:58,  5.67it/s][A[A[A[A[A




 70%|██████▉   | 6284/9000 [19:20<08:49,  5.13it/s][A[A[A[A[A




 70%|█

 72%|███████▏  | 6514/9000 [20:10<07:43,  5.36it/s][A[A[A[A[A




 72%|███████▏  | 6515/9000 [20:10<09:55,  4.18it/s][A[A[A[A[A




 72%|███████▏  | 6516/9000 [20:10<08:33,  4.84it/s][A[A[A[A[A




 72%|███████▏  | 6517/9000 [20:10<08:16,  5.00it/s][A[A[A[A[A




 72%|███████▏  | 6518/9000 [20:10<07:13,  5.72it/s][A[A[A[A[A




 72%|███████▏  | 6520/9000 [20:11<07:00,  5.90it/s][A[A[A[A[A




 72%|███████▏  | 6521/9000 [20:11<09:05,  4.55it/s][A[A[A[A[A




 72%|███████▏  | 6522/9000 [20:11<08:08,  5.07it/s][A[A[A[A[A




 72%|███████▏  | 6523/9000 [20:11<07:11,  5.73it/s][A[A[A[A[A




 72%|███████▏  | 6524/9000 [20:12<06:22,  6.48it/s][A[A[A[A[A




 72%|███████▎  | 6525/9000 [20:12<06:19,  6.52it/s][A[A[A[A[A




 73%|███████▎  | 6526/9000 [20:12<06:27,  6.39it/s][A[A[A[A[A




 73%|███████▎  | 6527/9000 [20:12<06:36,  6.24it/s][A[A[A[A[A




 73%|███████▎  | 6528/9000 [20:12<08:21,  4.93it/s][A[A[A[A[A




 73%|█

 75%|███████▌  | 6753/9000 [21:01<10:54,  3.43it/s][A[A[A[A[A




 75%|███████▌  | 6754/9000 [21:02<09:43,  3.85it/s][A[A[A[A[A




 75%|███████▌  | 6755/9000 [21:02<08:40,  4.31it/s][A[A[A[A[A




 75%|███████▌  | 6756/9000 [21:02<07:56,  4.71it/s][A[A[A[A[A




 75%|███████▌  | 6757/9000 [21:02<08:57,  4.18it/s][A[A[A[A[A




 75%|███████▌  | 6758/9000 [21:02<07:48,  4.79it/s][A[A[A[A[A




 75%|███████▌  | 6759/9000 [21:03<08:45,  4.26it/s][A[A[A[A[A




 75%|███████▌  | 6760/9000 [21:03<08:05,  4.62it/s][A[A[A[A[A




 75%|███████▌  | 6761/9000 [21:03<07:30,  4.97it/s][A[A[A[A[A




 75%|███████▌  | 6762/9000 [21:03<06:38,  5.62it/s][A[A[A[A[A




 75%|███████▌  | 6763/9000 [21:03<07:19,  5.09it/s][A[A[A[A[A




 75%|███████▌  | 6765/9000 [21:04<06:59,  5.33it/s][A[A[A[A[A




 75%|███████▌  | 6766/9000 [21:04<06:32,  5.69it/s][A[A[A[A[A




 75%|███████▌  | 6767/9000 [21:04<07:49,  4.76it/s][A[A[A[A[A




 75%|█

 78%|███████▊  | 6994/9000 [21:52<06:18,  5.30it/s][A[A[A[A[A




 78%|███████▊  | 6995/9000 [21:53<07:13,  4.62it/s][A[A[A[A[A




 78%|███████▊  | 6996/9000 [21:53<06:44,  4.96it/s][A[A[A[A[A




 78%|███████▊  | 6997/9000 [21:53<07:23,  4.51it/s][A[A[A[A[A




 78%|███████▊  | 6998/9000 [21:53<07:23,  4.51it/s][A[A[A[A[A




 78%|███████▊  | 6999/9000 [21:54<09:56,  3.36it/s][A[A[A[A[A




 78%|███████▊  | 7000/9000 [21:54<10:13,  3.26it/s][A[A[A[A[A




 78%|███████▊  | 7001/9000 [21:54<08:54,  3.74it/s][A[A[A[A[A




 78%|███████▊  | 7002/9000 [21:54<08:47,  3.79it/s][A[A[A[A[A




 78%|███████▊  | 7003/9000 [21:55<08:57,  3.71it/s][A[A[A[A[A




 78%|███████▊  | 7004/9000 [21:55<09:01,  3.69it/s][A[A[A[A[A




 78%|███████▊  | 7005/9000 [21:55<07:42,  4.32it/s][A[A[A[A[A




 78%|███████▊  | 7006/9000 [21:55<07:34,  4.39it/s][A[A[A[A[A




 78%|███████▊  | 7007/9000 [21:56<08:23,  3.96it/s][A[A[A[A[A




 78%|█

 80%|████████  | 7226/9000 [22:44<06:16,  4.71it/s][A[A[A[A[A




 80%|████████  | 7227/9000 [22:44<06:31,  4.53it/s][A[A[A[A[A




 80%|████████  | 7228/9000 [22:44<05:55,  4.99it/s][A[A[A[A[A




 80%|████████  | 7229/9000 [22:44<05:46,  5.11it/s][A[A[A[A[A




 80%|████████  | 7230/9000 [22:45<06:44,  4.38it/s][A[A[A[A[A




 80%|████████  | 7231/9000 [22:45<07:12,  4.09it/s][A[A[A[A[A




 80%|████████  | 7233/9000 [22:45<06:01,  4.89it/s][A[A[A[A[A




 80%|████████  | 7234/9000 [22:46<06:54,  4.26it/s][A[A[A[A[A




 80%|████████  | 7235/9000 [22:46<06:46,  4.34it/s][A[A[A[A[A




 80%|████████  | 7236/9000 [22:46<07:25,  3.96it/s][A[A[A[A[A




 80%|████████  | 7237/9000 [22:46<07:30,  3.91it/s][A[A[A[A[A




 80%|████████  | 7238/9000 [22:47<07:42,  3.81it/s][A[A[A[A[A




 80%|████████  | 7239/9000 [22:47<08:12,  3.57it/s][A[A[A[A[A




 80%|████████  | 7240/9000 [22:47<08:03,  3.64it/s][A[A[A[A[A




 80%|█

 83%|████████▎ | 7464/9000 [23:33<04:54,  5.21it/s][A[A[A[A[A




 83%|████████▎ | 7465/9000 [23:34<04:54,  5.20it/s][A[A[A[A[A




 83%|████████▎ | 7466/9000 [23:34<05:03,  5.06it/s][A[A[A[A[A




 83%|████████▎ | 7467/9000 [23:34<05:09,  4.95it/s][A[A[A[A[A




 83%|████████▎ | 7468/9000 [23:34<04:32,  5.61it/s][A[A[A[A[A




 83%|████████▎ | 7469/9000 [23:34<04:48,  5.32it/s][A[A[A[A[A




 83%|████████▎ | 7470/9000 [23:35<05:55,  4.30it/s][A[A[A[A[A




 83%|████████▎ | 7471/9000 [23:35<06:02,  4.21it/s][A[A[A[A[A




 83%|████████▎ | 7472/9000 [23:35<06:29,  3.92it/s][A[A[A[A[A




 83%|████████▎ | 7473/9000 [23:35<05:51,  4.34it/s][A[A[A[A[A




 83%|████████▎ | 7474/9000 [23:36<06:53,  3.69it/s][A[A[A[A[A




 83%|████████▎ | 7475/9000 [23:36<07:13,  3.52it/s][A[A[A[A[A




 83%|████████▎ | 7476/9000 [23:36<06:34,  3.86it/s][A[A[A[A[A




 83%|████████▎ | 7477/9000 [23:37<06:37,  3.83it/s][A[A[A[A[A




 83%|█

 86%|████████▌ | 7704/9000 [24:28<05:01,  4.30it/s][A[A[A[A[A




 86%|████████▌ | 7705/9000 [24:28<04:38,  4.65it/s][A[A[A[A[A




 86%|████████▌ | 7706/9000 [24:28<04:45,  4.54it/s][A[A[A[A[A




 86%|████████▌ | 7707/9000 [24:29<04:39,  4.62it/s][A[A[A[A[A




 86%|████████▌ | 7708/9000 [24:29<05:12,  4.13it/s][A[A[A[A[A




 86%|████████▌ | 7709/9000 [24:29<04:25,  4.86it/s][A[A[A[A[A




 86%|████████▌ | 7710/9000 [24:29<04:52,  4.40it/s][A[A[A[A[A




 86%|████████▌ | 7711/9000 [24:29<05:01,  4.28it/s][A[A[A[A[A




 86%|████████▌ | 7712/9000 [24:30<05:51,  3.66it/s][A[A[A[A[A




 86%|████████▌ | 7713/9000 [24:30<05:31,  3.88it/s][A[A[A[A[A




 86%|████████▌ | 7714/9000 [24:30<04:59,  4.30it/s][A[A[A[A[A




 86%|████████▌ | 7715/9000 [24:30<04:34,  4.67it/s][A[A[A[A[A




 86%|████████▌ | 7717/9000 [24:31<04:10,  5.12it/s][A[A[A[A[A




 86%|████████▌ | 7718/9000 [24:31<04:24,  4.84it/s][A[A[A[A[A




 86%|█

 89%|████████▊ | 7979/9000 [28:44<04:45,  3.57it/s][A[A[A[A[A




 89%|████████▊ | 7980/9000 [28:44<04:57,  3.42it/s][A[A[A[A[A




 89%|████████▊ | 7981/9000 [28:44<04:50,  3.50it/s][A[A[A[A[A




 89%|████████▊ | 7982/9000 [28:45<04:21,  3.89it/s][A[A[A[A[A




 89%|████████▊ | 7983/9000 [28:45<03:59,  4.24it/s][A[A[A[A[A




 89%|████████▊ | 7984/9000 [28:45<04:35,  3.68it/s][A[A[A[A[A




 89%|████████▊ | 7985/9000 [28:46<05:01,  3.37it/s][A[A[A[A[A




 89%|████████▊ | 7986/9000 [28:46<04:12,  4.02it/s][A[A[A[A[A




 89%|████████▊ | 7987/9000 [28:46<04:00,  4.21it/s][A[A[A[A[A




 89%|████████▉ | 7988/9000 [28:46<04:09,  4.06it/s][A[A[A[A[A




 89%|████████▉ | 7989/9000 [28:47<05:13,  3.23it/s][A[A[A[A[A




 89%|████████▉ | 7990/9000 [28:47<04:43,  3.56it/s][A[A[A[A[A




 89%|████████▉ | 7991/9000 [28:47<04:28,  3.75it/s][A[A[A[A[A




 89%|████████▉ | 7992/9000 [28:47<03:46,  4.45it/s][A[A[A[A[A




 89%|█

 91%|█████████▏| 8221/9000 [29:41<02:37,  4.94it/s][A[A[A[A[A




 91%|█████████▏| 8222/9000 [29:41<02:39,  4.88it/s][A[A[A[A[A




 91%|█████████▏| 8223/9000 [29:41<02:38,  4.91it/s][A[A[A[A[A




 91%|█████████▏| 8224/9000 [29:41<02:51,  4.53it/s][A[A[A[A[A




 91%|█████████▏| 8225/9000 [29:42<02:58,  4.34it/s][A[A[A[A[A




 91%|█████████▏| 8226/9000 [29:42<03:22,  3.82it/s][A[A[A[A[A




 91%|█████████▏| 8227/9000 [29:42<03:29,  3.69it/s][A[A[A[A[A




 91%|█████████▏| 8228/9000 [29:43<03:17,  3.90it/s][A[A[A[A[A




 91%|█████████▏| 8229/9000 [29:43<02:52,  4.46it/s][A[A[A[A[A




 91%|█████████▏| 8230/9000 [29:43<03:43,  3.45it/s][A[A[A[A[A




 91%|█████████▏| 8231/9000 [29:44<04:05,  3.13it/s][A[A[A[A[A




 91%|█████████▏| 8232/9000 [29:44<03:43,  3.44it/s][A[A[A[A[A




 91%|█████████▏| 8233/9000 [29:44<02:59,  4.27it/s][A[A[A[A[A




 91%|█████████▏| 8234/9000 [29:44<03:01,  4.23it/s][A[A[A[A[A




 92%|█

 94%|█████████▍| 8459/9000 [30:40<02:19,  3.88it/s][A[A[A[A[A




 94%|█████████▍| 8460/9000 [30:40<02:22,  3.78it/s][A[A[A[A[A




 94%|█████████▍| 8461/9000 [30:41<02:57,  3.03it/s][A[A[A[A[A




 94%|█████████▍| 8462/9000 [30:41<03:19,  2.70it/s][A[A[A[A[A




 94%|█████████▍| 8463/9000 [30:42<03:46,  2.37it/s][A[A[A[A[A




 94%|█████████▍| 8464/9000 [30:42<03:20,  2.68it/s][A[A[A[A[A




 94%|█████████▍| 8465/9000 [30:42<03:25,  2.61it/s][A[A[A[A[A




 94%|█████████▍| 8466/9000 [30:43<03:12,  2.77it/s][A[A[A[A[A




 94%|█████████▍| 8467/9000 [30:43<03:23,  2.62it/s][A[A[A[A[A




 94%|█████████▍| 8468/9000 [30:43<03:07,  2.84it/s][A[A[A[A[A




 94%|█████████▍| 8469/9000 [30:44<03:19,  2.66it/s][A[A[A[A[A




 94%|█████████▍| 8470/9000 [30:44<02:58,  2.97it/s][A[A[A[A[A




 94%|█████████▍| 8471/9000 [30:44<02:49,  3.12it/s][A[A[A[A[A




 94%|█████████▍| 8472/9000 [30:44<02:27,  3.58it/s][A[A[A[A[A




 94%|█

 97%|█████████▋| 8694/9000 [31:38<01:09,  4.38it/s][A[A[A[A[A




 97%|█████████▋| 8695/9000 [31:38<01:01,  4.97it/s][A[A[A[A[A




 97%|█████████▋| 8696/9000 [31:39<01:14,  4.06it/s][A[A[A[A[A




 97%|█████████▋| 8697/9000 [31:39<01:13,  4.13it/s][A[A[A[A[A




 97%|█████████▋| 8698/9000 [31:39<01:28,  3.41it/s][A[A[A[A[A




 97%|█████████▋| 8699/9000 [31:40<01:20,  3.75it/s][A[A[A[A[A




 97%|█████████▋| 8700/9000 [31:40<01:20,  3.72it/s][A[A[A[A[A




 97%|█████████▋| 8701/9000 [31:40<01:18,  3.82it/s][A[A[A[A[A




 97%|█████████▋| 8702/9000 [31:40<01:17,  3.84it/s][A[A[A[A[A




 97%|█████████▋| 8704/9000 [31:41<01:05,  4.55it/s][A[A[A[A[A




 97%|█████████▋| 8705/9000 [31:41<01:05,  4.50it/s][A[A[A[A[A




 97%|█████████▋| 8706/9000 [31:41<00:59,  4.94it/s][A[A[A[A[A




 97%|█████████▋| 8707/9000 [31:41<00:52,  5.55it/s][A[A[A[A[A




 97%|█████████▋| 8708/9000 [31:41<01:04,  4.55it/s][A[A[A[A[A




 97%|█

 99%|█████████▉| 8927/9000 [32:44<00:22,  3.19it/s][A[A[A[A[A




 99%|█████████▉| 8928/9000 [32:44<00:22,  3.21it/s][A[A[A[A[A




 99%|█████████▉| 8929/9000 [32:45<00:24,  2.91it/s][A[A[A[A[A




 99%|█████████▉| 8930/9000 [32:45<00:22,  3.12it/s][A[A[A[A[A




 99%|█████████▉| 8931/9000 [32:45<00:17,  3.92it/s][A[A[A[A[A




 99%|█████████▉| 8932/9000 [32:45<00:18,  3.59it/s][A[A[A[A[A




 99%|█████████▉| 8933/9000 [32:45<00:15,  4.32it/s][A[A[A[A[A




 99%|█████████▉| 8934/9000 [32:46<00:15,  4.40it/s][A[A[A[A[A




 99%|█████████▉| 8935/9000 [32:46<00:15,  4.21it/s][A[A[A[A[A




 99%|█████████▉| 8936/9000 [32:46<00:15,  4.02it/s][A[A[A[A[A




 99%|█████████▉| 8937/9000 [32:46<00:16,  3.77it/s][A[A[A[A[A




 99%|█████████▉| 8938/9000 [32:47<00:17,  3.51it/s][A[A[A[A[A




 99%|█████████▉| 8939/9000 [32:47<00:17,  3.52it/s][A[A[A[A[A




 99%|█████████▉| 8940/9000 [32:47<00:17,  3.43it/s][A[A[A[A[A




 99%|█

In [None]:
df.loc[(df['Prediction'] != "O") & (df['Category'] != "O") & (df['Category'] != df['Prediction'])]

In [102]:
df.keys()

Index(['Sentence', 'Word', 'Tag', 'Category', 'OriginalPrediction',
       'Prediction', 'SentenceID'],
      dtype='object')

In [88]:
df['SentenceID'].unique()

array([   0,    1,    2, ..., 8997, 8998, 8999])

In [111]:
def show_sentence(id):
    start = indexes[id]
    end = indexes[id+1]
    init_tokens = df[(df.index >= start) & (df.index < end)]['Word'].values.tolist()
    s = ' '.join(init_tokens)
    print(s)
    return df[(df.index >= start) & (df.index < end)][['Word','Category','Prediction','OriginalPrediction']]

In [112]:
show_sentence(1)

The commander of NATO 's Afghan force , British General David Richards , said the unity of command the transfer brought will enhance the effectiveness of the overall operation .


Unnamed: 0,Word,Category,Prediction,OriginalPrediction
12,The,O,O,O
13,commander,O,O,O
14,of,O,O,O
15,NATO,B-org,B-org,B-ORG
16,'s,O,O,O
17,Afghan,B-gpe,O,B-NORP
18,force,O,O,O
19,",",O,O,O
20,British,B-org,O,B-NORP
21,General,I-org,O,O


In [116]:
ne_list = ["B-event","I-event","B-geo","I-geo","B-gpe","I-gpe","B-obj","I-obj","B-obj","I-obj","B-org","I-org",
           "B-per","I-per","B-time","I-time","B-time","I-time"]

In [141]:
for cat in ne_list:
    n = df.loc[(df['Category'] == cat) & (df['Category'] != df['Prediction'])].shape[0]
    print('{} {}'.format(cat,n))

B-event 68
I-event 28
B-geo 1490
I-geo 489
B-gpe 715
I-gpe 49
B-obj 94
I-obj 46
B-obj 94
I-obj 46
B-org 2225
I-org 1188
B-per 1942
I-per 1658
B-time 979
I-time 457
B-time 979
I-time 457


In [160]:
for cat in df.loc[(df['Category'] == 'B-org') & (df['Category'] != df['Prediction'])]['OriginalPrediction'].unique():
    n = df.loc[(df['Category'] == 'B-org') & (df['OriginalPrediction'] == cat) & (df['Category'] != df['Prediction'])].shape[0]
    print(cat,n)

B-NORP 173
O 327
B-PERSON 194
I-ORG 655
B-GPE 664
B-EVENT 14
B-FAC 18
I-NORP 2
I-LOC 12
B-PRODUCT 13
I-FAC 23
I-GPE 49
I-LAW 14
B-LOC 23
I-DATE 2
I-EVENT 12
I-PERSON 12
B-DATE 5
B-WORK_OF_ART 2
I-WORK_OF_ART 4
B-CARDINAL 4
B-LAW 1
B-ORDINAL 1
I-MONEY 1


In [161]:
df.loc[(df['Category'] == 'I-org') & (df['OriginalPrediction'] == 'I-ORG')].shape[0]

1780

In [151]:
df.loc[(df['Category'] == 'B-org') & (df['OriginalPrediction'] == 'I-ORG')].shape[0]

655

In [135]:
df.loc[(df['Category'] == 'B-gpe') & (df['OriginalPrediction'] == 'B-NORP')].shape[0]

2568

In [136]:
df.loc[(df['Category'] != 'B-gpe') & (df['OriginalPrediction'] == 'B-NORP')].shape[0]

868

In [120]:
df.loc[(df['Category'] == 'B-geo')].shape[0]

6837

In [122]:
df.loc[(df['Category'] == 'B-geo')]['OriginalPrediction'].unique()

array(['O', 'B-GPE', 'B-LOC', 'I-NORP', 'B-NORP', 'B-PRODUCT', 'B-ORG',
       'B-EVENT', 'I-GPE', 'I-LOC', 'I-ORG', 'B-PERSON', 'B-FAC',
       'B-WORK_OF_ART', 'B-DATE', 'I-EVENT', 'I-FAC', 'I-PERSON',
       'I-WORK_OF_ART', 'I-DATE', 'B-LAW', 'B-CARDINAL'], dtype=object)

In [123]:
df.loc[(df['Category'] == 'B-geo') & (df['OriginalPrediction'] == 'B-GPE')].shape[0]

5068

In [124]:
df.loc[(df['Category'] == 'B-gpe')].shape[0]

3283

In [127]:
df.loc[(df['OriginalPrediction'] == 'B-GPE')].shape[0]

6861

In [165]:
results_by_models = [
    {'Size': 196646,
     'CorrectPredictionsWithO': 178588,
     'CorrectEntity': 18380, 
     'AllEntities': 30036, 
     'AllPredictions': 32141},
    {'Size': 196646, 
     'CorrectPredictionsWithO': 179008, 
     'CorrectEntity': 18607, 
     'AllEntities': 30036, 
     'AllPredictions': 32087},
    {'Size': 196646, 
     'CorrectPredictionsWithO': 179013, 
     'CorrectEntity': 18723, 
     'AllEntities': 30036, 
     'AllPredictions': 32220}
]
models = ['sm','md','lg']

In [173]:
res_out = []
for i in range(len(models)):
    o = [models[i]]
    res = results_by_models[i]
    o.append(res["CorrectPredictionsWithO"] / res["Size"] * 100)
    o.append(res["CorrectEntity"] / res["AllEntities"] * 100)
    o.append(res["CorrectEntity"] / res["AllPredictions"] * 100)
    res_out.append(o)

In [174]:
pd.DataFrame(res_out, columns=['model','accuracy','coverage','precision'])

Unnamed: 0,model,accuracy,coverage,precision
0,sm,90.817001,61.193235,57.185526
1,md,91.030583,61.948995,57.989217
2,lg,91.033126,62.335198,58.10987
