In [1]:
import os
import os.path as osp
from glob import glob
import re
from pprint import pprint
from tqdm import tqdm
import pydicom as dicom
import random
import csv

PATHS = [f'/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p{s}' for s in range(10, 20)]
TEXT_PATH_ROOT = '/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/'

get_report_path = lambda img_path: Path.joinpath(TEXT_ROOT, img_path.parents[0]).with_suffix(".txt")

def check_jpg_extension(filename):
    pattern = r"\.jpg$"
    if re.search(pattern, filename):
        return True
    else:
        return False

In [2]:
import csv

splits = {
    "train": set(),
    "validate": set(),
    "test": set(),
}
with open("/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic-cxr-2.0.0-split.csv", "r") as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        if i == 0:
            continue
        splits[row[-1]].add(f"p{row[-2]}")

In [3]:
[len(splits[x]) for x in list(splits.keys())]

[64586, 500, 293]

In [4]:
ALL_IMG_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

ALL_TXT_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

In [5]:
get_txt_path = lambda s: TEXT_PATH_ROOT + "/".join(s.split("/")[10:-1]).split(".")[0] + ".txt"
get_new_img_path = lambda s: "/".join(s.split("/")[10:])

In [6]:
for PATH in PATHS:
    top_dirs = [dir for dir in os.listdir(PATH) if dir.find(".") == -1]
    for dir_name in tqdm(top_dirs):
        paths = os.listdir(osp.join(PATH, dir_name))
        img_dirs = [osp.join(PATH, dir_name, dir) for dir in paths if dir.find(".") == -1]
        for img_dir in img_dirs:
            img_paths = [osp.join(img_dir, img_name) for img_name in os.listdir(img_dir)]
            img_paths = [path for path in img_paths if check_jpg_extension(path)]
            for sname, split_set in splits.items():
                if dir_name in split_set:
                    ALL_IMG_PATHS[sname].extend([get_new_img_path(s) for s in img_paths])
                    ALL_TXT_PATHS[sname].extend([get_txt_path(s) for s in img_paths])
                    break
            else:
                assert False, "No split is defined"
                

100%|██████████| 6396/6396 [00:46<00:00, 137.98it/s]
100%|██████████| 6571/6571 [00:50<00:00, 129.24it/s]
100%|██████████| 6526/6526 [00:44<00:00, 146.32it/s]
100%|██████████| 6548/6548 [00:50<00:00, 128.83it/s]
100%|██████████| 6506/6506 [00:47<00:00, 138.20it/s]
100%|██████████| 6592/6592 [00:48<00:00, 135.03it/s]
100%|██████████| 6476/6476 [00:49<00:00, 131.48it/s]
100%|██████████| 6642/6642 [00:52<00:00, 125.52it/s]
100%|██████████| 6543/6543 [00:49<00:00, 131.80it/s]
100%|██████████| 6579/6579 [00:50<00:00, 130.05it/s]


In [7]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        new_subdir = subdirs[9][:3]
        subdirs.insert(9, new_subdir)
        path = "/".join(subdirs)
        split[p_idx] = path

In [8]:
ALL_TXT_PATHS_PATIENT = {"train": {}, "validate": {}, "test": {}}

In [9]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        patient_id = subdirs[10]
        study_id = subdirs[11]
        if patient_id not in ALL_TXT_PATHS_PATIENT[sname]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id] = {study_id: path}
        elif study_id not in ALL_TXT_PATHS_PATIENT[sname][patient_id]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id][study_id] = path

In [10]:
idx = random.randint(0, len(ALL_TXT_PATHS)-1)
f = open("/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/p10/p10975446/s58917552.txt", "r")
text = f.read()
f.close()
print(text)

                                 FINAL REPORT
 PORTABLE CHEST, ___
 
 COMPARISON:  ___ chest x-ray.
 
 FINDINGS:  Cardiac silhouette remains enlarged and is accompanied by
 persistent pulmonary vascular congestion and interstitial edema.  Patchy
 bibasilar atelectasis also appears similar compared to the prior study.



In [11]:
def clean_section(text):
    text = re.sub(r'[\S]+:', '', text)
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [12]:
import string
def preprocess_report(text):
    # Remove unnecessary and insensible parts
    text = re.sub(r"EXAMINATION:.*", "", text)  # Remove EXAMINATION line
    text = re.sub(r"WET READ:.*", "", text)  # Remove WET READ line
    text = re.sub(r"FINAL REPORT", "", text)  # Remove FINAL REPORT line
    text = re.sub(r"STUDY:.*", "", text)  # Remove STUDY line
    text = re.sub(r"COMPARISON:.*", "", text)  # Remove COMPARISON section
    text = re.sub(r"TECHNIQUE:.*", "", text)  # Remove TECHNIQUE section
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores

    # Clean up excessive newlines and spaces
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [13]:
def extract_sections(report_text):
    findings_pattern = r'FINDINGS:[\s\S]*:'
    impression_pattern = r'IMPRESSION:[\s\S]*'

    findings_match = re.search(findings_pattern, report_text, re.IGNORECASE)
    impression_match = re.search(impression_pattern, report_text, re.IGNORECASE)

    findings = findings_match.group().strip() if findings_match else None
    impression = impression_match.group().strip() if impression_match else None

    if findings is None and impression is None:
        return preprocess_report(report_text)
    elif impression is None:
        return clean_section(findings)
    elif findings is None:
        return clean_section(impression)
    else:
        return clean_section(findings) + " " + clean_section(impression)

    return findings, impression

In [14]:
img_positions = {}

In [15]:
frontal = {"antero-posterior", "postero-anterior", ""}
lateral = {"lateral", "left lateral"}

In [16]:
with open("../data/mimic-cxr-2.0.0-metadata.csv") as f:
    lines = [line.rstrip() for line in f][1:]
    lines = [line for line in lines if line]
    
    for line in lines:
        fields = line.split(",")
        path = osp.join(f"p{fields[1][:2]}", f"p{fields[1]}", f"s{fields[2]}", f"{fields[0]}.jpg")
        if f"s{fields[2]}" not in img_positions:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"] = [(path, "frontal")]
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"] = [(path, "lateral")]
        else:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"].append((path, "frontal"))
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"].append((path, "lateral"))


In [17]:
list(img_positions.items())[:5]

[('s50414267',
  [('p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg',
    'frontal'),
   ('p10/p10000032/s50414267/174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg',
    'lateral')]),
 ('s53189527',
  [('p10/p10000032/s53189527/2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab.jpg',
    'frontal'),
   ('p10/p10000032/s53189527/e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c.jpg',
    'lateral')]),
 ('s53911762',
  [('p10/p10000032/s53911762/68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714.jpg',
    'frontal'),
   ('p10/p10000032/s53911762/fffabebf-74fd3a1f-673b6b41-96ec0ac9-2ab69818.jpg',
    'frontal')]),
 ('s56699142',
  [('p10/p10000032/s56699142/ea030e7a-2e3b1346-bc518786-7a8fd698-f673b44c.jpg',
    'frontal')]),
 ('s57375967',
  [('p10/p10000764/s57375967/096052b7-d256dc40-453a102b-fa7d01c6-1b22c6b4.jpg',
    'frontal'),
   ('p10/p10000764/s57375967/b79e55c3-735ce5ac-64412506-cdc9ea79-f1af521f.jpg',
    'lateral'),
   ('p10/p10000764/s57375967/dcfeeac4-1597e318-d0e673

In [18]:
lines[0].split(",")

['02aa804e-bde0afdd-112c0b34-7bc16630-4e384014',
 '10000032',
 '50414267',
 'CHEST (PA AND LAT)',
 'PA',
 '3056',
 '2544',
 '21800506',
 '213014.53100000002',
 'CHEST (PA AND LAT)',
 'postero-anterior',
 'Erect']

In [19]:
ALL_IMG_PATHS["train"][0]

'p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg'

In [20]:
extract_sections(text)

'PORTABLE CHEST, FINDINGS: Cardiac silhouette remains enlarged and is accompanied by persistent pulmonary vascular congestion and interstitial edema. Patchy bibasilar atelectasis also appears similar compared to the prior study.'

In [21]:
def get_report_from_path(path):
    try:
        f = open(path, "r")
        txt = extract_sections(f.read().strip())
        f.close()
    except FileNotFoundError as e:
        print(str(e))
        return None
    return txt

In [24]:
DATASET_PATH = "/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/files/"

def images_n_locs_2_txt(study_id):
    study_id = study_id.split(".")[0]
    images = img_positions[study_id]

    strings = []
    for img_path_pairs in images:
        if not osp.isfile(osp.join(DATASET_PATH, img_path_pairs[0])):
            print("Image does not exist")
            return None

        strings.append("[LOC_DELIM]".join(img_path_pairs))

    return "[VIEW_DELIM]".join(strings)

In [25]:
import csv


for sname, split in ALL_TXT_PATHS_PATIENT.items():
    with open(f"MIMIC_JPG_{sname[:5]}.tsv", 'w') as f:
        writer = csv.writer(f, delimiter='\t')

        for patient_id, studies in tqdm(split.items()):
            studies = list(studies.items())
            study_lim = len(studies) if len(studies) == 1 else len(studies) - 1
    
            if study_lim == 1:
                cur_study_id, cur_report_path = studies[0]
                cur_txt = get_report_from_path(cur_report_path)

                try:
                    cur_image_info = images_n_locs_2_txt(cur_study_id)
                    if cur_image_info is None:
                        continue
                except:
                    print("Invalid image position")
                    continue

                writer.writerow([cur_txt, cur_image_info])
            else:
                for study_idx in range(study_lim):
                    cur_study_id, cur_report_path = studies[study_idx]
                    next_study_id, next_report_path = studies[study_idx+1]
    
                    cur_txt = get_report_from_path(cur_report_path)
                    next_txt = get_report_from_path(next_report_path)
    
                    combined_txt = f"{cur_txt}[NEXT_TXT]{next_txt}"

                    try:
                        cur_image_info = images_n_locs_2_txt(cur_study_id)
                        next_image_info = images_n_locs_2_txt(next_study_id)

                        if cur_image_info is None or next_image_info is None:
                            continue
                    except:
                        print("Invalid image position")
                        continue

                    combined_img_info = f"{cur_image_info}[NEXT_IMG]{next_image_info}"
                    writer.writerow([combined_txt, combined_img_info])


  0%|          | 54/64213 [00:00<08:08, 131.44it/s]

Invalid image position
Invalid image position


  0%|          | 132/64213 [00:01<09:24, 113.54it/s]

Invalid image position


  0%|          | 245/64213 [00:02<07:19, 145.44it/s]

Invalid image position


  1%|          | 531/64213 [00:04<08:50, 120.11it/s]

Invalid image position
Invalid image position
Invalid image position


  1%|          | 637/64213 [00:05<09:15, 114.38it/s]

Invalid image position


  2%|▏         | 1333/64213 [00:12<13:43, 76.32it/s] 

Invalid image position
Invalid image position


  2%|▏         | 1378/64213 [00:12<13:48, 75.88it/s]

Invalid image position
Invalid image position
Invalid image position


  3%|▎         | 1792/64213 [00:17<09:16, 112.10it/s]

Invalid image position
Invalid image position


  3%|▎         | 1835/64213 [00:18<14:52, 69.90it/s] 

Invalid image position
Invalid image position


  3%|▎         | 2077/64213 [00:20<08:47, 117.76it/s]

Invalid image position


  3%|▎         | 2167/64213 [00:21<11:18, 91.50it/s] 

Invalid image position
Invalid image position


  4%|▍         | 2409/64213 [00:24<10:43, 96.01it/s] 

Invalid image position
Invalid image position


  4%|▍         | 2744/64213 [00:27<09:05, 112.78it/s]

Invalid image position
Invalid image position


  5%|▌         | 3281/64213 [00:32<09:43, 104.41it/s]

Invalid image position


  5%|▌         | 3328/64213 [00:33<10:38, 95.39it/s] 

Invalid image position


  5%|▌         | 3525/64213 [00:35<11:46, 85.93it/s] 

Invalid image position
Invalid image position


  7%|▋         | 4577/64213 [00:46<09:40, 102.75it/s]

Invalid image position
Invalid image position


  7%|▋         | 4651/64213 [00:47<09:45, 101.66it/s]

Invalid image position
Invalid image position


  7%|▋         | 4746/64213 [00:47<06:01, 164.30it/s]

Invalid image position


  7%|▋         | 4795/64213 [00:48<07:56, 124.58it/s]

Invalid image position
Invalid image position


  8%|▊         | 5311/64213 [00:53<08:43, 112.49it/s]

Invalid image position


  8%|▊         | 5382/64213 [00:54<07:41, 127.60it/s]

Invalid image position
Invalid image position


  9%|▊         | 5489/64213 [00:55<10:56, 89.45it/s] 

Invalid image position
Invalid image position


  9%|▉         | 5916/64213 [00:59<10:02, 96.73it/s] 

Invalid image position


  9%|▉         | 5981/64213 [01:00<08:35, 113.05it/s]

Invalid image position


 10%|▉         | 6192/64213 [01:02<07:49, 123.65it/s]

Invalid image position
Invalid image position


 10%|█         | 6557/64213 [01:06<08:47, 109.27it/s]

Invalid image position
Invalid image position


 11%|█         | 7044/64213 [01:10<08:56, 106.62it/s]

Invalid image position
Invalid image position


 11%|█         | 7124/64213 [01:11<10:07, 94.00it/s] 

Invalid image position
Invalid image position
Invalid image position


 12%|█▏        | 7409/64213 [01:15<07:51, 120.49it/s]

Invalid image position
Invalid image position


 12%|█▏        | 7616/64213 [01:17<10:38, 88.68it/s] 

Invalid image position
Invalid image position


 12%|█▏        | 7681/64213 [01:17<08:31, 110.48it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8097/64213 [01:22<07:36, 122.80it/s]

Invalid image position


 13%|█▎        | 8324/64213 [01:25<14:14, 65.40it/s] 

Invalid image position
Invalid image position


 13%|█▎        | 8347/64213 [01:25<12:38, 73.62it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8445/64213 [01:26<07:59, 116.31it/s]

Invalid image position


 14%|█▍        | 8898/64213 [01:31<10:12, 90.36it/s] 

Invalid image position
Invalid image position


 15%|█▍        | 9324/64213 [01:35<10:56, 83.62it/s] 

Invalid image position
Invalid image position


 15%|█▍        | 9487/64213 [01:37<08:36, 106.00it/s]

Invalid image position
Invalid image position


 15%|█▌        | 9775/64213 [01:40<09:38, 94.10it/s] 

Invalid image position
Invalid image position
Invalid image position


 16%|█▌        | 10085/64213 [01:44<10:33, 85.38it/s] 

Invalid image position
Invalid image position


 16%|█▌        | 10256/64213 [01:46<09:24, 95.59it/s] 

Invalid image position


 16%|█▌        | 10411/64213 [01:47<09:42, 92.29it/s] 

Invalid image position
Invalid image position


 17%|█▋        | 10644/64213 [01:50<12:59, 68.71it/s] 

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 17%|█▋        | 10869/64213 [01:52<07:52, 112.97it/s]

Invalid image position


 17%|█▋        | 10921/64213 [01:53<09:41, 91.57it/s] 

Invalid image position


 18%|█▊        | 11317/64213 [01:57<08:23, 105.02it/s]

Invalid image position
Invalid image position


 18%|█▊        | 11779/64213 [02:02<10:46, 81.05it/s] 

Invalid image position
Invalid image position


 18%|█▊        | 11838/64213 [02:03<07:28, 116.84it/s]

Invalid image position
Invalid image position


 19%|█▊        | 11992/64213 [02:04<07:58, 109.06it/s]

Invalid image position
Invalid image position


 19%|█▉        | 12357/64213 [02:08<09:11, 94.01it/s] 

Invalid image position


 19%|█▉        | 12387/64213 [02:09<13:05, 65.98it/s]

Invalid image position
Invalid image position


 20%|█▉        | 12641/64213 [02:12<12:30, 68.68it/s] 

Invalid image position
Invalid image position


 20%|█▉        | 12720/64213 [02:13<06:37, 129.65it/s]

Invalid image position
Invalid image position


 20%|██        | 13126/64213 [02:17<12:03, 70.63it/s] 

Invalid image position
Invalid image position


 21%|██        | 13477/64213 [02:21<08:29, 99.64it/s] 

Invalid image position


 21%|██        | 13539/64213 [02:22<10:36, 79.58it/s]

Invalid image position


 21%|██        | 13571/64213 [02:22<09:02, 93.39it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13822/64213 [02:25<10:20, 81.26it/s] 

Invalid image position


 22%|██▏       | 13858/64213 [02:25<08:31, 98.48it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13895/64213 [02:26<07:25, 112.84it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 23%|██▎       | 14652/64213 [02:33<09:16, 89.08it/s] 

Invalid image position
Invalid image position


 23%|██▎       | 14844/64213 [02:35<06:10, 133.32it/s]

Invalid image position


 23%|██▎       | 14961/64213 [02:37<11:48, 69.49it/s] 

Invalid image position
Invalid image position


 23%|██▎       | 15001/64213 [02:37<07:54, 103.61it/s]

Invalid image position
Invalid image position


 23%|██▎       | 15077/64213 [02:38<08:16, 98.99it/s] 

Invalid image position


 24%|██▎       | 15120/64213 [02:38<11:25, 71.63it/s] 

Invalid image position


 24%|██▍       | 15474/64213 [02:42<08:54, 91.17it/s] 

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 24%|██▍       | 15557/64213 [02:43<08:10, 99.11it/s]

Invalid image position


 24%|██▍       | 15621/64213 [02:44<08:04, 100.34it/s]

Invalid image position
Invalid image position


 25%|██▍       | 15938/64213 [02:48<06:51, 117.38it/s]

Invalid image position
Invalid image position


 25%|██▌       | 16137/64213 [02:49<06:01, 133.01it/s]

Invalid image position


 26%|██▌       | 16440/64213 [02:52<07:39, 103.88it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17076/64213 [02:59<06:43, 116.84it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17295/64213 [03:01<07:38, 102.29it/s]

Invalid image position
Invalid image position
Invalid image position


 27%|██▋       | 17546/64213 [03:04<09:08, 85.15it/s] 

Invalid image position
Invalid image position


 28%|██▊       | 17701/64213 [03:05<05:37, 137.71it/s]

Invalid image position


 28%|██▊       | 17972/64213 [03:08<08:03, 95.60it/s] 

Invalid image position
Invalid image position


 28%|██▊       | 18224/64213 [03:11<07:05, 107.99it/s]

Invalid image position
Invalid image position
Invalid image position


 28%|██▊       | 18268/64213 [03:11<08:02, 95.16it/s] 

Invalid image position
Invalid image position
Invalid image position


 29%|██▉       | 18643/64213 [03:15<08:04, 94.10it/s] 

Invalid image position
Invalid image position


 29%|██▉       | 18707/64213 [03:16<08:52, 85.52it/s]

Invalid image position
Invalid image position


 30%|██▉       | 19136/64213 [03:20<06:26, 116.52it/s]

Invalid image position


 30%|███       | 19362/64213 [03:23<09:34, 78.04it/s] 

Invalid image position


 30%|███       | 19431/64213 [03:24<08:55, 83.59it/s]

Invalid image position


 30%|███       | 19486/64213 [03:24<06:46, 110.12it/s]

Invalid image position
Invalid image position


 30%|███       | 19573/64213 [03:25<08:00, 92.95it/s] 

Invalid image position


 31%|███       | 19713/64213 [03:27<05:32, 133.89it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 31%|███       | 19991/64213 [03:29<06:54, 106.61it/s]

Invalid image position
Invalid image position


 31%|███       | 20038/64213 [03:30<09:29, 77.51it/s] 

Invalid image position
Invalid image position


 32%|███▏      | 20480/64213 [03:34<08:13, 88.70it/s] 

Invalid image position
Invalid image position


 32%|███▏      | 20720/64213 [03:37<05:01, 144.33it/s]

Invalid image position


 32%|███▏      | 20756/64213 [03:37<06:31, 111.08it/s]

Invalid image position


 32%|███▏      | 20798/64213 [03:38<07:01, 103.07it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20837/64213 [03:38<06:30, 111.09it/s]

Invalid image position


 33%|███▎      | 21012/64213 [03:40<08:20, 86.32it/s] 

Invalid image position


 33%|███▎      | 21077/64213 [03:41<08:02, 89.48it/s]

Invalid image position


 33%|███▎      | 21344/64213 [03:44<08:06, 88.05it/s] 

Invalid image position
Invalid image position


 34%|███▍      | 21861/64213 [03:49<06:11, 113.93it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23013/64213 [04:00<06:50, 100.37it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23269/64213 [04:03<05:44, 118.70it/s]

Invalid image position


 37%|███▋      | 23605/64213 [04:07<05:11, 130.34it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 38%|███▊      | 24307/64213 [04:14<05:38, 117.91it/s]

Invalid image position
Invalid image position


 38%|███▊      | 24427/64213 [04:15<06:01, 110.08it/s]

Invalid image position


 39%|███▊      | 24735/64213 [04:18<05:41, 115.62it/s]

Invalid image position


 39%|███▉      | 25105/64213 [04:22<07:21, 88.67it/s] 

Invalid image position
Invalid image position


 40%|███▉      | 25494/64213 [04:26<05:34, 115.65it/s]

Invalid image position


 40%|███▉      | 25598/64213 [04:27<05:38, 114.12it/s]

Invalid image position


 40%|████      | 25846/64213 [04:30<08:02, 79.51it/s] 

Invalid image position


 41%|████      | 26134/64213 [04:32<05:40, 111.88it/s]

Invalid image position
Invalid image position


 41%|████      | 26247/64213 [04:34<06:39, 94.97it/s] 

Invalid image position
Invalid image position


 41%|████      | 26318/64213 [04:35<06:14, 101.11it/s]

Invalid image position
Invalid image position


 41%|████▏     | 26605/64213 [04:37<06:03, 103.33it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26661/64213 [04:38<05:35, 111.94it/s]

Invalid image position


 42%|████▏     | 26716/64213 [04:39<06:24, 97.54it/s] 

Invalid image position
Invalid image position


 42%|████▏     | 26757/64213 [04:39<05:43, 109.07it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26980/64213 [04:41<07:08, 86.85it/s] 

Invalid image position


 44%|████▎     | 28005/64213 [04:51<06:17, 95.85it/s] 

Invalid image position


 44%|████▍     | 28137/64213 [04:52<07:13, 83.29it/s] 

Invalid image position
Invalid image position


 44%|████▍     | 28210/64213 [04:53<05:06, 117.34it/s]

Invalid image position
Invalid image position


 44%|████▍     | 28527/64213 [04:56<04:31, 131.24it/s]

Invalid image position
Invalid image position


 45%|████▍     | 28845/64213 [04:59<05:37, 104.72it/s]

Invalid image position


 45%|████▌     | 28942/64213 [05:00<06:38, 88.51it/s] 

Invalid image position
Invalid image position


 45%|████▌     | 29035/64213 [05:01<04:47, 122.49it/s]

Invalid image position


 45%|████▌     | 29077/64213 [05:01<04:59, 117.23it/s]

Invalid image position
Invalid image position


 46%|████▌     | 29410/64213 [05:04<06:13, 93.23it/s] 

Invalid image position


 46%|████▌     | 29589/64213 [05:06<05:44, 100.56it/s]

Invalid image position


 47%|████▋     | 30032/64213 [05:10<04:31, 125.81it/s]

Invalid image position
Invalid image position


 47%|████▋     | 30117/64213 [05:11<05:20, 106.40it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 47%|████▋     | 30434/64213 [05:14<05:36, 100.33it/s]

Invalid image position
Invalid image position


 48%|████▊     | 30821/64213 [05:18<07:14, 76.79it/s] 

Invalid image position
Invalid image position


 48%|████▊     | 30854/64213 [05:18<05:43, 97.16it/s]

Invalid image position


 48%|████▊     | 30876/64213 [05:19<06:19, 87.91it/s]

Invalid image position


 49%|████▊     | 31284/64213 [05:22<04:13, 129.85it/s]

Invalid image position


 49%|████▉     | 31567/64213 [05:26<05:25, 100.35it/s]

Invalid image position


 49%|████▉     | 31624/64213 [05:26<06:24, 84.71it/s] 

Invalid image position
Invalid image position


 49%|████▉     | 31772/64213 [05:28<06:20, 85.17it/s]

Invalid image position
Invalid image position


 50%|█████     | 32107/64213 [05:31<05:23, 99.18it/s] 

Invalid image position
Invalid image position


 50%|█████     | 32400/64213 [05:35<04:17, 123.61it/s]

Invalid image position


 51%|█████     | 32877/64213 [05:39<05:45, 90.70it/s] 

Invalid image position
Invalid image position
Invalid image position


 52%|█████▏    | 33118/64213 [05:42<04:11, 123.87it/s]

Invalid image position


 52%|█████▏    | 33131/64213 [05:42<04:53, 105.85it/s]

Invalid image position
Invalid image position


 52%|█████▏    | 33345/64213 [05:44<04:22, 117.71it/s]

Invalid image position


 53%|█████▎    | 33868/64213 [05:50<04:21, 115.87it/s]

Invalid image position


 53%|█████▎    | 34015/64213 [05:51<04:11, 119.92it/s]

Invalid image position
Invalid image position


 53%|█████▎    | 34223/64213 [05:54<05:36, 89.04it/s] 

Invalid image position


 53%|█████▎    | 34296/64213 [05:54<04:30, 110.58it/s]

Invalid image position


 53%|█████▎    | 34308/64213 [05:55<04:26, 112.14it/s]

Invalid image position


 54%|█████▎    | 34400/64213 [05:56<05:54, 84.20it/s] 

Invalid image position
Invalid image position


 54%|█████▎    | 34458/64213 [05:56<05:41, 87.23it/s]

Invalid image position
Invalid image position


 54%|█████▍    | 34947/64213 [06:01<05:09, 94.48it/s] 

Invalid image position


 55%|█████▌    | 35359/64213 [06:06<05:54, 81.30it/s] 

Invalid image position


 56%|█████▌    | 35975/64213 [06:12<04:25, 106.23it/s]

Invalid image position


 57%|█████▋    | 36333/64213 [06:16<04:06, 112.91it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36393/64213 [06:16<05:07, 90.47it/s] 

Invalid image position
Invalid image position


 57%|█████▋    | 36450/64213 [06:17<03:37, 127.40it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36730/64213 [06:20<08:47, 52.09it/s] 

Invalid image position
Invalid image position


 58%|█████▊    | 37405/64213 [06:27<05:13, 85.63it/s] 

Invalid image position
Invalid image position


 58%|█████▊    | 37458/64213 [06:27<04:26, 100.31it/s]

Invalid image position


 59%|█████▊    | 37696/64213 [06:29<03:35, 123.04it/s]

Invalid image position


 59%|█████▉    | 37848/64213 [06:31<04:32, 96.76it/s] 

Invalid image position
Invalid image position


 59%|█████▉    | 37959/64213 [06:32<03:40, 119.32it/s]

Invalid image position
Invalid image position


 59%|█████▉    | 37985/64213 [06:32<04:05, 107.02it/s]

Invalid image position


 60%|█████▉    | 38422/64213 [06:38<04:42, 91.29it/s] 

Invalid image position
Invalid image position


 60%|█████▉    | 38470/64213 [06:38<04:43, 90.72it/s] 

Invalid image position


 60%|██████    | 38619/64213 [06:40<03:24, 125.26it/s]

Invalid image position
Invalid image position


 60%|██████    | 38813/64213 [06:42<03:45, 112.87it/s]

Invalid image position
Invalid image position


 61%|██████    | 38853/64213 [06:42<04:06, 103.09it/s]

Invalid image position


 62%|██████▏   | 39496/64213 [06:49<03:48, 108.13it/s]

Invalid image position
Invalid image position


 62%|██████▏   | 39789/64213 [06:52<04:53, 83.26it/s] 

Invalid image position
Invalid image position


 63%|██████▎   | 40224/64213 [06:56<03:56, 101.35it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40475/64213 [06:58<03:30, 112.71it/s]

Invalid image position


 63%|██████▎   | 40523/64213 [06:59<04:35, 86.11it/s] 

Invalid image position
Invalid image position


 64%|██████▎   | 40777/64213 [07:02<03:42, 105.34it/s]

Invalid image position
Invalid image position


 64%|██████▎   | 40816/64213 [07:02<03:52, 100.58it/s]

Invalid image position
Invalid image position


 64%|██████▍   | 40949/64213 [07:04<03:18, 117.38it/s]

Image does not exist
Image does not exist


 64%|██████▍   | 41012/64213 [07:04<02:43, 142.11it/s]

Image does not exist


 64%|██████▍   | 41081/64213 [07:05<02:47, 138.00it/s]

Invalid image position


 64%|██████▍   | 41375/64213 [07:08<03:40, 103.55it/s]

Image does not exist
Image does not exist


 65%|██████▍   | 41708/64213 [07:11<02:35, 144.96it/s]

Invalid image position


 66%|██████▌   | 42196/64213 [07:15<03:43, 98.37it/s] 

Invalid image position


 66%|██████▌   | 42370/64213 [07:17<02:59, 121.74it/s]

Invalid image position
Invalid image position


 66%|██████▌   | 42441/64213 [07:17<03:06, 116.56it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 42836/64213 [07:21<02:52, 123.66it/s]

Invalid image position


 67%|██████▋   | 43028/64213 [07:23<03:29, 101.09it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 43150/64213 [07:24<02:59, 117.08it/s]

Invalid image position


 68%|██████▊   | 43519/64213 [07:28<03:54, 88.29it/s] 

Invalid image position
Invalid image position


 68%|██████▊   | 43803/64213 [07:30<02:59, 113.79it/s]

Invalid image position


 68%|██████▊   | 43921/64213 [07:31<03:05, 109.53it/s]

Invalid image position
Invalid image position
Invalid image position


 68%|██████▊   | 43983/64213 [07:32<02:22, 141.87it/s]

Image does not exist


 69%|██████▉   | 44184/64213 [07:34<04:19, 77.11it/s] 

Invalid image position
Invalid image position


 69%|██████▉   | 44207/64213 [07:34<03:37, 92.03it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44501/64213 [07:38<04:13, 77.89it/s] 

Invalid image position


 69%|██████▉   | 44530/64213 [07:38<03:17, 99.47it/s]

Invalid image position
Invalid image position


 70%|██████▉   | 44657/64213 [07:39<02:42, 120.60it/s]

Invalid image position


 70%|██████▉   | 44838/64213 [07:41<03:41, 87.38it/s] 

Invalid image position
Invalid image position


 70%|███████   | 45191/64213 [07:44<03:22, 93.95it/s] 

Invalid image position
Invalid image position


 70%|███████   | 45253/64213 [07:45<02:46, 114.15it/s]

Invalid image position


 71%|███████   | 45369/64213 [07:46<03:52, 81.09it/s] 

Invalid image position


 71%|███████   | 45453/64213 [07:47<02:23, 130.80it/s]

Invalid image position


 71%|███████   | 45645/64213 [07:49<02:53, 107.02it/s]

Invalid image position
Invalid image position


 71%|███████   | 45723/64213 [07:50<04:04, 75.67it/s] 

Invalid image position
Invalid image position


 71%|███████▏  | 45771/64213 [07:51<03:27, 89.09it/s]

Invalid image position


 71%|███████▏  | 45868/64213 [07:51<02:26, 125.41it/s]

Invalid image position
Invalid image position


 72%|███████▏  | 46028/64213 [07:53<03:04, 98.36it/s] 

Invalid image position
Invalid image position


 72%|███████▏  | 46192/64213 [07:55<03:04, 97.52it/s] 

Invalid image position


 73%|███████▎  | 46747/64213 [08:01<03:36, 80.58it/s] 

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 73%|███████▎  | 46802/64213 [08:02<03:07, 92.97it/s]

Invalid image position


 73%|███████▎  | 47024/64213 [08:04<03:05, 92.66it/s] 

Invalid image position
Invalid image position


 74%|███████▎  | 47332/64213 [08:06<02:13, 126.62it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47418/64213 [08:07<02:42, 103.07it/s]

Invalid image position


 74%|███████▍  | 47523/64213 [08:08<02:28, 112.03it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47606/64213 [08:09<02:45, 100.34it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 74%|███████▍  | 47714/64213 [08:11<03:58, 69.14it/s] 

Invalid image position
Invalid image position


 75%|███████▍  | 47942/64213 [08:14<03:33, 76.21it/s]

Invalid image position


 75%|███████▌  | 48169/64213 [08:16<02:42, 98.68it/s] 

Invalid image position


 76%|███████▌  | 48493/64213 [08:19<02:44, 95.72it/s] 

Invalid image position


 76%|███████▌  | 48753/64213 [08:22<02:50, 90.57it/s] 

Invalid image position
Invalid image position


 76%|███████▌  | 48854/64213 [08:23<02:21, 108.78it/s]

Invalid image position


 77%|███████▋  | 49336/64213 [08:27<02:22, 104.47it/s]

Invalid image position
Invalid image position


 77%|███████▋  | 49456/64213 [08:29<02:14, 109.49it/s]

Invalid image position


 77%|███████▋  | 49656/64213 [08:30<01:44, 139.04it/s]

Invalid image position


 78%|███████▊  | 50008/64213 [08:33<01:44, 135.55it/s]

Invalid image position
Invalid image position


 78%|███████▊  | 50068/64213 [08:34<02:18, 101.90it/s]

Invalid image position
Invalid image position


 78%|███████▊  | 50295/64213 [08:36<02:16, 102.32it/s]

Invalid image position
Invalid image position


 79%|███████▉  | 50630/64213 [08:39<01:56, 116.66it/s]

Invalid image position


 79%|███████▉  | 50866/64213 [08:41<02:15, 98.40it/s] 

Image does not exist
Image does not exist


 80%|███████▉  | 51127/64213 [08:43<02:06, 103.12it/s]

Invalid image position
Invalid image position


 80%|███████▉  | 51310/64213 [08:45<02:03, 104.73it/s]

Invalid image position


 81%|████████  | 51817/64213 [08:51<01:59, 104.16it/s]

Invalid image position
Invalid image position
Invalid image position


 81%|████████  | 51869/64213 [08:51<01:30, 137.13it/s]

Invalid image position
Invalid image position


 81%|████████  | 52166/64213 [08:54<01:57, 102.50it/s]

Invalid image position
Invalid image position


 81%|████████▏ | 52281/64213 [08:56<02:04, 95.92it/s] 

Invalid image position
Invalid image position


 81%|████████▏ | 52321/64213 [08:56<02:10, 91.02it/s]

Invalid image position


 82%|████████▏ | 52642/64213 [08:59<01:28, 130.45it/s]

Invalid image position
Invalid image position


 82%|████████▏ | 52681/64213 [08:59<02:04, 92.83it/s] 

Invalid image position
Invalid image position


 82%|████████▏ | 52782/64213 [09:00<01:28, 129.52it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53014/64213 [09:02<02:12, 84.72it/s] 

Invalid image position
Invalid image position


 83%|████████▎ | 53066/64213 [09:03<01:46, 104.43it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53593/64213 [09:09<02:22, 74.42it/s] 

Invalid image position
Invalid image position


 84%|████████▍ | 53831/64213 [09:11<01:28, 117.04it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 54038/64213 [09:13<01:28, 115.53it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 54083/64213 [09:14<01:24, 120.52it/s]

Invalid image position


 84%|████████▍ | 54106/64213 [09:14<01:10, 143.14it/s]

Invalid image position


 85%|████████▍ | 54537/64213 [09:18<01:13, 131.86it/s]

Invalid image position


 85%|████████▌ | 54687/64213 [09:20<01:32, 103.19it/s]

Invalid image position
Invalid image position


 85%|████████▌ | 54742/64213 [09:20<01:20, 117.67it/s]

Invalid image position
Invalid image position


 86%|████████▌ | 54980/64213 [09:22<01:17, 119.26it/s]

Invalid image position


 86%|████████▌ | 55134/64213 [09:24<01:15, 119.50it/s]

Invalid image position


 86%|████████▌ | 55232/64213 [09:25<01:06, 135.79it/s]

Invalid image position


 86%|████████▌ | 55381/64213 [09:26<01:51, 78.89it/s] 

Invalid image position


 86%|████████▋ | 55498/64213 [09:27<01:19, 109.92it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55616/64213 [09:29<01:48, 78.97it/s] 

Invalid image position


 87%|████████▋ | 55930/64213 [09:32<01:12, 114.32it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55971/64213 [09:32<01:33, 88.44it/s] 

Invalid image position
Invalid image position


 88%|████████▊ | 56285/64213 [09:35<01:10, 113.09it/s]

Invalid image position


 88%|████████▊ | 56454/64213 [09:37<01:27, 89.09it/s] 

Invalid image position


 88%|████████▊ | 56766/64213 [09:40<00:53, 139.75it/s]

Invalid image position


 89%|████████▉ | 57350/64213 [09:46<01:14, 92.20it/s] 

Invalid image position
Invalid image position


 89%|████████▉ | 57458/64213 [09:47<00:58, 115.77it/s]

Invalid image position
Invalid image position


 90%|████████▉ | 57585/64213 [09:48<01:03, 105.07it/s]

Invalid image position
Invalid image position


 90%|█████████ | 57882/64213 [09:51<00:56, 112.90it/s]

Invalid image position


 91%|█████████ | 58269/64213 [09:55<00:47, 124.54it/s]

Invalid image position
Invalid image position


 91%|█████████ | 58332/64213 [09:55<00:50, 116.88it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58623/64213 [09:58<01:14, 75.54it/s] 

Invalid image position
Invalid image position


 91%|█████████▏| 58673/64213 [09:59<01:17, 71.77it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58707/64213 [09:59<01:16, 72.02it/s]

Image does not exist
Image does not exist


 92%|█████████▏| 58872/64213 [10:01<00:58, 91.70it/s] 

Invalid image position


 92%|█████████▏| 59072/64213 [10:02<00:31, 164.92it/s]

Image does not exist
Image does not exist


 93%|█████████▎| 59606/64213 [10:07<00:40, 114.62it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 94%|█████████▎| 60167/64213 [10:13<00:43, 93.53it/s] 

Invalid image position
Invalid image position


 94%|█████████▍| 60288/64213 [10:14<00:34, 113.94it/s]

Invalid image position


 94%|█████████▍| 60416/64213 [10:15<00:27, 139.57it/s]

Invalid image position


 94%|█████████▍| 60466/64213 [10:16<00:28, 132.89it/s]

Invalid image position
Invalid image position


 95%|█████████▍| 60804/64213 [10:19<00:26, 127.75it/s]

Invalid image position
Invalid image position


 95%|█████████▍| 60904/64213 [10:20<00:28, 116.15it/s]

Invalid image position
Invalid image position


 95%|█████████▌| 61026/64213 [10:21<00:33, 94.98it/s] 

Invalid image position
Invalid image position


 95%|█████████▌| 61290/64213 [10:24<00:33, 88.55it/s] 

Invalid image position


 96%|█████████▌| 61329/64213 [10:24<00:30, 95.97it/s]

Invalid image position


 96%|█████████▌| 61525/64213 [10:26<00:19, 135.50it/s]

Invalid image position
Invalid image position


 96%|█████████▌| 61599/64213 [10:27<00:21, 122.97it/s]

Invalid image position
Invalid image position
Invalid image position


 96%|█████████▌| 61697/64213 [10:28<00:28, 88.65it/s] 

Invalid image position
Invalid image position


 97%|█████████▋| 62116/64213 [10:33<00:21, 99.46it/s] 

Invalid image position
Invalid image position


 97%|█████████▋| 62248/64213 [10:34<00:22, 86.84it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62312/64213 [10:35<00:17, 109.03it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62445/64213 [10:36<00:13, 134.69it/s]

Image does not exist


 97%|█████████▋| 62473/64213 [10:36<00:17, 97.78it/s] 

Image does not exist
Image does not exist


 97%|█████████▋| 62546/64213 [10:37<00:14, 118.72it/s]

Image does not exist


 98%|█████████▊| 62660/64213 [10:38<00:16, 93.13it/s] 

Invalid image position


 98%|█████████▊| 62757/64213 [10:39<00:19, 75.95it/s] 

Invalid image position
Invalid image position


 98%|█████████▊| 62847/64213 [10:40<00:14, 93.97it/s]

Invalid image position
Invalid image position


 99%|█████████▊| 63292/64213 [10:45<00:12, 72.80it/s] 

Invalid image position
Invalid image position


 99%|█████████▉| 63697/64213 [10:49<00:04, 123.94it/s]

Invalid image position


 99%|█████████▉| 63781/64213 [10:50<00:03, 109.91it/s]

Invalid image position


 99%|█████████▉| 63804/64213 [10:50<00:04, 90.70it/s] 

Invalid image position
Invalid image position


100%|█████████▉| 64153/64213 [10:53<00:00, 112.97it/s]

Invalid image position


100%|██████████| 64213/64213 [10:54<00:00, 98.09it/s] 
 19%|█▉        | 96/496 [00:01<00:08, 49.42it/s] 

Invalid image position
Invalid image position


100%|██████████| 496/496 [00:05<00:00, 91.27it/s] 
 10%|▉         | 29/293 [00:00<00:08, 29.57it/s]

Invalid image position
Invalid image position


 28%|██▊       | 83/293 [00:02<00:07, 29.32it/s]

Invalid image position


 54%|█████▍    | 159/293 [00:05<00:05, 25.71it/s]

Invalid image position


 58%|█████▊    | 171/293 [00:06<00:03, 36.19it/s]

Invalid image position
Invalid image position


 76%|███████▌  | 223/293 [00:07<00:01, 36.53it/s]

Invalid image position
Invalid image position


 89%|████████▊ | 260/293 [00:08<00:01, 32.95it/s]

Invalid image position
Invalid image position


100%|██████████| 293/293 [00:09<00:00, 29.61it/s]


In [25]:
l = []

In [26]:
with open("MIMIC_JPG_train_trimmed.tsv", "r") as f:
    reader = csv.reader(f, delimiter="\t")
    for item in reader:
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")
        l.append((cur_img_count, next_img_count))


FileNotFoundError: [Errno 2] No such file or directory: 'MIMIC_JPG_train_trimmed.tsv'

In [28]:
LIMIT = 2

with open("MIMIC_JPG_test.tsv", "r") as f, open("MIMIC_JPG_test_trimmed.tsv", "w") as f_write:
    reader = csv.reader(f, delimiter="\t")
    writer = csv.writer(f_write, delimiter="\t")
    for item in tqdm(reader):
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")

        if cur_img_count > LIMIT:
            cur_img = "[VIEW_DELIM]".join(cur_img.split("[VIEW_DELIM]")[:LIMIT])
        if next_img_count > LIMIT:
            next_img = "[VIEW_DELIM]".join(next_img.split("[VIEW_DELIM]")[:LIMIT])

        views = cur_img.split("[VIEW_DELIM]")
        views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
        cur_img = "[VIEW_DELIM]".join(views)

        if next_img_count != 0:
            views = next_img.split("[VIEW_DELIM]")
            views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
            next_img = "[VIEW_DELIM]".join(views)

        if "[NEXT_IMG]" in item[-1]:
            cur_img = f"{cur_img}[NEXT_IMG]{next_img}"

        item[-1] = cur_img
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")

        item = (*item, cur_img_count, next_img_count)

        writer.writerow(item)

2934it [00:00, 12506.52it/s]


In [None]:
from collections import Counter
c = Counter(l)

In [75]:
c

Counter({(1, 1): 52654,
         (2, 2): 40642,
         (2, 0): 34681,
         (1, 2): 25935,
         (2, 1): 25884,
         (1, 0): 8285})

In [61]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28532,
         (1, 2): 21655,
         (2, 1): 21549,
         (1, 0): 8285,
         (3, 0): 5950,
         (2, 3): 4962,
         (3, 2): 4950,
         (3, 1): 4335,
         (1, 3): 4280,
         (3, 3): 2198})

In [47]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28534,
         (1, 2): 21655,
         (2, 1): 21550,
         (1, 0): 8285,
         (3, 0): 5378,
         (2, 3): 4514,
         (3, 2): 4501,
         (3, 1): 3869,
         (1, 3): 3794,
         (3, 3): 1646,
         (4, 0): 544,
         (1, 4): 439,
         (4, 1): 429,
         (4, 2): 422,
         (2, 4): 421,
         (4, 3): 240,
         (3, 4): 222,
         (4, 4): 59,
         (1, 5): 36,
         (5, 1): 30,
         (5, 2): 22,
         (5, 0): 20,
         (2, 5): 17,
         (2, 6): 9,
         (5, 3): 9,
         (6, 0): 8,
         (1, 6): 6,
         (3, 5): 6,
         (6, 1): 5,
         (4, 5): 4,
         (6, 6): 3,
         (6, 4): 3,
         (1, 8): 3,
         (6, 2): 3,
         (1, 7): 2,
         (4, 6): 2,
         (5, 4): 2,
         (7, 6): 1,
         (8, 4): 1,
         (9, 1): 1,
         (8, 1): 1,
         (8, 2): 1,
         (2, 11): 1,
         (11, 2): 1})