In [1]:
import os
import os.path as osp
from glob import glob
import re
from pprint import pprint
from tqdm import tqdm
import pydicom as dicom
import random
import csv

PATHS = [f'/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p{s}' for s in range(10, 20)]
TEXT_PATH_ROOT = '/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/'

get_report_path = lambda img_path: Path.joinpath(TEXT_ROOT, img_path.parents[0]).with_suffix(".txt")

def check_jpg_extension(filename):
    pattern = r"\.jpg$"
    if re.search(pattern, filename):
        return True
    else:
        return False

In [2]:
import csv

splits = {
    "train": set(),
    "validate": set(),
    "test": set(),
}
with open("/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic-cxr-2.0.0-split.csv", "r") as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        if i == 0:
            continue
        splits[row[-1]].add(f"p{row[-2]}")

In [3]:
[len(splits[x]) for x in list(splits.keys())]

[64586, 500, 293]

In [4]:
ALL_IMG_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

ALL_TXT_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

In [5]:
get_txt_path = lambda s: TEXT_PATH_ROOT + "/".join(s.split("/")[10:-1]).split(".")[0] + ".txt"
get_new_img_path = lambda s: "/".join(s.split("/")[10:])

In [6]:
for PATH in PATHS:
    top_dirs = [dir for dir in os.listdir(PATH) if dir.find(".") == -1]
    for dir_name in tqdm(top_dirs):
        paths = os.listdir(osp.join(PATH, dir_name))
        img_dirs = [osp.join(PATH, dir_name, dir) for dir in paths if dir.find(".") == -1]
        for img_dir in img_dirs:
            img_paths = [osp.join(img_dir, img_name) for img_name in os.listdir(img_dir)]
            img_paths = [path for path in img_paths if check_jpg_extension(path)]
            for sname, split_set in splits.items():
                if dir_name in split_set:
                    ALL_IMG_PATHS[sname].extend([get_new_img_path(s) for s in img_paths])
                    ALL_TXT_PATHS[sname].extend([get_txt_path(s) for s in img_paths])
                    break
            else:
                assert False, "No split is defined"
                

100%|██████████| 6396/6396 [00:13<00:00, 458.88it/s]
100%|██████████| 6571/6571 [00:49<00:00, 132.22it/s]
100%|██████████| 6526/6526 [00:50<00:00, 129.86it/s]
100%|██████████| 6548/6548 [00:56<00:00, 116.17it/s]
100%|██████████| 6506/6506 [00:51<00:00, 125.71it/s]
100%|██████████| 6592/6592 [00:54<00:00, 120.09it/s]
100%|██████████| 6476/6476 [00:55<00:00, 117.35it/s]
100%|██████████| 6642/6642 [00:56<00:00, 118.14it/s]
100%|██████████| 6543/6543 [00:55<00:00, 117.49it/s]
100%|██████████| 6579/6579 [00:54<00:00, 120.52it/s]


In [7]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        new_subdir = subdirs[9][:3]
        subdirs.insert(9, new_subdir)
        path = "/".join(subdirs)
        split[p_idx] = path

In [8]:
ALL_TXT_PATHS_PATIENT = {"train": {}, "validate": {}, "test": {}}

In [10]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        patient_id = subdirs[10]
        study_id = subdirs[11]
        if patient_id not in ALL_TXT_PATHS_PATIENT[sname]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id] = {study_id: path}
        elif study_id not in ALL_TXT_PATHS_PATIENT[sname][patient_id]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id][study_id] = path

In [11]:
idx = random.randint(0, len(ALL_TXT_PATHS)-1)
f = open("/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/p10/p10975446/s58917552.txt", "r")
text = f.read()
f.close()
print(text)

                                 FINAL REPORT
 PORTABLE CHEST, ___
 
 COMPARISON:  ___ chest x-ray.
 
 FINDINGS:  Cardiac silhouette remains enlarged and is accompanied by
 persistent pulmonary vascular congestion and interstitial edema.  Patchy
 bibasilar atelectasis also appears similar compared to the prior study.



In [12]:
def clean_section(text):
    text = re.sub(r'[\S]+:', '', text)
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [13]:
import string
def preprocess_report(text):
    # Remove unnecessary and insensible parts
    text = re.sub(r"EXAMINATION:.*", "", text)  # Remove EXAMINATION line
    text = re.sub(r"WET READ:.*", "", text)  # Remove WET READ line
    text = re.sub(r"FINAL REPORT", "", text)  # Remove FINAL REPORT line
    text = re.sub(r"STUDY:.*", "", text)  # Remove STUDY line
    text = re.sub(r"COMPARISON:.*", "", text)  # Remove COMPARISON section
    text = re.sub(r"TECHNIQUE:.*", "", text)  # Remove TECHNIQUE section
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores

    # Clean up excessive newlines and spaces
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [14]:
def extract_sections(report_text):
    findings_pattern = r'FINDINGS:[\s\S]*:'
    impression_pattern = r'IMPRESSION:[\s\S]*'

    findings_match = re.search(findings_pattern, report_text, re.IGNORECASE)
    impression_match = re.search(impression_pattern, report_text, re.IGNORECASE)

    findings = findings_match.group().strip() if findings_match else None
    impression = impression_match.group().strip() if impression_match else None

    if findings is None and impression is None:
        return preprocess_report(report_text)
    elif impression is None:
        return clean_section(findings)
    elif findings is None:
        return clean_section(impression)
    else:
        return clean_section(findings) + " " + clean_section(impression)

    return findings, impression

In [15]:
img_positions = {}

In [16]:
frontal = {"antero-posterior", "postero-anterior", ""}
lateral = {"lateral", "left lateral"}

In [17]:
with open("../data/mimic-cxr-2.0.0-metadata.csv") as f:
    lines = [line.rstrip() for line in f][1:]
    lines = [line for line in lines if line]
    
    for line in lines:
        fields = line.split(",")
        path = osp.join(f"p{fields[1][:2]}", f"p{fields[1]}", f"s{fields[2]}", f"{fields[0]}.jpg")
        if f"s{fields[2]}" not in img_positions:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"] = [(path, "frontal")]
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"] = [(path, "lateral")]
        else:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"].append((path, "frontal"))
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"].append((path, "lateral"))


In [18]:
list(img_positions.items())[:5]

[('s50414267',
  [('p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg',
    'frontal'),
   ('p10/p10000032/s50414267/174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg',
    'lateral')]),
 ('s53189527',
  [('p10/p10000032/s53189527/2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab.jpg',
    'frontal'),
   ('p10/p10000032/s53189527/e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c.jpg',
    'lateral')]),
 ('s53911762',
  [('p10/p10000032/s53911762/68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714.jpg',
    'frontal'),
   ('p10/p10000032/s53911762/fffabebf-74fd3a1f-673b6b41-96ec0ac9-2ab69818.jpg',
    'frontal')]),
 ('s56699142',
  [('p10/p10000032/s56699142/ea030e7a-2e3b1346-bc518786-7a8fd698-f673b44c.jpg',
    'frontal')]),
 ('s57375967',
  [('p10/p10000764/s57375967/096052b7-d256dc40-453a102b-fa7d01c6-1b22c6b4.jpg',
    'frontal'),
   ('p10/p10000764/s57375967/b79e55c3-735ce5ac-64412506-cdc9ea79-f1af521f.jpg',
    'lateral'),
   ('p10/p10000764/s57375967/dcfeeac4-1597e318-d0e673

In [19]:
lines[0].split(",")

['02aa804e-bde0afdd-112c0b34-7bc16630-4e384014',
 '10000032',
 '50414267',
 'CHEST (PA AND LAT)',
 'PA',
 '3056',
 '2544',
 '21800506',
 '213014.53100000002',
 'CHEST (PA AND LAT)',
 'postero-anterior',
 'Erect']

In [20]:
ALL_IMG_PATHS["train"][0]

'p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg'

In [21]:
extract_sections(text)

'PORTABLE CHEST, FINDINGS: Cardiac silhouette remains enlarged and is accompanied by persistent pulmonary vascular congestion and interstitial edema. Patchy bibasilar atelectasis also appears similar compared to the prior study.'

In [22]:
def get_report_from_path(path):
    try:
        f = open(path, "r")
        txt = extract_sections(f.read().strip())
        f.close()
    except FileNotFoundError as e:
        print(str(e))
        return None
    return txt

In [23]:
def images_n_locs_2_txt(study_id):
    study_id = study_id.split(".")[0]
    images = img_positions[study_id]

    strings = []
    for img_path_pairs in images:
        strings.append("[LOC_DELIM]".join(img_path_pairs))

    return "[VIEW_DELIM]".join(strings)

In [24]:
import csv

for sname, split in ALL_TXT_PATHS_PATIENT.items():
    with open(f"MIMIC_JPG_{sname[:5]}.tsv", 'w') as f:
        writer = csv.writer(f, delimiter='\t')

        for patient_id, studies in tqdm(split.items()):
            studies = list(studies.items())
            study_lim = len(studies) if len(studies) == 1 else len(studies) - 1
    
            if study_lim == 1:
                cur_study_id, cur_report_path = studies[0]
                cur_txt = get_report_from_path(cur_report_path)

                try:
                    cur_image_info = images_n_locs_2_txt(cur_study_id)
                except:
                    print("Invalid image position")
                    continue

                writer.writerow([cur_txt, cur_image_info])
            else:
                for study_idx in range(study_lim):
                    cur_study_id, cur_report_path = studies[study_idx]
                    next_study_id, next_report_path = studies[study_idx+1]
    
                    cur_txt = get_report_from_path(cur_report_path)
                    next_txt = get_report_from_path(next_report_path)
    
                    combined_txt = f"{cur_txt}[NEXT_TXT]{next_txt}"

                    try:
                        cur_image_info = images_n_locs_2_txt(cur_study_id)
                        next_image_info = images_n_locs_2_txt(next_study_id)
                    except:
                        print("Invalid image position")
                        continue

                    combined_img_info = f"{cur_image_info}[NEXT_IMG]{next_image_info}"
                    writer.writerow([combined_txt, combined_img_info])


  0%|          | 33/64213 [00:00<22:33, 47.41it/s]

Invalid image position
Invalid image position


  0%|          | 120/64213 [00:02<16:43, 63.85it/s]

Invalid image position


  0%|          | 237/64213 [00:04<16:28, 64.69it/s]

Invalid image position


  1%|          | 531/64213 [00:08<15:18, 69.31it/s]

Invalid image position
Invalid image position


  1%|          | 547/64213 [00:08<16:01, 66.20it/s]

Invalid image position


  1%|          | 633/64213 [00:10<18:00, 58.85it/s]

Invalid image position


  2%|▏         | 1336/64213 [00:21<20:16, 51.68it/s]

Invalid image position
Invalid image position


  2%|▏         | 1370/64213 [00:22<16:53, 62.02it/s]

Invalid image position
Invalid image position


  2%|▏         | 1385/64213 [00:22<17:55, 58.40it/s]

Invalid image position


  3%|▎         | 1790/64213 [00:30<17:30, 59.42it/s]

Invalid image position
Invalid image position


  3%|▎         | 1835/64213 [00:31<20:08, 51.63it/s]

Invalid image position
Invalid image position


  3%|▎         | 2060/64213 [00:35<16:31, 62.70it/s]

Invalid image position


  3%|▎         | 2161/64213 [00:36<14:46, 69.98it/s]

Invalid image position
Invalid image position


  4%|▎         | 2402/64213 [00:40<16:00, 64.36it/s]

Invalid image position
Invalid image position


  4%|▍         | 2741/64213 [00:45<13:24, 76.36it/s]

Invalid image position
Invalid image position


  5%|▌         | 3283/64213 [00:54<13:01, 77.96it/s]

Invalid image position


  5%|▌         | 3314/64213 [00:55<16:54, 60.00it/s]

Invalid image position


  5%|▌         | 3520/64213 [00:58<16:47, 60.24it/s]

Invalid image position
Invalid image position


  7%|▋         | 4572/64213 [01:15<12:26, 79.90it/s]

Invalid image position
Invalid image position


  7%|▋         | 4655/64213 [01:17<12:09, 81.63it/s]

Invalid image position
Invalid image position


  7%|▋         | 4739/64213 [01:18<11:27, 86.57it/s]

Invalid image position


  7%|▋         | 4782/64213 [01:18<12:20, 80.25it/s]

Invalid image position
Invalid image position


  8%|▊         | 5307/64213 [01:26<12:05, 81.18it/s]

Invalid image position


  8%|▊         | 5387/64213 [01:27<14:36, 67.10it/s]

Invalid image position
Invalid image position


  9%|▊         | 5486/64213 [01:28<17:07, 57.18it/s]

Invalid image position
Invalid image position


  9%|▉         | 5909/64213 [01:36<17:24, 55.83it/s]

Invalid image position


  9%|▉         | 5985/64213 [01:38<34:20, 28.26it/s]

Invalid image position


 10%|▉         | 6182/64213 [01:42<17:32, 55.14it/s]

Invalid image position
Invalid image position


 10%|█         | 6543/64213 [01:48<12:41, 75.77it/s]

Invalid image position


 10%|█         | 6578/64213 [01:48<11:54, 80.72it/s]

Invalid image position


 11%|█         | 7025/64213 [01:55<16:19, 58.38it/s]

Invalid image position
Invalid image position


 11%|█         | 7116/64213 [01:57<13:11, 72.13it/s]

Invalid image position
Invalid image position
Invalid image position


 12%|█▏        | 7398/64213 [02:02<16:32, 57.23it/s]

Invalid image position
Invalid image position


 12%|█▏        | 7611/64213 [02:06<17:22, 54.29it/s]

Invalid image position
Invalid image position


 12%|█▏        | 7673/64213 [02:07<17:13, 54.71it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8091/64213 [02:14<12:52, 72.63it/s]

Invalid image position


 13%|█▎        | 8327/64213 [02:18<20:01, 46.53it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8344/64213 [02:18<17:36, 52.87it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8438/64213 [02:20<14:56, 62.24it/s]

Invalid image position


 14%|█▍        | 8898/64213 [02:27<13:38, 67.56it/s]

Invalid image position
Invalid image position


 15%|█▍        | 9319/64213 [02:34<14:41, 62.26it/s]

Invalid image position
Invalid image position


 15%|█▍        | 9475/64213 [02:36<16:33, 55.09it/s]

Invalid image position
Invalid image position


 15%|█▌        | 9774/64213 [02:41<13:00, 69.71it/s]

Invalid image position


 15%|█▌        | 9782/64213 [02:41<17:57, 50.52it/s]

Invalid image position
Invalid image position


 16%|█▌        | 10093/64213 [02:46<16:03, 56.19it/s]

Invalid image position
Invalid image position


 16%|█▌        | 10254/64213 [02:49<11:19, 79.46it/s]

Invalid image position


 16%|█▌        | 10409/64213 [02:51<13:40, 65.61it/s]

Invalid image position
Invalid image position


 17%|█▋        | 10640/64213 [02:55<17:18, 51.57it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 17%|█▋        | 10853/64213 [02:58<12:50, 69.25it/s]

Invalid image position


 17%|█▋        | 10917/64213 [02:59<13:47, 64.42it/s] 

Invalid image position


 18%|█▊        | 11331/64213 [03:05<08:55, 98.77it/s]

Invalid image position
Invalid image position


 18%|█▊        | 11773/64213 [03:11<12:39, 69.01it/s]

Invalid image position
Invalid image position


 18%|█▊        | 11824/64213 [03:12<09:24, 92.77it/s]

Invalid image position
Invalid image position


 19%|█▊        | 11983/64213 [03:14<12:10, 71.52it/s]

Invalid image position
Invalid image position


 19%|█▉        | 12362/64213 [03:20<12:09, 71.12it/s]

Invalid image position


 19%|█▉        | 12390/64213 [03:21<17:53, 48.29it/s]

Invalid image position
Invalid image position


 20%|█▉        | 12634/64213 [03:25<17:58, 47.81it/s]

Invalid image position
Invalid image position


 20%|█▉        | 12721/64213 [03:26<09:59, 85.84it/s]

Invalid image position
Invalid image position


 20%|██        | 13130/64213 [03:33<13:27, 63.25it/s]

Invalid image position
Invalid image position


 21%|██        | 13479/64213 [03:38<10:48, 78.21it/s]

Invalid image position


 21%|██        | 13538/64213 [03:39<10:20, 81.65it/s]

Invalid image position


 21%|██        | 13559/64213 [03:39<10:32, 80.08it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13818/64213 [03:43<14:44, 56.96it/s]

Invalid image position


 22%|██▏       | 13854/64213 [03:44<13:17, 63.12it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13889/64213 [03:46<23:58, 34.99it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13904/64213 [03:46<18:17, 45.84it/s]

Invalid image position
Invalid image position


 23%|██▎       | 14657/64213 [03:57<12:38, 65.30it/s]

Invalid image position
Invalid image position


 23%|██▎       | 14837/64213 [04:01<13:31, 60.88it/s]

Invalid image position


 23%|██▎       | 14955/64213 [04:03<18:07, 45.31it/s]

Invalid image position
Invalid image position


 23%|██▎       | 15003/64213 [04:04<14:34, 56.30it/s]

Invalid image position
Invalid image position


 23%|██▎       | 15077/64213 [04:05<14:03, 58.25it/s]

Invalid image position


 24%|██▎       | 15122/64213 [04:06<13:21, 61.26it/s]

Invalid image position


 24%|██▍       | 15478/64213 [04:12<14:00, 57.97it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 24%|██▍       | 15552/64213 [04:13<10:58, 73.90it/s]

Invalid image position


 24%|██▍       | 15614/64213 [04:14<11:35, 69.86it/s]

Invalid image position
Invalid image position


 25%|██▍       | 15933/64213 [04:18<09:21, 86.05it/s]

Invalid image position
Invalid image position


 25%|██▌       | 16141/64213 [04:21<09:54, 80.84it/s]

Invalid image position


 26%|██▌       | 16432/64213 [04:25<12:11, 65.28it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17066/64213 [04:35<11:59, 65.51it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17286/64213 [04:38<11:35, 67.43it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17300/64213 [04:39<12:08, 64.38it/s]

Invalid image position


 27%|██▋       | 17545/64213 [04:43<11:45, 66.12it/s]

Invalid image position
Invalid image position


 28%|██▊       | 17681/64213 [04:45<13:48, 56.16it/s]

Invalid image position


 28%|██▊       | 17969/64213 [04:50<13:25, 57.40it/s]

Invalid image position
Invalid image position


 28%|██▊       | 18219/64213 [04:54<12:26, 61.63it/s]

Invalid image position
Invalid image position
Invalid image position


 28%|██▊       | 18267/64213 [04:55<09:53, 77.39it/s]

Invalid image position
Invalid image position
Invalid image position


 29%|██▉       | 18646/64213 [05:01<14:05, 53.90it/s]

Invalid image position
Invalid image position


 29%|██▉       | 18714/64213 [05:02<11:57, 63.43it/s]

Invalid image position
Invalid image position


 30%|██▉       | 19125/64213 [05:10<13:53, 54.10it/s]

Invalid image position


 30%|███       | 19360/64213 [05:14<13:49, 54.08it/s]

Invalid image position


 30%|███       | 19430/64213 [05:15<13:13, 56.46it/s]

Invalid image position


 30%|███       | 19494/64213 [05:16<12:57, 57.52it/s]

Invalid image position
Invalid image position


 30%|███       | 19580/64213 [05:17<11:24, 65.17it/s]

Invalid image position


 31%|███       | 19706/64213 [05:20<10:41, 69.37it/s]

Invalid image position
Invalid image position


 31%|███       | 19724/64213 [05:20<10:10, 72.92it/s]

Invalid image position
Invalid image position


 31%|███       | 19990/64213 [05:25<13:18, 55.36it/s]

Invalid image position
Invalid image position


 31%|███       | 20045/64213 [05:26<12:10, 60.47it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20478/64213 [05:33<13:46, 52.91it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20696/64213 [05:37<12:39, 57.27it/s]

Invalid image position


 32%|███▏      | 20747/64213 [05:38<13:24, 54.02it/s]

Invalid image position


 32%|███▏      | 20792/64213 [05:38<11:52, 60.94it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20837/64213 [05:39<11:16, 64.10it/s]

Invalid image position


 33%|███▎      | 21008/64213 [05:42<12:14, 58.81it/s]

Invalid image position


 33%|███▎      | 21075/64213 [05:43<11:03, 65.05it/s]

Invalid image position


 33%|███▎      | 21349/64213 [05:48<12:10, 58.71it/s]

Invalid image position
Invalid image position


 34%|███▍      | 21854/64213 [05:55<08:52, 79.60it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23020/64213 [06:13<12:04, 56.88it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23275/64213 [06:18<09:01, 75.60it/s]

Invalid image position


 37%|███▋      | 23585/64213 [06:23<08:54, 75.98it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 38%|███▊      | 24290/64213 [06:35<10:06, 65.79it/s]

Invalid image position
Invalid image position


 38%|███▊      | 24428/64213 [06:37<11:02, 60.07it/s]

Invalid image position


 38%|███▊      | 24719/64213 [06:41<11:47, 55.85it/s]

Invalid image position


 39%|███▉      | 25100/64213 [06:47<10:40, 61.08it/s]

Invalid image position
Invalid image position


 40%|███▉      | 25488/64213 [06:54<08:59, 71.81it/s]

Invalid image position


 40%|███▉      | 25591/64213 [06:56<09:23, 68.56it/s]

Invalid image position


 40%|████      | 25847/64213 [06:59<10:32, 60.63it/s]

Invalid image position


 41%|████      | 26129/64213 [07:04<08:55, 71.18it/s]

Invalid image position
Invalid image position


 41%|████      | 26252/64213 [07:06<09:06, 69.48it/s]

Invalid image position
Invalid image position


 41%|████      | 26315/64213 [07:07<09:56, 63.52it/s]

Invalid image position
Invalid image position


 41%|████▏     | 26599/64213 [07:12<10:10, 61.64it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26657/64213 [07:13<10:55, 57.32it/s]

Invalid image position


 42%|████▏     | 26699/64213 [07:14<13:11, 47.39it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26753/64213 [07:15<09:38, 64.74it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26967/64213 [07:18<12:42, 48.83it/s]

Invalid image position


 44%|████▎     | 28000/64213 [07:34<08:13, 73.32it/s]

Invalid image position


 44%|████▍     | 28131/64213 [07:36<08:35, 69.99it/s]

Invalid image position
Invalid image position


 44%|████▍     | 28211/64213 [07:37<07:18, 82.18it/s]

Invalid image position
Invalid image position


 44%|████▍     | 28521/64213 [07:42<08:18, 71.54it/s]

Invalid image position
Invalid image position


 45%|████▍     | 28832/64213 [07:47<11:14, 52.47it/s]

Invalid image position


 45%|████▌     | 28925/64213 [07:49<16:54, 34.77it/s]

Invalid image position
Invalid image position


 45%|████▌     | 29032/64213 [07:51<10:45, 54.54it/s]

Invalid image position


 45%|████▌     | 29059/64213 [07:51<10:02, 58.33it/s]

Invalid image position
Invalid image position


 46%|████▌     | 29399/64213 [07:57<09:47, 59.28it/s]

Invalid image position


 46%|████▌     | 29580/64213 [08:00<08:34, 67.27it/s]

Invalid image position


 47%|████▋     | 30025/64213 [08:07<08:37, 66.05it/s]

Invalid image position
Invalid image position


 47%|████▋     | 30112/64213 [08:08<09:27, 60.10it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 47%|████▋     | 30424/64213 [08:13<07:22, 76.38it/s]

Invalid image position
Invalid image position


 48%|████▊     | 30817/64213 [08:19<10:23, 53.60it/s]

Invalid image position
Invalid image position


 48%|████▊     | 30842/64213 [08:20<09:39, 57.60it/s]

Invalid image position


 48%|████▊     | 30871/64213 [08:20<08:01, 69.26it/s]

Invalid image position


 49%|████▊     | 31273/64213 [08:26<08:45, 62.67it/s]

Invalid image position


 49%|████▉     | 31567/64213 [08:30<09:13, 58.97it/s]

Invalid image position


 49%|████▉     | 31615/64213 [08:31<10:16, 52.84it/s]

Invalid image position
Invalid image position


 49%|████▉     | 31766/64213 [08:34<09:02, 59.77it/s]

Invalid image position
Invalid image position


 50%|████▉     | 32105/64213 [08:39<07:37, 70.18it/s]

Invalid image position
Invalid image position


 50%|█████     | 32399/64213 [08:44<06:15, 84.64it/s]

Invalid image position


 51%|█████     | 32875/64213 [08:50<07:33, 69.12it/s]

Invalid image position
Invalid image position


 51%|█████     | 32892/64213 [08:50<07:14, 72.01it/s]

Invalid image position


 52%|█████▏    | 33106/64213 [08:54<06:57, 74.59it/s]

Invalid image position


 52%|█████▏    | 33123/64213 [08:54<07:20, 70.64it/s]

Invalid image position
Invalid image position


 52%|█████▏    | 33340/64213 [08:57<05:27, 94.18it/s]

Invalid image position


 53%|█████▎    | 33855/64213 [09:05<07:52, 64.25it/s]

Invalid image position


 53%|█████▎    | 34008/64213 [09:07<06:53, 73.00it/s]

Invalid image position
Invalid image position


 53%|█████▎    | 34220/64213 [09:12<10:35, 47.20it/s]

Invalid image position


 53%|█████▎    | 34277/64213 [09:13<12:17, 40.57it/s]

Invalid image position


 53%|█████▎    | 34310/64213 [09:13<07:52, 63.27it/s]

Invalid image position


 54%|█████▎    | 34399/64213 [09:15<09:09, 54.23it/s]

Invalid image position
Invalid image position


 54%|█████▎    | 34455/64213 [09:16<08:52, 55.85it/s]

Invalid image position
Invalid image position


 54%|█████▍    | 34945/64213 [09:25<08:07, 60.00it/s]

Invalid image position


 55%|█████▌    | 35361/64213 [09:32<08:09, 58.96it/s]

Invalid image position


 56%|█████▌    | 35975/64213 [09:42<06:39, 70.68it/s]

Invalid image position


 57%|█████▋    | 36327/64213 [09:47<08:21, 55.58it/s] 

Invalid image position
Invalid image position


 57%|█████▋    | 36381/64213 [09:48<10:34, 43.89it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36444/64213 [09:49<06:16, 73.75it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36725/64213 [09:54<11:04, 41.40it/s]

Invalid image position
Invalid image position


 58%|█████▊    | 37396/64213 [10:05<08:31, 52.47it/s]

Invalid image position
Invalid image position


 58%|█████▊    | 37446/64213 [10:06<07:33, 59.07it/s]

Invalid image position


 59%|█████▊    | 37690/64213 [10:10<05:36, 78.82it/s]

Invalid image position


 59%|█████▉    | 37840/64213 [10:13<08:27, 51.93it/s]

Invalid image position
Invalid image position


 59%|█████▉    | 37950/64213 [10:15<07:14, 60.43it/s]

Invalid image position
Invalid image position


 59%|█████▉    | 37979/64213 [10:15<07:31, 58.14it/s]

Invalid image position


 60%|█████▉    | 38410/64213 [10:23<08:22, 51.38it/s]

Invalid image position


 60%|█████▉    | 38444/64213 [10:24<05:53, 72.86it/s]

Invalid image position


 60%|█████▉    | 38473/64213 [10:24<07:28, 57.45it/s]

Invalid image position


 60%|██████    | 38608/64213 [10:26<06:58, 61.22it/s]

Invalid image position
Invalid image position


 60%|██████    | 38809/64213 [10:29<06:29, 65.16it/s]

Invalid image position
Invalid image position


 60%|██████    | 38845/64213 [10:30<06:45, 62.49it/s]

Invalid image position


 62%|██████▏   | 39495/64213 [10:40<06:07, 67.22it/s]

Invalid image position
Invalid image position


 62%|██████▏   | 39786/64213 [10:45<07:00, 58.04it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40220/64213 [10:52<06:02, 66.10it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40478/64213 [10:56<05:43, 69.03it/s]

Invalid image position


 63%|██████▎   | 40525/64213 [10:57<07:05, 55.62it/s]

Invalid image position
Invalid image position


 64%|██████▎   | 40776/64213 [11:01<04:23, 88.80it/s]

Invalid image position
Invalid image position


 64%|██████▎   | 40817/64213 [11:01<04:52, 79.90it/s]

Invalid image position
Invalid image position


 64%|██████▍   | 41075/64213 [11:05<04:45, 81.05it/s]

Invalid image position


 65%|██████▍   | 41686/64213 [11:14<05:05, 73.76it/s]

Invalid image position


 66%|██████▌   | 42189/64213 [11:21<05:21, 68.47it/s]

Invalid image position


 66%|██████▌   | 42349/64213 [11:24<06:51, 53.12it/s]

Invalid image position
Invalid image position


 66%|██████▌   | 42433/64213 [11:25<06:35, 55.03it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 42822/64213 [11:32<07:11, 49.55it/s]

Invalid image position


 67%|██████▋   | 43018/64213 [11:36<06:42, 52.72it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 43139/64213 [11:38<05:05, 69.06it/s]

Invalid image position


 68%|██████▊   | 43515/64213 [11:45<07:16, 47.37it/s]

Invalid image position
Invalid image position


 68%|██████▊   | 43791/64213 [11:50<06:52, 49.57it/s]

Invalid image position


 68%|██████▊   | 43916/64213 [11:52<07:43, 43.84it/s]

Invalid image position


 68%|██████▊   | 43934/64213 [11:53<07:52, 42.88it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44178/64213 [11:57<06:15, 53.40it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44198/64213 [11:57<05:39, 58.92it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44498/64213 [12:03<06:29, 50.62it/s]

Invalid image position


 69%|██████▉   | 44526/64213 [12:04<07:07, 46.08it/s]

Invalid image position
Invalid image position


 70%|██████▉   | 44653/64213 [12:06<06:23, 50.96it/s]

Invalid image position


 70%|██████▉   | 44836/64213 [12:10<06:14, 51.69it/s]

Invalid image position
Invalid image position


 70%|███████   | 45183/64213 [12:16<06:55, 45.83it/s]

Invalid image position
Invalid image position


 70%|███████   | 45234/64213 [12:17<06:01, 52.50it/s]

Invalid image position


 71%|███████   | 45370/64213 [12:20<06:26, 48.77it/s]

Invalid image position


 71%|███████   | 45453/64213 [12:22<04:24, 70.90it/s]

Invalid image position


 71%|███████   | 45639/64213 [12:25<06:27, 47.91it/s]

Invalid image position
Invalid image position


 71%|███████   | 45719/64213 [12:26<05:41, 54.21it/s]

Invalid image position
Invalid image position


 71%|███████▏  | 45762/64213 [12:27<06:22, 48.20it/s]

Invalid image position


 71%|███████▏  | 45871/64213 [12:29<03:56, 77.69it/s]

Invalid image position
Invalid image position


 72%|███████▏  | 46023/64213 [12:32<04:40, 64.82it/s]

Invalid image position
Invalid image position


 72%|███████▏  | 46187/64213 [12:35<05:55, 50.68it/s]

Invalid image position


 73%|███████▎  | 46731/64213 [12:45<06:40, 43.62it/s]

Invalid image position
Invalid image position


 73%|███████▎  | 46745/64213 [12:46<05:34, 52.16it/s]

Invalid image position
Invalid image position


 73%|███████▎  | 46790/64213 [12:46<05:30, 52.70it/s]

Invalid image position


 73%|███████▎  | 47016/64213 [12:51<05:40, 50.55it/s]

Invalid image position
Invalid image position


 74%|███████▎  | 47324/64213 [12:56<04:56, 56.94it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47418/64213 [12:58<05:39, 49.48it/s]

Invalid image position


 74%|███████▍  | 47516/64213 [13:00<04:36, 60.47it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47594/64213 [13:01<06:00, 46.13it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47615/64213 [13:02<04:55, 56.11it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47704/64213 [13:03<05:18, 51.77it/s]

Invalid image position
Invalid image position


 75%|███████▍  | 47946/64213 [13:08<05:30, 49.24it/s]

Invalid image position


 75%|███████▌  | 48165/64213 [13:12<04:27, 59.93it/s]

Invalid image position


 76%|███████▌  | 48489/64213 [13:17<04:10, 62.88it/s]

Invalid image position


 76%|███████▌  | 48741/64213 [13:21<05:34, 46.30it/s]

Invalid image position
Invalid image position


 76%|███████▌  | 48852/64213 [13:23<04:27, 57.35it/s]

Invalid image position


 77%|███████▋  | 49325/64213 [13:31<04:06, 60.32it/s]

Invalid image position
Invalid image position


 77%|███████▋  | 49451/64213 [13:34<04:31, 54.29it/s]

Invalid image position


 77%|███████▋  | 49631/64213 [13:37<03:50, 63.27it/s]

Invalid image position


 78%|███████▊  | 49997/64213 [13:43<03:49, 62.02it/s]

Invalid image position


 78%|███████▊  | 50021/64213 [13:44<03:23, 69.80it/s]

Invalid image position


 78%|███████▊  | 50071/64213 [13:45<05:03, 46.57it/s]

Invalid image position
Invalid image position


 78%|███████▊  | 50290/64213 [13:49<03:20, 69.34it/s]

Invalid image position
Invalid image position


 79%|███████▉  | 50629/64213 [13:54<04:23, 51.59it/s]

Invalid image position


 80%|███████▉  | 51106/64213 [14:03<04:45, 45.84it/s]

Invalid image position
Invalid image position


 80%|███████▉  | 51299/64213 [14:07<04:46, 45.12it/s]

Invalid image position


 81%|████████  | 51810/64213 [14:17<03:42, 55.84it/s]

Invalid image position
Invalid image position


 81%|████████  | 51834/64213 [14:18<04:03, 50.81it/s]

Invalid image position


 81%|████████  | 51881/64213 [14:19<04:44, 43.37it/s]

Invalid image position
Invalid image position


 81%|████████  | 52147/64213 [14:24<04:53, 41.11it/s]

Invalid image position
Invalid image position


 81%|████████▏ | 52273/64213 [14:27<04:46, 41.72it/s]

Invalid image position
Invalid image position


 81%|████████▏ | 52320/64213 [14:28<03:35, 55.18it/s]

Invalid image position


 82%|████████▏ | 52628/64213 [14:34<03:06, 62.06it/s]

Invalid image position
Invalid image position


 82%|████████▏ | 52674/64213 [14:35<03:58, 48.37it/s]

Invalid image position
Invalid image position


 82%|████████▏ | 52776/64213 [14:37<02:17, 82.94it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53009/64213 [14:40<03:38, 51.35it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53054/64213 [14:41<03:53, 47.73it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53591/64213 [14:53<04:31, 39.19it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 53808/64213 [14:58<03:31, 49.27it/s]

Invalid image position


 84%|████████▍ | 53824/64213 [14:59<03:42, 46.79it/s]

Invalid image position


 84%|████████▍ | 54026/64213 [15:03<03:27, 49.15it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 54068/64213 [15:04<03:10, 53.30it/s]

Invalid image position


 84%|████████▍ | 54116/64213 [15:05<03:34, 46.98it/s]

Invalid image position


 85%|████████▍ | 54525/64213 [15:14<03:10, 50.81it/s]

Invalid image position


 85%|████████▌ | 54678/64213 [15:17<03:15, 48.88it/s]

Invalid image position
Invalid image position


 85%|████████▌ | 54732/64213 [15:18<02:23, 66.21it/s]

Invalid image position
Invalid image position


 86%|████████▌ | 54964/64213 [15:23<03:19, 46.26it/s]

Invalid image position


 86%|████████▌ | 55128/64213 [15:26<02:51, 52.92it/s]

Invalid image position


 86%|████████▌ | 55214/64213 [15:28<02:20, 63.99it/s]

Invalid image position


 86%|████████▌ | 55381/64213 [15:31<02:53, 50.79it/s]

Invalid image position


 86%|████████▋ | 55493/64213 [15:33<02:40, 54.16it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55611/64213 [15:36<03:20, 42.82it/s]

Invalid image position


 87%|████████▋ | 55912/64213 [15:41<02:13, 62.30it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55965/64213 [15:42<02:58, 46.09it/s]

Invalid image position
Invalid image position


 88%|████████▊ | 56270/64213 [15:49<02:17, 57.57it/s]

Invalid image position


 88%|████████▊ | 56454/64213 [15:52<02:26, 52.96it/s]

Invalid image position


 88%|████████▊ | 56757/64213 [15:57<01:52, 66.14it/s]

Invalid image position


 89%|████████▉ | 57348/64213 [16:11<02:22, 48.12it/s]

Invalid image position
Invalid image position


 89%|████████▉ | 57445/64213 [16:13<01:54, 59.28it/s]

Invalid image position
Invalid image position


 90%|████████▉ | 57579/64213 [16:15<01:59, 55.42it/s]

Invalid image position
Invalid image position


 90%|█████████ | 57871/64213 [16:21<01:42, 61.75it/s]

Invalid image position


 91%|█████████ | 58252/64213 [16:26<01:43, 57.79it/s]

Invalid image position
Invalid image position


 91%|█████████ | 58317/64213 [16:27<01:27, 67.09it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58624/64213 [16:32<01:44, 53.65it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58673/64213 [16:33<01:46, 51.84it/s]

Invalid image position
Invalid image position


 92%|█████████▏| 58868/64213 [16:36<01:17, 68.61it/s]

Invalid image position


 93%|█████████▎| 59592/64213 [16:48<01:23, 55.53it/s]

Invalid image position
Invalid image position


 93%|█████████▎| 59617/64213 [16:49<01:24, 54.07it/s]

Invalid image position
Invalid image position


 94%|█████████▎| 60170/64213 [16:58<01:10, 57.55it/s]

Invalid image position
Invalid image position


 94%|█████████▍| 60284/64213 [17:00<00:59, 65.85it/s]

Invalid image position


 94%|█████████▍| 60409/64213 [17:02<01:01, 62.17it/s]

Invalid image position


 94%|█████████▍| 60450/64213 [17:02<00:57, 65.36it/s]

Invalid image position


 94%|█████████▍| 60473/64213 [17:03<00:55, 66.94it/s]

Invalid image position


 95%|█████████▍| 60801/64213 [17:08<00:46, 73.48it/s]

Invalid image position
Invalid image position


 95%|█████████▍| 60903/64213 [17:10<00:51, 64.88it/s]

Invalid image position
Invalid image position


 95%|█████████▌| 61035/64213 [17:13<01:36, 33.10it/s]

Invalid image position
Invalid image position


 95%|█████████▌| 61288/64213 [17:16<00:40, 73.05it/s]

Invalid image position


 96%|█████████▌| 61330/64213 [17:17<00:47, 60.95it/s]

Invalid image position


 96%|█████████▌| 61525/64213 [17:20<00:30, 87.66it/s]

Invalid image position
Invalid image position


 96%|█████████▌| 61604/64213 [17:21<00:30, 84.62it/s]

Invalid image position


 96%|█████████▌| 61622/64213 [17:21<00:35, 73.86it/s]

Invalid image position
Invalid image position


 96%|█████████▌| 61696/64213 [17:22<00:43, 57.39it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62118/64213 [17:31<00:43, 48.28it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62241/64213 [17:33<00:39, 49.54it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62293/64213 [17:34<00:32, 59.11it/s]

Invalid image position
Invalid image position


 98%|█████████▊| 62654/64213 [17:41<00:28, 53.81it/s]

Invalid image position


 98%|█████████▊| 62762/64213 [17:43<00:22, 64.68it/s]

Invalid image position
Invalid image position


 98%|█████████▊| 62836/64213 [17:44<00:24, 56.05it/s]

Invalid image position
Invalid image position


 99%|█████████▊| 63288/64213 [17:52<00:16, 56.67it/s]

Invalid image position
Invalid image position


 99%|█████████▉| 63690/64213 [17:58<00:08, 60.96it/s]

Invalid image position


 99%|█████████▉| 63768/64213 [17:59<00:08, 50.57it/s]

Invalid image position


 99%|█████████▉| 63808/64213 [18:00<00:06, 58.70it/s]

Invalid image position
Invalid image position


100%|█████████▉| 64141/64213 [18:06<00:01, 63.27it/s]

Invalid image position


100%|██████████| 64213/64213 [18:07<00:00, 59.04it/s]
 20%|█▉        | 99/496 [00:02<00:11, 35.94it/s]

Invalid image position
Invalid image position


100%|██████████| 496/496 [00:09<00:00, 53.21it/s]
 10%|▉         | 28/293 [00:00<00:06, 40.00it/s]

Invalid image position
Invalid image position


 29%|██▊       | 84/293 [00:02<00:06, 33.99it/s]

Invalid image position


 55%|█████▍    | 160/293 [00:04<00:03, 42.38it/s]

Invalid image position
Invalid image position
Invalid image position


 75%|███████▍  | 219/293 [00:05<00:01, 48.32it/s]

Invalid image position
Invalid image position


 88%|████████▊ | 259/293 [00:06<00:00, 53.01it/s]

Invalid image position
Invalid image position


100%|██████████| 293/293 [00:07<00:00, 40.58it/s]


In [25]:
l = []

In [26]:
with open("MIMIC_JPG_train_trimmed.tsv", "r") as f:
    reader = csv.reader(f, delimiter="\t")
    for item in reader:
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")
        l.append((cur_img_count, next_img_count))


FileNotFoundError: [Errno 2] No such file or directory: 'MIMIC_JPG_train_trimmed.tsv'

In [29]:
LIMIT = 2

with open("MIMIC_JPG_test.tsv", "r") as f, open("MIMIC_JPG_test_trimmed.tsv", "w") as f_write:
    reader = csv.reader(f, delimiter="\t")
    writer = csv.writer(f_write, delimiter="\t")
    for item in tqdm(reader):
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")

        if cur_img_count > LIMIT:
            cur_img = "[VIEW_DELIM]".join(cur_img.split("[VIEW_DELIM]")[:LIMIT])
        if next_img_count > LIMIT:
            next_img = "[VIEW_DELIM]".join(next_img.split("[VIEW_DELIM]")[:LIMIT])

        views = cur_img.split("[VIEW_DELIM]")
        views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
        cur_img = "[VIEW_DELIM]".join(views)

        if next_img_count != 0:
            views = next_img.split("[VIEW_DELIM]")
            views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
            next_img = "[VIEW_DELIM]".join(views)

        if "[NEXT_IMG]" in item[-1]:
            cur_img = f"{cur_img}[NEXT_IMG]{next_img}"

        item[-1] = cur_img
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")

        item = (*item, cur_img_count, next_img_count)

        writer.writerow(item)

2934it [00:00, 14064.95it/s]


In [None]:
from collections import Counter
c = Counter(l)

In [75]:
c

Counter({(1, 1): 52654,
         (2, 2): 40642,
         (2, 0): 34681,
         (1, 2): 25935,
         (2, 1): 25884,
         (1, 0): 8285})

In [61]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28532,
         (1, 2): 21655,
         (2, 1): 21549,
         (1, 0): 8285,
         (3, 0): 5950,
         (2, 3): 4962,
         (3, 2): 4950,
         (3, 1): 4335,
         (1, 3): 4280,
         (3, 3): 2198})

In [47]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28534,
         (1, 2): 21655,
         (2, 1): 21550,
         (1, 0): 8285,
         (3, 0): 5378,
         (2, 3): 4514,
         (3, 2): 4501,
         (3, 1): 3869,
         (1, 3): 3794,
         (3, 3): 1646,
         (4, 0): 544,
         (1, 4): 439,
         (4, 1): 429,
         (4, 2): 422,
         (2, 4): 421,
         (4, 3): 240,
         (3, 4): 222,
         (4, 4): 59,
         (1, 5): 36,
         (5, 1): 30,
         (5, 2): 22,
         (5, 0): 20,
         (2, 5): 17,
         (2, 6): 9,
         (5, 3): 9,
         (6, 0): 8,
         (1, 6): 6,
         (3, 5): 6,
         (6, 1): 5,
         (4, 5): 4,
         (6, 6): 3,
         (6, 4): 3,
         (1, 8): 3,
         (6, 2): 3,
         (1, 7): 2,
         (4, 6): 2,
         (5, 4): 2,
         (7, 6): 1,
         (8, 4): 1,
         (9, 1): 1,
         (8, 1): 1,
         (8, 2): 1,
         (2, 11): 1,
         (11, 2): 1})