In [1]:
import os
import os.path as osp
from glob import glob
import re
from pprint import pprint
from tqdm import tqdm
import pydicom as dicom
import random
import csv

PATHS = [f'/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p{s}' for s in range(10, 20)]
TEXT_PATH_ROOT = '/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/'

get_report_path = lambda img_path: Path.joinpath(TEXT_ROOT, img_path.parents[0]).with_suffix(".txt")

def check_jpg_extension(filename):
    pattern = r"\.jpg$"
    if re.search(pattern, filename):
        return True
    else:
        return False

In [2]:
import csv

splits = {
    "train": set(),
    "validate": set(),
    "test": set(),
}
with open("/datasets/mimic/cxr-jpg/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic-cxr-2.0.0-split.csv", "r") as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        if i == 0:
            continue
        splits[row[-1]].add(f"p{row[-2]}")

In [3]:
[len(splits[x]) for x in list(splits.keys())]

[64586, 500, 293]

In [4]:
ALL_IMG_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

ALL_TXT_PATHS = {
    "train": list(),
    "validate": list(),
    "test": list(),
}

In [5]:
get_txt_path = lambda s: TEXT_PATH_ROOT + "/".join(s.split("/")[10:-1]).split(".")[0] + ".txt"
get_new_img_path = lambda s: "/".join(s.split("/")[10:])

In [6]:
for PATH in PATHS:
    top_dirs = [dir for dir in os.listdir(PATH) if dir.find(".") == -1]
    for dir_name in tqdm(top_dirs):
        paths = os.listdir(osp.join(PATH, dir_name))
        img_dirs = [osp.join(PATH, dir_name, dir) for dir in paths if dir.find(".") == -1]
        for img_dir in img_dirs:
            img_paths = [osp.join(img_dir, img_name) for img_name in os.listdir(img_dir)]
            img_paths = [path for path in img_paths if check_jpg_extension(path)]
            for sname, split_set in splits.items():
                if dir_name in split_set:
                    ALL_IMG_PATHS[sname].extend([get_new_img_path(s) for s in img_paths])
                    ALL_TXT_PATHS[sname].extend([get_txt_path(s) for s in img_paths])
                    break
            else:
                assert False, "No split is defined"
                

100%|██████████| 6396/6396 [00:29<00:00, 219.46it/s]
100%|██████████| 6571/6571 [01:07<00:00, 98.03it/s] 
100%|██████████| 6526/6526 [01:04<00:00, 100.42it/s]
100%|██████████| 6548/6548 [01:08<00:00, 95.08it/s] 
100%|██████████| 6506/6506 [01:06<00:00, 97.50it/s] 
100%|██████████| 6592/6592 [01:09<00:00, 95.52it/s] 
100%|██████████| 6476/6476 [01:08<00:00, 94.71it/s] 
100%|██████████| 6642/6642 [01:11<00:00, 92.90it/s] 
100%|██████████| 6543/6543 [01:08<00:00, 94.92it/s] 
100%|██████████| 6579/6579 [01:08<00:00, 95.94it/s] 


In [7]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        new_subdir = subdirs[9][:3]
        subdirs.insert(9, new_subdir)
        path = "/".join(subdirs)
        split[p_idx] = path

In [8]:
ALL_TXT_PATHS_PATIENT = {"train": {}, "validate": {}, "test": {}}

In [9]:
for sname, split in ALL_TXT_PATHS.items():
    for p_idx in range(len(split)):
        path = split[p_idx]
        subdirs = path.split("/")
        patient_id = subdirs[10]
        study_id = subdirs[11]
        if patient_id not in ALL_TXT_PATHS_PATIENT[sname]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id] = {study_id: pat h}
        elif study_id not in ALL_TXT_PATHS_PATIENT[sname][patient_id]:
            ALL_TXT_PATHS_PATIENT[sname][patient_id][study_id] = path

In [10]:
idx = random.randint(0, len(ALL_TXT_PATHS)-1)
f = open("/datasets/mimic/cxr/physionet.org/files/mimic-cxr/2.0.0/files/p10/p10975446/s58917552.txt", "r")
text = f.read()
f.close()
print(text)

                                 FINAL REPORT
 PORTABLE CHEST, ___
 
 COMPARISON:  ___ chest x-ray.
 
 FINDINGS:  Cardiac silhouette remains enlarged and is accompanied by
 persistent pulmonary vascular congestion and interstitial edema.  Patchy
 bibasilar atelectasis also appears similar compared to the prior study.



In [11]:
def clean_section(text):
    text = re.sub(r'[\S]+:', '', text)
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [12]:
import string
def preprocess_report(text):
    # Remove unnecessary and insensible parts
    text = re.sub(r"EXAMINATION:.*", "", text)  # Remove EXAMINATION line
    text = re.sub(r"WET READ:.*", "", text)  # Remove WET READ line
    text = re.sub(r"FINAL REPORT", "", text)  # Remove FINAL REPORT line
    text = re.sub(r"STUDY:.*", "", text)  # Remove STUDY line
    text = re.sub(r"COMPARISON:.*", "", text)  # Remove COMPARISON section
    text = re.sub(r"TECHNIQUE:.*", "", text)  # Remove TECHNIQUE section
    text = re.sub(r"_+", "_", text)  # Remove multiple underscores

    # Clean up excessive newlines and spaces
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub("[^a-zA-Z0-9 :.,-]", "", text)
    text = re.sub(r" +", " ", text)
    text = text.strip()
    return text

In [13]:
def extract_sections(report_text):
    findings_pattern = r'FINDINGS:[\s\S]*:'
    impression_pattern = r'IMPRESSION:[\s\S]*'

    findings_match = re.search(findings_pattern, report_text, re.IGNORECASE)
    impression_match = re.search(impression_pattern, report_text, re.IGNORECASE)

    findings = findings_match.group().strip() if findings_match else None
    impression = impression_match.group().strip() if impression_match else None

    if findings is None and impression is None:
        return preprocess_report(report_text)
    elif impression is None:
        return clean_section(findings)
    elif findings is None:
        return clean_section(impression)
    else:
        return clean_section(findings) + " " + clean_section(impression)

    return findings, impression

In [14]:
img_positions = {}

In [15]:
frontal = {"antero-posterior", "postero-anterior", ""}
lateral = {"lateral", "left lateral"}

In [16]:
with open("../data/mimic-cxr-2.0.0-metadata.csv") as f:
    lines = [line.rstrip() for line in f][1:]
    lines = [line for line in lines if line]
    
    for line in lines:
        fields = line.split(",")
        path = osp.join(f"p{fields[1]}", f"s{fields[2]}", f"{fields[0]}.jpg")
        if f"s{fields[2]}" not in img_positions:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"] = [(path, "frontal")]
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"] = [(path, "lateral")]
        else:
            if fields[-2] in frontal:
                img_positions[f"s{fields[2]}"].append((path, "frontal"))
            elif fields[-2] in lateral:
                img_positions[f"s{fields[2]}"].append((path, "lateral"))


In [17]:
list(img_positions.items())[:5]

[('s50414267',
  [('p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg',
    'frontal'),
   ('p10000032/s50414267/174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg',
    'lateral')]),
 ('s53189527',
  [('p10000032/s53189527/2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab.jpg',
    'frontal'),
   ('p10000032/s53189527/e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c.jpg',
    'lateral')]),
 ('s53911762',
  [('p10000032/s53911762/68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714.jpg',
    'frontal'),
   ('p10000032/s53911762/fffabebf-74fd3a1f-673b6b41-96ec0ac9-2ab69818.jpg',
    'frontal')]),
 ('s56699142',
  [('p10000032/s56699142/ea030e7a-2e3b1346-bc518786-7a8fd698-f673b44c.jpg',
    'frontal')]),
 ('s57375967',
  [('p10000764/s57375967/096052b7-d256dc40-453a102b-fa7d01c6-1b22c6b4.jpg',
    'frontal'),
   ('p10000764/s57375967/b79e55c3-735ce5ac-64412506-cdc9ea79-f1af521f.jpg',
    'lateral'),
   ('p10000764/s57375967/dcfeeac4-1597e318-d0e6736a-8b2c2238-47ac3f1b.jpg',
    'lateral'

In [18]:
lines[0].split(",")

['02aa804e-bde0afdd-112c0b34-7bc16630-4e384014',
 '10000032',
 '50414267',
 'CHEST (PA AND LAT)',
 'PA',
 '3056',
 '2544',
 '21800506',
 '213014.53100000002',
 'CHEST (PA AND LAT)',
 'postero-anterior',
 'Erect']

In [19]:
ALL_IMG_PATHS["train"][0]

'p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg'

In [20]:
extract_sections(text)

'PORTABLE CHEST, FINDINGS: Cardiac silhouette remains enlarged and is accompanied by persistent pulmonary vascular congestion and interstitial edema. Patchy bibasilar atelectasis also appears similar compared to the prior study.'

In [21]:
def get_report_from_path(path):
    try:
        f = open(path, "r")
        txt = extract_sections(f.read().strip())
        f.close()
    except FileNotFoundError as e:
        print(str(e))
        return None
    return txt

In [22]:
def images_n_locs_2_txt(study_id):
    study_id = study_id.split(".")[0]
    images = img_positions[study_id]

    strings = []
    for img_path_pairs in images:
        strings.append("[LOC_DELIM]".join(img_path_pairs))

    return "[VIEW_DELIM]".join(strings)

In [23]:
import csv

for sname, split in ALL_TXT_PATHS_PATIENT.items():
    with open(f"MIMIC_JPG_{sname[:5]}.tsv", 'w') as f:
        writer = csv.writer(f, delimiter='\t')

        for patient_id, studies in tqdm(split.items()):
            studies = list(studies.items())
            study_lim = len(studies) if len(studies) == 1 else len(studies) - 1
    
            if study_lim == 1:
                cur_study_id, cur_report_path = studies[0]
                cur_txt = get_report_from_path(cur_report_path)

                try:
                    cur_image_info = images_n_locs_2_txt(cur_study_id)
                except:
                    print("Invalid image position")
                    continue

                writer.writerow([cur_txt, cur_image_info])
            else:
                for study_idx in range(study_lim):
                    cur_study_id, cur_report_path = studies[study_idx]
                    next_study_id, next_report_path = studies[study_idx+1]
    
                    cur_txt = get_report_from_path(cur_report_path)
                    next_txt = get_report_from_path(next_report_path)
    
                    combined_txt = f"{cur_txt}[NEXT_TXT]{next_txt}"

                    try:
                        cur_image_info = images_n_locs_2_txt(cur_study_id)
                        next_image_info = images_n_locs_2_txt(next_study_id)
                    except:
                        print("Invalid image position")
                        continue

                    combined_img_info = f"{cur_image_info}[NEXT_IMG]{next_image_info}"
                    writer.writerow([combined_txt, combined_img_info])


  0%|          | 42/64213 [00:00<08:23, 127.56it/s]

Invalid image position
Invalid image position


  0%|          | 126/64213 [00:00<07:22, 144.92it/s]

Invalid image position


  0%|          | 268/64213 [00:01<06:15, 170.32it/s]

Invalid image position


  1%|          | 550/64213 [00:03<05:17, 200.46it/s]

Invalid image position
Invalid image position
Invalid image position


  1%|          | 637/64213 [00:03<05:53, 179.96it/s]

Invalid image position


  2%|▏         | 1344/64213 [00:08<08:35, 122.07it/s]

Invalid image position
Invalid image position


  2%|▏         | 1378/64213 [00:08<09:09, 114.27it/s]

Invalid image position
Invalid image position
Invalid image position


  3%|▎         | 1787/64213 [00:11<05:13, 199.25it/s]

Invalid image position
Invalid image position


  3%|▎         | 1843/64213 [00:12<08:08, 127.61it/s]

Invalid image position
Invalid image position


  3%|▎         | 2099/64213 [00:13<05:07, 201.76it/s]

Invalid image position


  3%|▎         | 2171/64213 [00:14<05:51, 176.27it/s]

Invalid image position
Invalid image position


  4%|▍         | 2416/64213 [00:15<09:02, 113.99it/s]

Invalid image position
Invalid image position


  4%|▍         | 2744/64213 [00:17<05:47, 177.04it/s]

Invalid image position
Invalid image position


  5%|▌         | 3284/64213 [00:21<05:19, 190.43it/s]

Invalid image position


  5%|▌         | 3339/64213 [00:21<06:40, 151.90it/s]

Invalid image position


  5%|▌         | 3525/64213 [00:23<07:31, 134.41it/s]

Invalid image position
Invalid image position


  7%|▋         | 4584/64213 [00:29<05:25, 183.16it/s]

Invalid image position
Invalid image position


  7%|▋         | 4676/64213 [00:30<05:25, 182.79it/s]

Invalid image position
Invalid image position


  7%|▋         | 4758/64213 [00:30<04:22, 226.80it/s]

Invalid image position


  7%|▋         | 4804/64213 [00:30<04:51, 204.14it/s]

Invalid image position
Invalid image position


  8%|▊         | 5313/64213 [00:34<05:39, 173.63it/s]

Invalid image position


  8%|▊         | 5405/64213 [00:34<05:07, 191.51it/s]

Invalid image position
Invalid image position


  9%|▊         | 5500/64213 [00:35<06:24, 152.76it/s]

Invalid image position
Invalid image position


  9%|▉         | 5925/64213 [00:38<06:31, 148.83it/s]

Invalid image position


  9%|▉         | 5991/64213 [00:38<06:03, 160.37it/s]

Invalid image position


 10%|▉         | 6197/64213 [00:40<05:32, 174.61it/s]

Invalid image position
Invalid image position


 10%|█         | 6537/64213 [00:49<28:12, 34.09it/s] 

Invalid image position


 10%|█         | 6567/64213 [00:50<26:15, 36.59it/s]

Invalid image position


 11%|█         | 7024/64213 [01:05<30:49, 30.92it/s]

Invalid image position
Invalid image position


 11%|█         | 7116/64213 [01:08<30:22, 31.32it/s]

Invalid image position
Invalid image position
Invalid image position


 12%|█▏        | 7398/64213 [01:18<33:35, 28.20it/s]

Invalid image position
Invalid image position


 12%|█▏        | 7610/64213 [01:26<30:05, 31.35it/s]

Invalid image position
Invalid image position


 12%|█▏        | 7668/64213 [01:28<29:43, 31.70it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8087/64213 [01:42<23:51, 39.22it/s]

Invalid image position


 13%|█▎        | 8324/64213 [01:51<37:12, 25.03it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8341/64213 [01:51<29:21, 31.71it/s]

Invalid image position
Invalid image position


 13%|█▎        | 8433/64213 [01:55<26:36, 34.93it/s]

Invalid image position


 14%|█▍        | 8899/64213 [02:08<22:36, 40.78it/s]

Invalid image position
Invalid image position


 15%|█▍        | 9313/64213 [02:20<28:52, 31.70it/s]

Invalid image position
Invalid image position


 15%|█▍        | 9472/64213 [02:25<27:27, 33.22it/s]

Invalid image position
Invalid image position


 15%|█▌        | 9771/64213 [02:33<22:00, 41.22it/s]

Invalid image position


 15%|█▌        | 9776/64213 [02:33<22:21, 40.56it/s]

Invalid image position
Invalid image position


 16%|█▌        | 10088/64213 [02:42<25:32, 35.32it/s]

Invalid image position
Invalid image position


 16%|█▌        | 10248/64213 [02:46<26:52, 33.46it/s]

Invalid image position


 16%|█▌        | 10411/64213 [02:51<20:00, 44.82it/s]

Invalid image position
Invalid image position


 17%|█▋        | 10639/64213 [02:58<37:21, 23.90it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 17%|█▋        | 10853/64213 [03:04<18:17, 48.62it/s]

Invalid image position


 17%|█▋        | 10919/64213 [03:05<21:53, 40.57it/s]

Invalid image position


 18%|█▊        | 11320/64213 [03:15<18:51, 46.74it/s]

Invalid image position
Invalid image position


 18%|█▊        | 11774/64213 [03:27<21:46, 40.14it/s]

Invalid image position
Invalid image position


 18%|█▊        | 11815/64213 [03:28<19:23, 45.05it/s]

Invalid image position
Invalid image position


 19%|█▊        | 11980/64213 [03:33<22:47, 38.19it/s]

Invalid image position
Invalid image position


 19%|█▉        | 12354/64213 [03:46<25:31, 33.86it/s]

Invalid image position


 19%|█▉        | 12389/64213 [03:48<50:11, 17.21it/s]  

Invalid image position
Invalid image position


 20%|█▉        | 12632/64213 [03:56<37:21, 23.01it/s]

Invalid image position
Invalid image position


 20%|█▉        | 12706/64213 [03:58<20:50, 41.18it/s]

Invalid image position
Invalid image position


 20%|██        | 13121/64213 [04:12<28:51, 29.50it/s]

Invalid image position
Invalid image position


 21%|██        | 13465/64213 [04:22<21:49, 38.75it/s]

Invalid image position


 21%|██        | 13532/64213 [04:23<23:14, 36.34it/s]

Invalid image position


 21%|██        | 13556/64213 [04:24<26:25, 31.95it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13812/64213 [04:32<25:03, 33.52it/s]

Invalid image position


 22%|██▏       | 13843/64213 [04:33<25:34, 32.83it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13884/64213 [04:34<24:32, 34.18it/s]

Invalid image position
Invalid image position


 22%|██▏       | 13898/64213 [04:35<31:33, 26.57it/s]

Invalid image position
Invalid image position


 23%|██▎       | 14654/64213 [04:59<33:26, 24.70it/s]

Invalid image position
Invalid image position


 23%|██▎       | 14830/64213 [05:06<29:31, 27.88it/s]

Invalid image position


 23%|██▎       | 14952/64213 [05:11<41:15, 19.90it/s]

Invalid image position
Invalid image position


 23%|██▎       | 14994/64213 [05:12<32:48, 25.00it/s]

Invalid image position
Invalid image position


 23%|██▎       | 15074/64213 [05:15<31:50, 25.72it/s]

Invalid image position


 24%|██▎       | 15112/64213 [05:17<34:22, 23.80it/s]

Invalid image position


 24%|██▍       | 15479/64213 [05:30<29:54, 27.16it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 24%|██▍       | 15544/64213 [05:33<23:57, 33.85it/s]

Invalid image position


 24%|██▍       | 15610/64213 [05:35<26:09, 30.97it/s]

Invalid image position
Invalid image position


 25%|██▍       | 15925/64213 [05:45<17:42, 45.43it/s]

Invalid image position
Invalid image position


 25%|██▌       | 16133/64213 [05:50<18:14, 43.94it/s]

Invalid image position


 26%|██▌       | 16429/64213 [05:59<26:12, 30.38it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17060/64213 [06:18<24:25, 32.17it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17277/64213 [06:25<32:09, 24.32it/s]

Invalid image position
Invalid image position


 27%|██▋       | 17295/64213 [06:25<28:05, 27.84it/s]

Invalid image position


 27%|██▋       | 17543/64213 [06:34<24:51, 31.29it/s]

Invalid image position
Invalid image position


 28%|██▊       | 17677/64213 [06:39<30:38, 25.32it/s]

Invalid image position


 28%|██▊       | 17965/64213 [06:49<26:10, 29.46it/s]

Invalid image position
Invalid image position


 28%|██▊       | 18209/64213 [06:58<30:48, 24.89it/s]

Invalid image position
Invalid image position
Invalid image position


 28%|██▊       | 18262/64213 [07:00<21:52, 35.00it/s]

Invalid image position
Invalid image position
Invalid image position


 29%|██▉       | 18639/64213 [07:14<29:21, 25.87it/s]

Invalid image position
Invalid image position


 29%|██▉       | 18706/64213 [07:16<28:59, 26.16it/s]

Invalid image position
Invalid image position


 30%|██▉       | 19127/64213 [07:31<23:59, 31.32it/s]

Invalid image position


 30%|███       | 19359/64213 [07:39<23:24, 31.93it/s]

Invalid image position


 30%|███       | 19426/64213 [07:41<33:54, 22.01it/s]

Invalid image position


 30%|███       | 19489/64213 [07:43<20:16, 36.76it/s]

Invalid image position
Invalid image position


 30%|███       | 19572/64213 [07:46<25:24, 29.28it/s]

Invalid image position


 31%|███       | 19694/64213 [07:51<26:11, 28.33it/s]

Invalid image position
Invalid image position


 31%|███       | 19730/64213 [07:52<25:01, 29.63it/s]

Invalid image position
Invalid image position


 31%|███       | 19992/64213 [08:01<28:33, 25.80it/s]

Invalid image position
Invalid image position


 31%|███       | 20037/64213 [08:02<27:00, 27.27it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20480/64213 [08:17<25:14, 28.88it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20693/64213 [08:25<20:53, 34.72it/s]

Invalid image position


 32%|███▏      | 20745/64213 [08:27<24:45, 29.27it/s]

Invalid image position


 32%|███▏      | 20788/64213 [08:28<26:02, 27.80it/s]

Invalid image position
Invalid image position


 32%|███▏      | 20832/64213 [08:30<21:49, 33.12it/s]

Invalid image position


 33%|███▎      | 21006/64213 [08:36<30:50, 23.34it/s]

Invalid image position


 33%|███▎      | 21076/64213 [08:38<20:15, 35.48it/s]

Invalid image position


 33%|███▎      | 21342/64213 [08:47<21:08, 33.79it/s]

Invalid image position
Invalid image position


 34%|███▍      | 21850/64213 [09:02<19:20, 36.51it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23015/64213 [09:38<23:55, 28.69it/s]

Invalid image position
Invalid image position


 36%|███▌      | 23271/64213 [09:47<17:30, 38.99it/s]

Invalid image position


 37%|███▋      | 23580/64213 [09:59<22:22, 30.27it/s]

Invalid image position
Invalid image position


 37%|███▋      | 23587/64213 [09:59<23:56, 28.28it/s]

Invalid image position
Invalid image position


 38%|███▊      | 24283/64213 [10:24<27:38, 24.07it/s]

Invalid image position
Invalid image position


 38%|███▊      | 24421/64213 [10:29<21:59, 30.16it/s]

Invalid image position


 38%|███▊      | 24715/64213 [10:38<19:16, 34.14it/s]

Invalid image position


 39%|███▉      | 25096/64213 [10:52<22:27, 29.03it/s]

Invalid image position
Invalid image position


 40%|███▉      | 25481/64213 [11:05<18:40, 34.57it/s]

Invalid image position


 40%|███▉      | 25589/64213 [11:08<17:55, 35.90it/s]

Invalid image position


 40%|████      | 25846/64213 [11:16<26:27, 24.17it/s]

Invalid image position


 41%|████      | 26127/64213 [11:27<16:22, 38.75it/s]

Invalid image position
Invalid image position


 41%|████      | 26240/64213 [11:31<24:55, 25.39it/s]

Invalid image position
Invalid image position


 41%|████      | 26309/64213 [11:34<22:41, 27.84it/s]

Invalid image position


 41%|████      | 26320/64213 [11:34<21:23, 29.53it/s]

Invalid image position


 41%|████▏     | 26598/64213 [11:45<19:14, 32.57it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26651/64213 [11:46<17:15, 36.27it/s]

Invalid image position


 42%|████▏     | 26698/64213 [11:48<26:31, 23.57it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26752/64213 [11:50<16:07, 38.71it/s]

Invalid image position
Invalid image position


 42%|████▏     | 26963/64213 [11:58<24:36, 25.22it/s]

Invalid image position


 44%|████▎     | 27995/64213 [12:33<17:20, 34.81it/s]

Invalid image position


 44%|████▍     | 28135/64213 [12:37<17:46, 33.82it/s]

Invalid image position
Invalid image position


 44%|████▍     | 28205/64213 [12:39<13:58, 42.97it/s]

Invalid image position
Invalid image position


 44%|████▍     | 28516/64213 [12:48<19:02, 31.23it/s]

Invalid image position
Invalid image position


 45%|████▍     | 28828/64213 [12:57<24:44, 23.83it/s]

Invalid image position


 45%|████▌     | 28923/64213 [13:01<30:17, 19.41it/s]

Invalid image position
Invalid image position


 45%|████▌     | 29026/64213 [13:05<19:58, 29.37it/s]

Invalid image position


 45%|████▌     | 29056/64213 [13:06<19:13, 30.47it/s]

Invalid image position
Invalid image position


 46%|████▌     | 29400/64213 [13:19<21:48, 26.61it/s]

Invalid image position


 46%|████▌     | 29574/64213 [13:25<17:51, 32.32it/s]

Invalid image position


 47%|████▋     | 30016/64213 [13:40<19:31, 29.20it/s]

Invalid image position
Invalid image position


 47%|████▋     | 30107/64213 [13:43<19:17, 29.47it/s]

Invalid image position
Invalid image position
Invalid image position
Invalid image position


 47%|████▋     | 30420/64213 [13:54<17:23, 32.37it/s]

Invalid image position
Invalid image position


 48%|████▊     | 30813/64213 [14:07<17:06, 32.53it/s]

Invalid image position
Invalid image position


 48%|████▊     | 30839/64213 [14:08<20:11, 27.55it/s]

Invalid image position


 48%|████▊     | 30867/64213 [14:09<16:00, 34.71it/s]

Invalid image position


 49%|████▊     | 31270/64213 [14:21<19:06, 28.74it/s]

Invalid image position


 49%|████▉     | 31564/64213 [14:31<23:24, 23.24it/s]

Invalid image position


 49%|████▉     | 31614/64213 [14:33<19:58, 27.20it/s]

Invalid image position
Invalid image position


 49%|████▉     | 31759/64213 [14:38<20:46, 26.04it/s]

Invalid image position
Invalid image position


 50%|████▉     | 32100/64213 [14:48<16:03, 33.32it/s]

Invalid image position
Invalid image position


 50%|█████     | 32400/64213 [14:58<14:33, 36.41it/s]

Invalid image position


 51%|█████     | 32866/64213 [15:12<17:03, 30.61it/s]

Invalid image position
Invalid image position


 51%|█████     | 32887/64213 [15:13<14:40, 35.56it/s]

Invalid image position


 52%|█████▏    | 33101/64213 [15:19<14:49, 34.99it/s]

Invalid image position


 52%|█████▏    | 33123/64213 [15:20<16:41, 31.05it/s]

Invalid image position
Invalid image position


 52%|█████▏    | 33331/64213 [15:25<12:40, 40.62it/s]

Invalid image position


 53%|█████▎    | 33848/64213 [15:41<13:54, 36.40it/s]

Invalid image position


 53%|█████▎    | 33998/64213 [15:45<13:14, 38.01it/s]

Invalid image position
Invalid image position


 53%|█████▎    | 34221/64213 [15:52<19:12, 26.03it/s]

Invalid image position


 53%|█████▎    | 34272/64213 [15:54<18:47, 26.55it/s]

Invalid image position


 53%|█████▎    | 34309/64213 [15:56<15:49, 31.48it/s]

Invalid image position


 54%|█████▎    | 34398/64213 [15:59<18:38, 26.66it/s]

Invalid image position
Invalid image position


 54%|█████▎    | 34447/64213 [16:01<22:28, 22.07it/s]

Invalid image position
Invalid image position


 54%|█████▍    | 34939/64213 [16:19<16:15, 30.01it/s]

Invalid image position


 55%|█████▌    | 35355/64213 [16:35<18:22, 26.19it/s]

Invalid image position


 56%|█████▌    | 35969/64213 [16:53<11:28, 41.03it/s]

Invalid image position


 57%|█████▋    | 36325/64213 [17:03<12:12, 38.08it/s]

Invalid image position


 57%|█████▋    | 36339/64213 [17:03<09:41, 47.91it/s]

Invalid image position


 57%|█████▋    | 36379/64213 [17:04<16:36, 27.94it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36438/64213 [17:06<12:37, 36.67it/s]

Invalid image position
Invalid image position


 57%|█████▋    | 36722/64213 [17:15<22:56, 19.98it/s]

Invalid image position
Invalid image position


 58%|█████▊    | 37389/64213 [17:38<17:57, 24.89it/s]

Invalid image position
Invalid image position


 58%|█████▊    | 37440/64213 [17:40<18:39, 23.92it/s]

Invalid image position


 59%|█████▊    | 37676/64213 [17:48<13:37, 32.44it/s]

Invalid image position


 59%|█████▉    | 37836/64213 [17:54<16:51, 26.08it/s]

Invalid image position
Invalid image position


 59%|█████▉    | 37948/64213 [17:58<15:29, 28.26it/s]

Invalid image position
Invalid image position


 59%|█████▉    | 37972/64213 [17:59<14:11, 30.81it/s]

Invalid image position


 60%|█████▉    | 38402/64213 [18:14<20:04, 21.44it/s]

Invalid image position


 60%|█████▉    | 38434/64213 [18:15<10:54, 39.41it/s]

Invalid image position


 60%|█████▉    | 38467/64213 [18:17<15:37, 27.45it/s]

Invalid image position


 60%|██████    | 38610/64213 [18:21<17:01, 25.06it/s]

Invalid image position
Invalid image position


 60%|██████    | 38798/64213 [18:28<16:37, 25.47it/s]

Invalid image position
Invalid image position


 60%|██████    | 38839/64213 [18:29<19:55, 21.23it/s]

Invalid image position


 61%|██████▏   | 39484/64213 [18:52<18:04, 22.81it/s]

Invalid image position
Invalid image position


 62%|██████▏   | 39783/64213 [19:04<14:35, 27.91it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40219/64213 [19:20<15:26, 25.91it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40484/64213 [19:30<15:01, 26.33it/s]

Invalid image position


 63%|██████▎   | 40521/64213 [19:32<18:05, 21.82it/s]

Invalid image position
Invalid image position


 63%|██████▎   | 40757/64213 [19:39<14:42, 26.58it/s]

Invalid image position
Invalid image position


 64%|██████▎   | 40817/64213 [19:41<09:25, 41.36it/s]

Invalid image position
Invalid image position


 64%|██████▍   | 41068/64213 [19:49<08:46, 43.99it/s]

Invalid image position


 65%|██████▍   | 41683/64213 [20:08<11:13, 33.43it/s]

Invalid image position


 66%|██████▌   | 42180/64213 [20:21<10:00, 36.67it/s]

Invalid image position


 66%|██████▌   | 42347/64213 [20:27<13:17, 27.41it/s]

Invalid image position
Invalid image position


 66%|██████▌   | 42428/64213 [20:29<10:58, 33.08it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 42821/64213 [20:43<13:22, 26.67it/s]

Invalid image position


 67%|██████▋   | 43011/64213 [20:49<11:43, 30.14it/s]

Invalid image position
Invalid image position


 67%|██████▋   | 43135/64213 [20:53<08:40, 40.53it/s]

Invalid image position


 68%|██████▊   | 43514/64213 [21:07<13:45, 25.06it/s]

Invalid image position
Invalid image position


 68%|██████▊   | 43786/64213 [21:16<12:19, 27.61it/s]

Invalid image position


 68%|██████▊   | 43914/64213 [21:21<12:14, 27.65it/s]

Invalid image position


 68%|██████▊   | 43931/64213 [21:21<12:41, 26.62it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44171/64213 [21:30<13:08, 25.43it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44194/64213 [21:30<10:54, 30.58it/s]

Invalid image position
Invalid image position


 69%|██████▉   | 44495/64213 [21:41<10:44, 30.59it/s]

Invalid image position


 69%|██████▉   | 44522/64213 [21:42<12:32, 26.16it/s]

Invalid image position
Invalid image position


 70%|██████▉   | 44647/64213 [21:47<12:03, 27.05it/s]

Invalid image position


 70%|██████▉   | 44830/64213 [21:54<12:02, 26.84it/s]

Invalid image position
Invalid image position


 70%|███████   | 45182/64213 [22:07<10:48, 29.36it/s]

Invalid image position
Invalid image position


 70%|███████   | 45237/64213 [22:09<12:06, 26.12it/s]

Invalid image position


 71%|███████   | 45370/64213 [22:15<07:19, 42.86it/s]

Invalid image position


 71%|███████   | 45444/64213 [22:17<11:25, 27.36it/s]

Invalid image position


 71%|███████   | 45639/64213 [22:24<10:24, 29.73it/s]

Invalid image position
Invalid image position


 71%|███████   | 45715/64213 [22:27<11:16, 27.33it/s]

Invalid image position
Invalid image position


 71%|███████▏  | 45760/64213 [22:29<13:23, 22.98it/s]

Invalid image position


 71%|███████▏  | 45860/64213 [22:32<09:16, 33.01it/s]

Invalid image position
Invalid image position


 72%|███████▏  | 46021/64213 [22:38<08:00, 37.84it/s]

Invalid image position
Invalid image position


 72%|███████▏  | 46179/64213 [22:43<08:43, 34.43it/s]

Invalid image position


 73%|███████▎  | 46729/64213 [23:04<10:17, 28.32it/s]

Invalid image position
Invalid image position


 73%|███████▎  | 46739/64213 [23:04<10:17, 28.28it/s]

Invalid image position
Invalid image position


 73%|███████▎  | 46785/64213 [23:06<08:28, 34.30it/s]

Invalid image position


 73%|███████▎  | 47016/64213 [23:14<10:54, 26.28it/s]

Invalid image position
Invalid image position


 74%|███████▎  | 47321/64213 [23:25<09:10, 30.66it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47416/64213 [23:29<10:46, 26.00it/s]

Invalid image position


 74%|███████▍  | 47514/64213 [23:33<09:28, 29.40it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47593/64213 [23:36<11:37, 23.82it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47610/64213 [23:37<10:28, 26.40it/s]

Invalid image position
Invalid image position


 74%|███████▍  | 47701/64213 [23:40<11:32, 23.84it/s]

Invalid image position
Invalid image position


 75%|███████▍  | 47945/64213 [23:51<11:10, 24.25it/s]

Invalid image position


 75%|███████▌  | 48162/64213 [23:59<08:59, 29.77it/s]

Invalid image position


 76%|███████▌  | 48485/64213 [24:10<10:15, 25.56it/s]

Invalid image position


 76%|███████▌  | 48739/64213 [24:20<10:05, 25.55it/s]

Invalid image position
Invalid image position


 76%|███████▌  | 48843/64213 [24:24<11:03, 23.16it/s]

Invalid image position


 77%|███████▋  | 49321/64213 [24:41<08:38, 28.72it/s]

Invalid image position
Invalid image position


 77%|███████▋  | 49445/64213 [24:45<07:59, 30.82it/s]

Invalid image position


 77%|███████▋  | 49630/64213 [24:52<08:02, 30.20it/s]

Invalid image position


 78%|███████▊  | 49993/64213 [25:03<07:15, 32.64it/s]

Invalid image position


 78%|███████▊  | 50009/64213 [25:04<08:21, 28.32it/s]

Invalid image position


 78%|███████▊  | 50065/64213 [25:06<08:47, 26.80it/s]

Invalid image position
Invalid image position


 78%|███████▊  | 50289/64213 [25:12<05:01, 46.12it/s]

Invalid image position
Invalid image position


 79%|███████▉  | 50622/64213 [25:21<07:51, 28.80it/s]

Invalid image position


 80%|███████▉  | 51108/64213 [25:37<06:51, 31.82it/s]

Invalid image position
Invalid image position


 80%|███████▉  | 51298/64213 [25:43<07:39, 28.08it/s]

Invalid image position


 81%|████████  | 51802/64213 [26:01<08:43, 23.72it/s]

Invalid image position
Invalid image position


 81%|████████  | 51828/64213 [26:02<08:16, 24.96it/s]

Invalid image position


 81%|████████  | 51880/64213 [26:04<06:23, 32.18it/s]

Invalid image position
Invalid image position


 81%|████████  | 52147/64213 [26:14<06:33, 30.65it/s]

Invalid image position
Invalid image position


 81%|████████▏ | 52269/64213 [26:19<08:58, 22.16it/s]

Invalid image position
Invalid image position


 81%|████████▏ | 52315/64213 [26:20<06:28, 30.66it/s]

Invalid image position


 82%|████████▏ | 52623/64213 [26:31<06:34, 29.39it/s]

Invalid image position


 82%|████████▏ | 52636/64213 [26:31<05:41, 33.94it/s]

Invalid image position


 82%|████████▏ | 52667/64213 [26:32<05:32, 34.77it/s]

Invalid image position
Invalid image position


 82%|████████▏ | 52774/64213 [26:35<04:41, 40.65it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53011/64213 [26:43<05:42, 32.67it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53051/64213 [26:44<06:38, 28.01it/s]

Invalid image position
Invalid image position


 83%|████████▎ | 53587/64213 [27:03<06:37, 26.75it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 53805/64213 [27:10<05:40, 30.56it/s]

Invalid image position


 84%|████████▍ | 53823/64213 [27:11<05:47, 29.87it/s]

Invalid image position


 84%|████████▍ | 54025/64213 [27:18<05:51, 29.02it/s]

Invalid image position
Invalid image position


 84%|████████▍ | 54066/64213 [27:20<06:57, 24.30it/s]

Invalid image position


 84%|████████▍ | 54111/64213 [27:21<06:15, 26.93it/s]

Invalid image position


 85%|████████▍ | 54520/64213 [27:36<04:39, 34.70it/s]

Invalid image position


 85%|████████▌ | 54674/64213 [27:41<05:50, 27.24it/s]

Invalid image position
Invalid image position


 85%|████████▌ | 54719/64213 [27:43<06:22, 24.83it/s]

Invalid image position
Invalid image position


 86%|████████▌ | 54960/64213 [27:52<04:36, 33.46it/s]

Invalid image position


 86%|████████▌ | 55120/64213 [27:57<05:46, 26.24it/s]

Invalid image position


 86%|████████▌ | 55215/64213 [28:00<03:57, 37.83it/s]

Invalid image position


 86%|████████▌ | 55370/64213 [28:06<05:36, 26.32it/s]

Invalid image position


 86%|████████▋ | 55491/64213 [28:10<04:19, 33.56it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55611/64213 [28:13<04:55, 29.16it/s]

Invalid image position


 87%|████████▋ | 55908/64213 [28:21<03:43, 37.14it/s]

Invalid image position
Invalid image position


 87%|████████▋ | 55963/64213 [28:24<05:53, 23.34it/s]

Invalid image position
Invalid image position


 88%|████████▊ | 56261/64213 [28:34<04:39, 28.45it/s]

Invalid image position


 88%|████████▊ | 56453/64213 [28:41<06:09, 21.01it/s]

Invalid image position


 88%|████████▊ | 56753/64213 [28:50<03:01, 41.18it/s]

Invalid image position


 89%|████████▉ | 57349/64213 [29:11<03:46, 30.35it/s]

Invalid image position
Invalid image position


 89%|████████▉ | 57442/64213 [29:15<04:05, 27.57it/s]

Invalid image position
Invalid image position


 90%|████████▉ | 57575/64213 [29:20<04:00, 27.58it/s]

Invalid image position
Invalid image position


 90%|█████████ | 57867/64213 [29:31<03:39, 28.89it/s]

Invalid image position


 91%|█████████ | 58244/64213 [29:45<04:04, 24.37it/s]

Invalid image position
Invalid image position


 91%|█████████ | 58312/64213 [29:48<03:50, 25.56it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58624/64213 [29:59<03:25, 27.15it/s]

Invalid image position
Invalid image position


 91%|█████████▏| 58674/64213 [30:01<03:26, 26.84it/s]

Invalid image position
Invalid image position


 92%|█████████▏| 58866/64213 [30:08<03:12, 27.71it/s]

Invalid image position


 93%|█████████▎| 59590/64213 [30:38<02:49, 27.35it/s]

Invalid image position
Invalid image position


 93%|█████████▎| 59614/64213 [30:39<02:57, 25.94it/s]

Invalid image position
Invalid image position


 94%|█████████▎| 60170/64213 [31:00<02:32, 26.59it/s]

Invalid image position
Invalid image position


 94%|█████████▍| 60278/64213 [31:04<02:30, 26.09it/s]

Invalid image position


 94%|█████████▍| 60403/64213 [31:09<02:42, 23.46it/s]

Invalid image position


 94%|█████████▍| 60451/64213 [31:11<02:03, 30.49it/s]

Invalid image position


 94%|█████████▍| 60476/64213 [31:12<02:18, 26.89it/s]

Invalid image position


 95%|█████████▍| 60796/64213 [31:23<01:54, 29.91it/s]

Invalid image position


 95%|█████████▍| 60800/64213 [31:23<01:55, 29.54it/s]

Invalid image position


 95%|█████████▍| 60899/64213 [31:27<01:31, 36.07it/s]

Invalid image position
Invalid image position


 95%|█████████▌| 61034/64213 [31:32<02:51, 18.51it/s]

Invalid image position
Invalid image position


 95%|█████████▌| 61282/64213 [31:40<01:24, 34.77it/s]

Invalid image position


 96%|█████████▌| 61325/64213 [31:41<01:13, 39.40it/s]

Invalid image position


 96%|█████████▌| 61516/64213 [31:47<01:15, 35.93it/s]

Invalid image position
Invalid image position


 96%|█████████▌| 61596/64213 [31:49<01:17, 33.67it/s]

Invalid image position


 96%|█████████▌| 61619/64213 [31:50<01:17, 33.33it/s]

Invalid image position
Invalid image position


 96%|█████████▌| 61691/64213 [31:52<01:33, 26.85it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62115/64213 [32:07<00:58, 36.12it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62238/64213 [32:12<01:04, 30.56it/s]

Invalid image position
Invalid image position


 97%|█████████▋| 62293/64213 [32:13<01:15, 25.32it/s]

Invalid image position
Invalid image position


 98%|█████████▊| 62653/64213 [32:25<00:52, 29.77it/s]

Invalid image position


 98%|█████████▊| 62754/64213 [32:29<00:56, 25.92it/s]

Invalid image position
Invalid image position


 98%|█████████▊| 62835/64213 [32:31<01:02, 22.13it/s]

Invalid image position
Invalid image position


 99%|█████████▊| 63289/64213 [32:48<00:36, 25.56it/s]

Invalid image position
Invalid image position


 99%|█████████▉| 63684/64213 [33:02<00:20, 26.16it/s]

Invalid image position


 99%|█████████▉| 63765/64213 [33:05<00:19, 23.17it/s]

Invalid image position


 99%|█████████▉| 63801/64213 [33:07<00:18, 22.51it/s]

Invalid image position
Invalid image position


100%|█████████▉| 64136/64213 [33:20<00:03, 24.27it/s]

Invalid image position


100%|██████████| 64213/64213 [33:23<00:00, 32.05it/s]
 20%|█▉        | 97/496 [00:02<00:20, 19.65it/s] 

Invalid image position
Invalid image position


100%|██████████| 496/496 [00:19<00:00, 26.07it/s]
 10%|▉         | 29/293 [00:00<00:08, 29.54it/s]

Invalid image position
Invalid image position


 28%|██▊       | 83/293 [00:03<00:12, 17.49it/s]

Invalid image position


 54%|█████▎    | 157/293 [00:07<00:06, 20.02it/s]

Invalid image position


 57%|█████▋    | 167/293 [00:08<00:05, 23.66it/s]

Invalid image position
Invalid image position


 74%|███████▎  | 216/293 [00:10<00:03, 19.33it/s]

Invalid image position
Invalid image position


 89%|████████▉ | 262/293 [00:13<00:01, 17.65it/s]

Invalid image position
Invalid image position


100%|██████████| 293/293 [00:15<00:00, 19.53it/s]


In [72]:
l = []

In [73]:
with open("MIMIC_JPG_train_trimmed.tsv", "r") as f:
    reader = csv.reader(f, delimiter="\t")
    for item in reader:
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")
        l.append((cur_img_count, next_img_count))


In [77]:
LIMIT = 2

with open("MIMIC_JPG_valid.tsv", "r") as f, open("MIMIC_JPG_valid_trimmed.tsv", "w") as f_write:
    reader = csv.reader(f, delimiter="\t")
    writer = csv.writer(f_write, delimiter="\t")
    for item in tqdm(reader):
        if "[NEXT_IMG]" not in item[-1]:
            cur_img = item[-1]
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), 0
        else:
            cur_img, next_img = item[-1].split("[NEXT_IMG]")
            cur_img_count, next_img_count = cur_img.count("LOC_DELIM"), next_img.count("LOC_DELIM")

        if cur_img_count > LIMIT:
            cur_img = "[VIEW_DELIM]".join(cur_img.split("[VIEW_DELIM]")[:LIMIT])
        if next_img_count > LIMIT:
            next_img = "[VIEW_DELIM]".join(next_img.split("[VIEW_DELIM]")[:LIMIT])

        views = cur_img.split("[VIEW_DELIM]")
        views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
        cur_img = "[VIEW_DELIM]".join(views)

        if next_img_count != 0:
            views = next_img.split("[VIEW_DELIM]")
            views = sorted(views, key=lambda view: view.split("[LOC_DELIM]")[-1])
            next_img = "[VIEW_DELIM]".join(views)

        if "[NEXT_IMG]" in item[-1]:
            cur_img = f"{cur_img}[NEXT_IMG]{next_img}"

        item[-1] = cur_img

        writer.writerow(item)

1553it [00:00, 12208.54it/s]


In [74]:
from collections import Counter
c = Counter(l)

In [75]:
c

Counter({(1, 1): 52654,
         (2, 2): 40642,
         (2, 0): 34681,
         (1, 2): 25935,
         (2, 1): 25884,
         (1, 0): 8285})

In [61]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28532,
         (1, 2): 21655,
         (2, 1): 21549,
         (1, 0): 8285,
         (3, 0): 5950,
         (2, 3): 4962,
         (3, 2): 4950,
         (3, 1): 4335,
         (1, 3): 4280,
         (3, 3): 2198})

In [47]:
c

Counter({(1, 1): 52654,
         (2, 0): 28731,
         (2, 2): 28534,
         (1, 2): 21655,
         (2, 1): 21550,
         (1, 0): 8285,
         (3, 0): 5378,
         (2, 3): 4514,
         (3, 2): 4501,
         (3, 1): 3869,
         (1, 3): 3794,
         (3, 3): 1646,
         (4, 0): 544,
         (1, 4): 439,
         (4, 1): 429,
         (4, 2): 422,
         (2, 4): 421,
         (4, 3): 240,
         (3, 4): 222,
         (4, 4): 59,
         (1, 5): 36,
         (5, 1): 30,
         (5, 2): 22,
         (5, 0): 20,
         (2, 5): 17,
         (2, 6): 9,
         (5, 3): 9,
         (6, 0): 8,
         (1, 6): 6,
         (3, 5): 6,
         (6, 1): 5,
         (4, 5): 4,
         (6, 6): 3,
         (6, 4): 3,
         (1, 8): 3,
         (6, 2): 3,
         (1, 7): 2,
         (4, 6): 2,
         (5, 4): 2,
         (7, 6): 1,
         (8, 4): 1,
         (9, 1): 1,
         (8, 1): 1,
         (8, 2): 1,
         (2, 11): 1,
         (11, 2): 1})