In [1]:
import shutil
import gc
import torch

import fastai
from fastai.vision import *
from fastai.widgets import *
from fastai.callbacks import *

print("vgg19 bn 2")

Config.DEFAULT_CONFIG = {
        'data_path': './../data/MURA-v1.1/',
        'model_path': './models/'
    }

Config.create('/tmp/myconfig.yml')
Config.DEFAULT_CONFIG_PATH = '/tmp/myconfig.yml'

path = Config.data_path()

fnames_train = get_image_files('../data/MURA-v1.1/train/XR_WRIST', recurse=True)
print(len(fnames_train))

fnames_valid = get_image_files('../data/MURA-v1.1/valid/XR_WRIST', recurse=True)
print(len(fnames_valid))

pat_label = re.compile(r'/XR_([^/]+)/[^/]+/[^/]+/[^/]+.png$')
pat_patient = re.compile(r'/[^/]+/patient([^/]+)/[^/]+/[^/]+.png$')
pat_study = re.compile(r'/([^/]+)_[^/]+/[^/]+.png$')

mura = ['elbow', 'finger', 'forearm', 'hand', 'humerus', 'shoulder', 'wrist']

study_train_dict = dict()
study_valid_dict = dict()

for m in mura:
    study_train_dict[m] = list()
    study_valid_dict[m] = list()

vgg19 bn 2
9752
659


In [2]:
size = 320
bs = 32

np.random.seed(24)


def create_from_ll(lls:LabelLists, bs:int=64, val_bs:int=None, ds_tfms:Optional[TfmList]=None,
                num_workers:int=defaults.cpus, dl_tfms:Optional[Collection[Callable]]=None, device:torch.device=None,
                test:Optional[PathOrStr]=None, collate_fn:Callable=data_collate, size:int=None, no_check:bool=False,
                resize_method:ResizeMethod=None, mult:int=None, padding_mode:str='reflection',
                mode:str='bilinear', tfm_y:bool=False)->'ImageDataBunch':
    "Create an `ImageDataBunch` from `LabelLists` `lls` with potential `ds_tfms`."
    lls = lls.transform(tfms=get_transforms(do_flip=True, max_rotate=30, max_warp=0.0, p_lighting=0, xtra_tfms=[]), size=size, resize_method=resize_method, mult=mult, padding_mode=padding_mode,
                        mode=mode, tfm_y=tfm_y)
    
    return lls.databunch(bs=bs, val_bs=bs, dl_tfms=dl_tfms, num_workers=num_workers, collate_fn=collate_fn,
                         device=device, no_check=True)

src = (ImageList.from_df(df=pd.read_csv('all_bones.csv'), path='../data/MURA-v1.1/data2/', folder=None, suffix='', cols=0)
                .split_from_df("is_valid")
                .label_from_df(label_delim=None, cols=1))
data = create_from_ll(src, size = size, bs = bs).normalize(imagenet_stats)

In [4]:

print('CLASSIFYING BONES')
kappa = KappaScore()
kappa.weights = "quadratic"

learner = cnn_learner(data, models.vgg19_bn, metrics=[error_rate, accuracy, kappa], wd=0.1, model_dir="./models/").to_fp32()
bone_map = {"ELBOW":0, "FINGER": 1, "FOREARM":2, "HAND":3, "HUMERUS":4, "SHOULDER":5, "WRIST":6}

CLASSIFYING BONES


In [36]:

def run_model_on_predictions(bone_name, model):
    print(bone_name)
    df = pd.read_csv("classified_" + bone_name + ".csv")
    print(df.head())
    df = df.loc[df['is_valid'] == True]
    df = df.reset_index(drop=True)
    df.to_csv("classified_" + bone_name + "2.csv", index = None)
    
    src = (ImageList.from_df(df=pd.read_csv("classified_" + bone_name + ".csv"), path='../data/MURA-v1.1/data2/', folder=None, suffix='', cols=0)
                .split_from_df("is_valid")
                .label_from_df(label_delim=None, cols=1))
    data = create_from_ll(src, size = size, bs = bs).normalize(imagenet_stats)
    
    
    classified = ImageList.from_df(df=pd.read_csv("classified_" + bone_name + "2.csv"), path='../data/MURA-v1.1/data2/', folder=None, suffix='', cols=0)
    
    learner = cnn_learner(data, models.vgg19_bn, metrics=[error_rate, accuracy, kappa], wd=0.1, model_dir="./models/").to_fp32()
    learner.load(model)
    
    count = 0
    
    predictions = {'actual': [], 'predicted': [], 'file': []}
    
    print('predicting')
    for img in classified:
        if count % 200 == 0:
            print(count)
        curr = df.loc[count]
        prediction = learner.predict(img)
        label = int(prediction[1])
        predictions['actual'].append(curr['value'])
        predictions['predicted'].append(label)
        predictions['file'].append(curr['name'])
        count += 1
        

    
    print()
    print()
    print()
    return predictions

In [27]:
# TODO: find model name
predictions_mapping = {}
for i in list(bone_map):
    if i == 'ELBOW':
        predictions_mapping[i] = run_model_on_predictions(i, 'vgg19bn-' + i.lower() + 'sfullval')
    else:
        predictions_mapping[i] = run_model_on_predictions(i, 'vgg19bn-' + i + 'fullval')

from sklearn.metrics import accuracy_score, cohen_kappa_score
# calculate kappa and accuracy
all_actual = []
all_predicted = []
for i in list(predictions_mapping):
    print(i)
    all_actual.extend(predictions_mapping[i]['actual'])
    all_predicted.extend(predictions_mapping[i]['predicted'])
    print('accuracy ', accuracy_score(actual, predict))
    print('kappa ', cohen_kappa_score(actual, predict))

ELBOW
                                                name  value  is_valid
0  train/0/FOREARM_patient09454_study1_negative_i...  False     False
1  train/0/FOREARM_patient09454_study1_negative_i...  False     False
2  train/0/FOREARM_patient09389_study1_negative_i...  False     False
3  train/0/FOREARM_patient09322_study1_negative_i...  False     False
4  train/0/FOREARM_patient03005_study1_negative_i...  False     False
predicting
0
200
400



FINGER
                                                name  value  is_valid
0  train/0/HAND_patient10964_study1_negative_imag...  False     False
1  train/0/HAND_patient10791_study1_negative_imag...  False     False
2  train/0/HAND_patient03231_study2_negative_imag...  False     False
3  train/0/HAND_patient11160_study1_negative_imag...  False     False
4  train/0/HAND_patient06055_study2_negative_imag...  False     False
predicting
0
200
400



FOREARM
                                                name  value  is_valid
0  train/0/FOREARM_pa

In [28]:
all_actual = []
all_predicted = []
for i in list(predictions_mapping):
    print(i)
    all_actual.extend(predictions_mapping[i]['actual'])
    all_predicted.extend(predictions_mapping[i]['predicted'])
print('accuracy ', accuracy_score(all_actual, all_predicted))
print('kappa ', cohen_kappa_score(all_actual, all_predicted))

ELBOW
FINGER
FOREARM
HAND
HUMERUS
SHOULDER
WRIST
accuracy  0.8282765092274007
kappa  0.6544268977742109


In [33]:
elbow = pd.read_csv("all_ELBOW.csv")
print(len(elbow.index))

print(len(elbow.loc[elbow['value'] == 1].index))
print(len(elbow.loc[elbow['value'] == 0].index))
print()
print()

# finger, forearm, wrist, shoulder, humerus, hand
finger = pd.read_csv("all_FINGER.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

finger = pd.read_csv("all_FOREARM.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

finger = pd.read_csv("all_WRIST.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

finger = pd.read_csv("all_SHOULDER.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

finger = pd.read_csv("all_HUMERUS.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

finger = pd.read_csv("all_HAND.csv")
print(len(finger.index))
print(len(finger.loc[finger['value'] == 1].index))
print(len(finger.loc[finger['value'] == 0].index))
print()
print()

print(abnormal)
print(normal)

5396
2236
3160


5567
2215
3352


2126
812
1314


10411
4282
6129


8942
4446
4496


1560
739
821


6003
1673
4330


16403
23602


In [37]:
# TODO: find model name
predictions_mapping2 = {}
for i in list(bone_map):
    if i == 'ELBOW':
        predictions_mapping2[i] = run_model_on_predictions(i, 'vgg19bn-' + i.lower() + 'sfullval')
    else:
        predictions_mapping2[i] = run_model_on_predictions(i, 'vgg19bn-' + i + 'fullval')

from sklearn.metrics import accuracy_score, cohen_kappa_score
# calculate kappa and accuracy
"""
all_actual = []
all_predicted = []
for i in list(predictions_mapping):
    print(i)
    all_actual.extend(predictions_mapping[i]['actual'])
    all_predicted.extend(predictions_mapping[i]['predicted'])
    print('accuracy ', accuracy_score(actual, predict))
    print('kappa ', cohen_kappa_score(actual, predict))
"""


ELBOW
                                                name  value  is_valid
0  train/0/FOREARM_patient09454_study1_negative_i...  False     False
1  train/0/FOREARM_patient09454_study1_negative_i...  False     False
2  train/0/FOREARM_patient09389_study1_negative_i...  False     False
3  train/0/FOREARM_patient09322_study1_negative_i...  False     False
4  train/0/FOREARM_patient03005_study1_negative_i...  False     False
predicting
0
200
400



FINGER
                                                name  value  is_valid
0  train/0/HAND_patient10964_study1_negative_imag...  False     False
1  train/0/HAND_patient10791_study1_negative_imag...  False     False
2  train/0/HAND_patient03231_study2_negative_imag...  False     False
3  train/0/HAND_patient11160_study1_negative_imag...  False     False
4  train/0/HAND_patient06055_study2_negative_imag...  False     False
predicting
0
200
400



FOREARM
                                                name  value  is_valid
0  train/0/FOREARM_pa

"\nall_actual = []\nall_predicted = []\nfor i in list(predictions_mapping):\n    print(i)\n    all_actual.extend(predictions_mapping[i]['actual'])\n    all_predicted.extend(predictions_mapping[i]['predicted'])\n    print('accuracy ', accuracy_score(actual, predict))\n    print('kappa ', cohen_kappa_score(actual, predict))\n"

In [87]:
grouped_by_study = {}

for i in list(predictions_mapping2):

    curr = predictions_mapping2[i]
    print(curr)
    for j in range(len(curr['actual'])):
        if j % 200 == 0:
            print(j)
        
        file_name = curr['file'][j]
        
        
        patient_name = file_name[file_name.index('patient'):file_name.index('study')-1]
        # print(patient_name)
        if patient_name in grouped_by_study:
            grouped_by_study[patient_name].append(('positive' in file_name, curr['predicted'][j]))
        else:
            grouped_by_study[patient_name] = [('positive' in file_name, curr['predicted'][j])]
    
        

        





{'actual': [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, F

In [88]:
actual_ans = []
predicted_ans = []
for patient in grouped_by_study:

    actual = grouped_by_study[patient][0][0]
    predicted = [i[1] for i in grouped_by_study[patient]]
    predicted = float(sum(predicted)) / len(predicted)
    
    actual_ans.append(actual)
    predicted_ans.append(predicted >= .5)

    

    
from sklearn.metrics import accuracy_score, cohen_kappa_score
actual_ans = [int(i) for i in actual_ans]
predicted_ans = [int(i) for i in predicted_ans]
cohen_kappa_score(actual_ans, predicted_ans)
#accuracy_score(actual_ans, predicted_ans)
        
    

0.5719874340584434

In [77]:
print(cohen_kappa_score(predictions_mapping2['HAND']['actual'], predictions_mapping2['HAND']['predicted']))

0.4887171294923426
