In [1]:
from fastai.vision.all import *
from fastai.vision.widgets import *

In [2]:
# augments the image to make it easier to extract text
def process_img(img, angle=90, scale=3, blur_kernel=(3, 3)):
    """Prepare an image array for OCR: grayscale, rotate, enlarge, then blur.

    Args:
        img: image as a numpy array. Multi-channel input is converted with
            ``cv2.COLOR_BGR2GRAY`` (so it is treated as BGR-ordered). Input that
            is already single-channel -- shape ``(H, W)`` or ``(H, W, 1)`` -- is
            used as-is, because ``COLOR_BGR2GRAY`` rejects one-channel arrays.
        angle: angle handed to ``imutils.rotate_bound`` (default 90).
        scale: enlargement factor applied to both axes (default 3).
        blur_kernel: Gaussian kernel size applied after enlarging
            (default ``(3, 3)``).

    Returns:
        The processed single-channel image.
    """
    ndim = getattr(img, 'ndim', None)
    if ndim == 2:
        gray = img
    elif ndim == 3 and img.shape[2] == 1:
        gray = img.reshape(img.shape[:2])
    else:
        # Anything else goes to OpenCV exactly as before.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    rotated = imutils.rotate_bound(gray, angle)
    enlarged = cv2.resize(rotated, None, fx=scale, fy=scale,
                          interpolation=cv2.INTER_CUBIC)
    # Blur after enlarging, as in the original pipeline.
    return cv2.GaussianBlur(enlarged, blur_kernel, 0)
    
# extracts the text and returns a set of unique words extracted from the text
def extract_words(img):
    """OCR an image and return the set of unique, lower-cased word tokens.

    Args:
        img: image passed straight to ``pytesseract.image_to_string``.

    Returns:
        set of str: tokens produced by ``word_tokenize`` after every run of
        non-alphanumeric characters and underscores has been replaced by a
        single space and the text has been lower-cased.
    """
    raw_text = pytesseract.image_to_string(img)

    # Raw string on purpose: in a plain literal '\W' is an invalid escape
    # sequence, which Python warns about.
    non_word_run = re.compile(r'[\W_]+')
    normalised = non_word_run.sub(' ', raw_text).lower()

    return set(word_tokenize(normalised))

def clean_words(word_data):
    """Drop short tokens and join the rest into one space-separated string.

    Args:
        word_data: iterable of word strings.

    Returns:
        str: every word longer than two characters, in iteration order,
        joined by single spaces. Shorter words are discarded to reduce noise.
    """
    long_enough = filter(lambda token: len(token) > 2, word_data)
    return ' '.join(long_enough)

def file_to_sentence(a):
    """Turn an image file into a single string of its OCR'd words.

    Args:
        a: file path handed to ``load_img``.

    Returns:
        str: the result of ``clean_words`` applied to the tokens that
        ``extract_words`` finds in the ``process_img``-prepared image.
    """
    loaded = load_img(a)
    prepared = process_img(loaded)
    tokens = extract_words(prepared)
    return clean_words(list(tokens))

In [3]:
def img_to_sentence(pil_img):
    """Turn an in-memory image into a single string of its OCR'd words.

    Args:
        pil_img: image object (not a file path); converted with ``np.array``.

    Returns:
        str: the result of ``clean_words`` applied to the tokens that
        ``extract_words`` finds in the ``process_img``-prepared image.
    """
    # NOTE(review): np.array of an RGB image is RGB-ordered, while process_img
    # converts with COLOR_BGR2GRAY -- confirm this matches how the training
    # images were loaded.
    pixels = np.array(pil_img)
    tokens = extract_words(process_img(pixels))
    return clean_words(list(tokens))

# Unpickled bag-of-words artefacts keyed by file path, so that repeated
# predictions (one per button click) do not re-read them from disk.
_BOW_ARTIFACT_CACHE = {}


def _load_bow_artifact(path):
    """Unpickle ``path`` on first use and return the cached object afterwards."""
    if path not in _BOW_ARTIFACT_CACHE:
        # SECURITY: pickle.load can execute arbitrary code from the file; only
        # point this at model files you created or otherwise trust.
        with open(path, 'rb') as fh:
            _BOW_ARTIFACT_CACHE[path] = pickle.load(fh)
    return _BOW_ARTIFACT_CACHE[path]


def bow_predictor_app_inp(pil_img,
                          model_path="Y. model - bag of words2.pkl",
                          vocab_path='Y. Count vectorizer vocab.pkl'):
    """Classify one image with the bag-of-words (OCR text) model.

    Args:
        pil_img: a single in-memory image; its text is extracted with
            ``img_to_sentence``.
        model_path: pickle file holding the fitted classifier. It must expose
            ``predict`` and ``predict_proba``.
        vocab_path: pickle file holding the ``CountVectorizer`` vocabulary.

    Returns:
        tuple ``(bow_pred, bow_prob, bow_max_prob)``:
            * ``bow_pred`` -- output of ``predict`` (1 = KM, 0 = other).
            * ``bow_prob`` -- column 1 of ``predict_proba``, i.e. the
              probability that the image is KM.
            * ``bow_max_prob`` -- list with the largest class probability per
              row, i.e. the probability of the predicted class.
    """
    bow_model = _load_bow_artifact(model_path)
    vectorizer_vocab = _load_bow_artifact(vocab_path)

    # A single image is wrapped in a list because transform() expects an
    # iterable of documents.
    vectorizer = CountVectorizer(vocabulary=vectorizer_vocab)
    X_bow = vectorizer.transform([img_to_sentence(pil_img)])

    bow_pred = bow_model.predict(X_bow)
    class_probs = bow_model.predict_proba(X_bow)
    bow_max_prob = [max(row) for row in class_probs]
    bow_prob = class_probs[:, 1]  # probability the graph is KM

    return bow_pred, bow_prob, bow_max_prob

# Loaded fastai learners keyed by model path, so that the exported model is
# not re-loaded from disk on every prediction (one per button click).
_FASTAI_LEARNER_CACHE = {}


def fastai_predictor_app_inp(pil_img, model_path='model 11 - 25 epoch.pkl'):
    """Classify one image with the exported fastai learner.

    Args:
        pil_img: a single in-memory image passed to ``Learner.predict``.
        model_path: exported learner file given to ``load_learner``.

    Returns:
        tuple ``(fastai_pred, fastai_prob, fastai_max_prob)``:
            * ``fastai_pred`` -- ``1 - class_index``: in the deep-learning
              model 1 means "other", so the index is flipped to make 1 = KM.
            * ``fastai_prob`` -- element 0 of the probability tensor, i.e. the
              probability that the image is KM.
            * ``fastai_max_prob`` -- the largest class probability, i.e. the
              probability of the predicted class.
    """
    if model_path not in _FASTAI_LEARNER_CACHE:
        _FASTAI_LEARNER_CACHE[model_path] = load_learner(model_path)
    learner = _FASTAI_LEARNER_CACHE[model_path]

    _, class_idx, prob_tensor = learner.predict(pil_img)

    fastai_pred = 1 - class_idx
    fastai_prob = prob_tensor.numpy()[0]
    fastai_max_prob = prob_tensor.max().numpy()

    return fastai_pred, fastai_prob, fastai_max_prob


def ensemble_predictor_app_inp(pil_img):
    """Average the KM probabilities of the fastai and bag-of-words models.

    Args:
        pil_img: a single in-memory image, forwarded to both predictors.

    Returns:
        tuple ``(ensemble_pred, ensemble_prob)`` of 1-D float arrays:
            * ``ensemble_prob`` -- mean of the two models' KM probabilities.
            * ``ensemble_pred`` -- ``np.round(ensemble_prob)`` (1 = KM,
              0 = other). ``np.round`` rounds halves to even, so a probability
              of exactly 0.5 yields 0.
    """
    _, fastai_prob, _ = fastai_predictor_app_inp(pil_img)
    _, bow_prob, _ = bow_predictor_app_inp(pil_img)

    # The bag-of-words probability is a length-1 array while the fastai one is
    # a scalar. Passing that ragged pair straight to np.mean raises ValueError
    # on current NumPy, so flatten both to 1-D float arrays first.
    stacked = np.stack([
        np.asarray(bow_prob, dtype=float).reshape(-1),
        np.asarray(fastai_prob, dtype=float).reshape(-1),
    ])

    ensemble_prob = stacked.mean(axis=0)
    ensemble_pred = np.round(ensemble_prob)
    return ensemble_pred, ensemble_prob

In [4]:
# Exported fastai learner, loaded once when this cell runs.
# NOTE(review): nothing else in the visible cells reads `learn_inf`
# (fastai_predictor_app_inp loads its own copy) -- confirm it is still needed.
learn_inf = load_learner('model 11 - 25 epoch.pkl')

# Widgets that make up the app: action button, upload control, result label
# and the output area used for the image preview.
btn_run = widgets.Button(description='Classify')
btn_upload = widgets.FileUpload()
lbl_pred = widgets.Label()
out_pl = widgets.Output()

def on_click_classify(change):
    """Button callback: preview the latest upload and display its prediction.

    Reads the most recent file from ``btn_upload``, shows a 128x128 thumbnail
    in ``out_pl`` and writes the prediction and KM probability to ``lbl_pred``.
    If nothing has been uploaded yet, a hint is shown instead of failing.

    Args:
        change: argument supplied by the button's ``on_click``; not used.
    """
    uploads = btn_upload.data
    if not uploads:
        # Without this guard, clicking "Classify" before uploading raised
        # IndexError inside the callback and the label never updated.
        lbl_pred.value = 'Please upload an image before classifying.'
        return

    img = PILImage.create(uploads[-1])
    out_pl.clear_output()
    with out_pl:
        display(img.to_thumb(128, 128))

    # Choose ONE classification method:
    #   bag of words: pred, prob, max_prob = bow_predictor_app_inp(img)
    #   ensemble:     pred, prob = ensemble_predictor_app_inp(img)
    # NOTE(review): the original author marked the fastai call below as
    # "not working" -- confirm before relying on it.
    pred, prob, max_prob = fastai_predictor_app_inp(img)

    lbl_pred.value = f'Prediction: {pred}, Prob: {prob}'

# Run on_click_classify every time the "Classify" button is clicked.
btn_run.on_click(on_click_classify)

# Kaplan-Meier Classifier
Use the upload button below to choose an image, then click **Classify** to classify it.

- 1 = Kaplan-Meier
- 0 = Not Kaplan-Meier

In [5]:
# Stack the app's widgets vertically; as the last expression of the cell the
# resulting box is what gets displayed.
VBox(children=[
    widgets.Label('Please upload an image'),
    btn_upload,
    btn_run,
    out_pl,
    lbl_pred,
])

VBox(children=(Label(value='Please upload an image'), FileUpload(value={}, description='Upload'), Button(descr…