In [0]:
# mount drive

from google.colab import drive
drive.mount('/content/drive')

import os
os.chdir('/content/drive/My Drive/Colab Notebooks')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# libs

import h5py
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

In [0]:
def getPlots(idx,X_valid,y_valid,y_pred):
    plt.figure(figsize=(10, 5))
    plt.subplot(131)
    plt.imshow(X_valid[idx, :, :, :])
    plt.title('Image')
    plt.xticks([])
    plt.yticks([])
    plt.subplot(132)
    plt.imshow(y_valid[idx, :, :, 0])
    plt.title('Ground Truth')
    plt.xticks([])
    plt.yticks([])
    plt.subplot(133)
    plt.imshow(y_pred[idx, :, :, 0])
    plt.title('Segmented Image')
    plt.xticks([])
    plt.yticks([])
    plt.savefig(save_folder + 'validation/im_{}.png'.format(idx), dpi=200)
    
def getData():
    images = []
    gt = []
    for i in range(1, 101):
        if (i <= 9):
            # fileName = 'cells/00{}cell.png'.format(i)
            fileName_img = 'data/training/images/satImage_00{}.png'.format(i)
            fileName_gt = 'data/training/groundtruth/satImage_00{}.png'.format(i)
        elif (i < 100):
            # fileName = 'cells/0{}cell.png'.format(i)
            fileName_img = 'data/training/images/satImage_0{}.png'.format(i)
            fileName_gt = 'data/training/groundtruth/satImage_0{}.png'.format(i)
        else:
            # fileName = 'cells/{}cell.png'.format(i)
            fileName_img = 'data/training/images/satImage_{}.png'.format(i)
            fileName_gt = 'data/training/groundtruth/satImage_{}.png'.format(i)
        im = plt.imread(fileName_img)
        im_gt = plt.imread(fileName_gt)
        
        im_sz = im.shape[1]
        
        for i in range(int(im_sz/16)):
          for j in range(int(im_sz/16)):
            h = i*16
            v = j*16
            images.append(im[h:(h+16),v:(v+16)].flatten())
            gt.append(patch_to_label(im_gt[h:(h+16),v:(v+16)]))

    images = np.stack(images, axis=0)
    gt = np.stack(gt, axis=0)

    return [images, gt]

In [0]:
foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch

# assign a label to a patch
def patch_to_label(patch):
    df = np.mean(patch)
    if df > foreground_threshold:
        return 1
    else:
        return 0

In [0]:
save_folder = "./Baseline_LogReg/"
frac_train = 0.95

if not os.path.exists(save_folder):
    os.makedirs(save_folder)
    
if not os.path.isfile('training_data.h5'):
    [X, y] = getData()
    file_data = h5py.File('training_data.h5','w')
    file_data['images'] = X
    file_data['groundTruth'] = y
    file_data.close()
else:
    file_data = h5py.File('training_data.h5','r')
    X = file_data['images'][()]
    y = file_data['groundTruth'][()]
    file_data.close()

print(X.shape)
print(y.shape)
n_samples = X.shape[0]
    
# Split the training and test dataset
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33,
                                                     random_state=42)

print(X_train.shape)
print(y_train.shape)
print(X_valid.shape)
print(y_valid.shape)

(62500, 768)
(62500,)
(41875, 768)
(41875,)
(20625, 768)
(20625,)


In [0]:
lr = LogisticRegression(solver='liblinear', penalty='l2', n_jobs=-1, max_iter=10^3)
lr.fit(X_train, y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=9,
                   multi_class='warn', n_jobs=-1, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [0]:
y_pred = lr.predict(X_valid)
print(f1_score(y_pred, y_valid))
print(accuracy_score(y_pred, y_valid))

y_pred2 = lr.predict(X_train)
print(f1_score(y_pred2, y_train))
print(accuracy_score(y_pred2, y_train))

0.0
0.7403151515151515
0.000549349935909174
0.7393194029850746


In [0]:
sv = SVC(gamma='auto')
sv.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [0]:
y_pred = sv.predict(X_valid)
print(f1_score(y_pred, y_valid))
print(accuracy_score(y_pred, y_valid))

y_pred2 = sv.predict(X_train)
print(f1_score(y_pred2, y_train))
print(accuracy_score(y_pred2, y_train))

  'recall', 'true', average, warn_for)


0.0
0.741430303030303
0.0
0.7407044776119402


  'recall', 'true', average, warn_for)


In [0]:
nb = MultinomialNB()
nb.fit(X_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [0]:
y_pred = nb.predict(X_valid)
print(f1_score(y_pred, y_valid))
print(accuracy_score(y_pred, y_valid))

y_pred2 = nb.predict(X_train)
print(f1_score(y_pred2, y_train))
print(accuracy_score(y_pred2, y_train))

0.004277312422028159
0.7291151515151515
0.005420528064346913
0.728334328358209
