In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
from tqdm import tqdm
import tensorflow as tf
import random

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report 
from sklearn.metrics import roc_curve, auc

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the label table (the 'id' and 'label' columns are used further down)
# and hold out 30% of its rows; the fixed seed makes the split repeatable.
label_csv = pd.read_csv("../input/train_labels.csv")
label_csv_train, label_csv_test = train_test_split(
    label_csv,
    test_size=0.3,
    random_state=1,
)

In [None]:
def import_data(labl, img_dir="../input/train/", size=(50, 50)):
    """Load the images listed in ``labl`` as resized grayscale arrays.

    Parameters
    ----------
    labl : mapping or DataFrame
        Must provide ``labl['id']`` (iterable of image file stems, without
        the ``.tif`` extension) and ``labl['label']`` (the matching labels).
    img_dir : str, optional
        Directory that contains the ``<id>.tif`` files.
    size : tuple of int, optional
        Target size handed to ``cv2.resize``.

    Returns
    -------
    x_sample : list of numpy.ndarray
        One resized grayscale image per id, in the order of ``labl['id']``.
    y : numpy.ndarray
        ``labl['label']`` converted to an array.

    Raises
    ------
    OSError
        If an image file is missing or cannot be decoded.
    """
    x_sample = []
    for img_id in labl['id']:
        path = os.path.join(img_dir, img_id + '.tif')
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise OSError("Could not read image: " + path)
        x_sample.append(np.array(cv2.resize(img, size)))

    y = np.array(labl['label'])
    return x_sample, y

In [None]:
def error_rate(p, t):
    """Return the fraction of entries where ``p`` and ``t`` disagree.

    Parameters
    ----------
    p, t : array-like
        Predictions and targets. Lists are accepted. When the two inputs
        hold the same number of elements but have different shapes
        (e.g. ``(N, 1)`` vs ``(N,)``) both are flattened first, so the
        comparison stays element-by-element instead of broadcasting to an
        ``N x N`` matrix.

    Returns
    -------
    numpy.floating
        Mean of the element-wise ``p != t`` comparison.
    """
    p = np.asarray(p)
    t = np.asarray(t)
    # Plain lists would compare as a single bool; mismatched-but-equal-sized
    # shapes would broadcast. Both cases give a wrong rate, so align them.
    if p.shape != t.shape and p.size == t.size:
        p, t = p.ravel(), t.ravel()
    return np.mean(p != t)
def standardize(x):
    """Scale ``x`` by 1/255.

    Despite the name this is a plain rescaling (no mean/variance
    normalisation): values in 0..255 end up in 0..1.
    """
    max_pixel_value = 255
    return x / max_pixel_value

In [None]:
def convolution(x, w, b, pad):
    """Stride-1 2-D convolution of ``x`` with kernel ``w`` plus bias ``b``.

    ``pad`` is forwarded unchanged as the ``padding`` argument of
    ``tf.nn.conv2d`` (the callers in this notebook pass "SAME" or "VALID").
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.bias_add(
        tf.nn.conv2d(x, w, strides=unit_strides, padding=pad),
        b,
    )
def pooling(conv_out):
    """Apply 2x2 max pooling (stride 2, SAME padding), then ReLU."""
    window = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(conv_out, ksize=window, strides=window, padding="SAME")
    activated = tf.nn.relu(pooled)
    return activated

In [None]:
def init_filter(shape, poolsz):
    """Draw a random float32 kernel of the given ``shape``.

    Entries are standard-normal samples scaled by ``sqrt(2 / fan_in)``,
    where ``fan_in`` is the product of every dimension except the last
    (He-style initialisation).

    ``poolsz`` is accepted for call-site compatibility but is not used.
    """
    fan_in = np.prod(shape[:-1])
    scale = np.sqrt(2.0 / fan_in)
    kernel = scale * np.random.randn(*shape)
    return kernel.astype(np.float32)

In [None]:
# Hyperparameters.
# NOTE(review): in the cells shown here, max_iter and lr are never read --
# the training loops use the literal 50 and Adam is built with 0.001.
max_iter, lr = 50, 0.01
# Number of output units: last dimension of the final kernel and width of
# the target placeholder.
K = 1
# Handed to init_filter, which currently ignores it.
poolsz = (2, 2)

In [None]:
# Initial values for the four convolution layers.
# Kernel shapes follow tf.nn.conv2d's filter layout:
# (height, width, in_channels, out_channels).
w1_shape = (3, 3, 1, 20)
w2_shape = (3, 3, 20, 50)
w3_shape = (3, 3, 50, 100)
# 7x7 matches the feature map left after three stride-2 SAME poolings of a
# 50x50 input (50 -> 25 -> 13 -> 7), so the VALID convolution yields 1x1xK.
w4_shape = (7, 7, 100, K)

# Kernels are drawn in layer order (w1..w4), so the sequence of random
# draws is the same as before.
w1_init = init_filter(w1_shape, poolsz)
w2_init = init_filter(w2_shape, poolsz)
w3_init = init_filter(w3_shape, poolsz)
w4_init = init_filter(w4_shape, poolsz)

# One zero bias per output channel.
b1_init = np.zeros(w1_shape[-1], dtype=np.float32)
b2_init = np.zeros(w2_shape[-1], dtype=np.float32)
b3_init = np.zeros(w3_shape[-1], dtype=np.float32)
b4_init = np.zeros(w4_shape[-1], dtype=np.float32)


In [None]:
# Graph inputs: a batch of 50x50 single-channel images and their targets.
x = tf.placeholder(tf.float32, shape=(None, 50, 50, 1), name='x')
t = tf.placeholder(tf.float32, shape=(None, K), name='t')

# Trainable parameters, created in the order w1, b1, w2, b2, ... from the
# NumPy initial values.
w1, b1 = (tf.Variable(arr.astype(np.float32)) for arr in (w1_init, b1_init))
w2, b2 = (tf.Variable(arr.astype(np.float32)) for arr in (w2_init, b2_init))
w3, b3 = (tf.Variable(arr.astype(np.float32)) for arr in (w3_init, b3_init))
w4, b4 = (tf.Variable(arr.astype(np.float32)) for arr in (w4_init, b4_init))

In [None]:
# Model: three (convolution -> max-pool -> ReLU) stages with SAME padding,
# followed by one VALID convolution that produces the logits.
c_out1 = convolution(x, w1, b1, pad="SAME")
z1 = pooling(c_out1)

c_out2 = convolution(z1, w2, b2, pad="SAME")
z2 = pooling(c_out2)

c_out3 = convolution(z2, w3, b3, pad="SAME")
z3 = pooling(c_out3)

c_out4 = convolution(z3, w4, b4, pad="VALID")

# Flatten the last convolution output to K logits per row.
calcY = tf.reshape(c_out4, shape=[-1, K])

# Cost: sigmoid cross-entropy, summed (not averaged) over the batch.
cost = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=t, logits=calcY)
)

# Optimizer: Adam with a fixed learning rate of 0.001.
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
# Alternative kept for reference: RMSProp
#train_op = tf.train.RMSPropOptimizer(0.01, decay=0.99, momentum=0.9).minimize(cost)

# Predicted probability: sigmoid of the logits (not softmax).
predictor_y_prob = tf.nn.sigmoid(calcY)
# Predicted label: the probability rounded to 0 or 1.
predictor_y = tf.round(predictor_y_prob)


In [None]:
def process(size):
    """Draw a class-balanced random batch from ``label_csv_train`` and load it.

    ``size`` rows are sampled from each of the two classes (label 0 first,
    then label 1), so the batch holds ``2 * size`` images.

    Returns
    -------
    x : numpy.ndarray
        Images reshaped to ``(-1, 50, 50, 1)`` and divided by 255.
    y : numpy.ndarray
        Labels reshaped to a column, ``(-1, 1)``.

    NOTE(review): each class is sampled with a seed from
    ``random.randint(1, 99)``, so only 99 distinct subsets per class can
    ever be drawn -- confirm this is intended.
    """
    per_class = [
        label_csv_train[label_csv_train.label == cls].sample(
            size, random_state=random.randint(1, 99)
        )
        for cls in (0, 1)
    ]
    batch_labels = pd.concat(per_class, ignore_index=True)[["id", "label"]]

    images, targets = import_data(batch_labels)
    images = np.array(images).reshape(-1, 50, 50, 1)
    return standardize(images), targets.reshape(-1, 1)

In [None]:
# Cost recorded after every training step.
train_costs = []

# Initialise the variables, train, then evaluate inside one session so the
# learned weights are still available for the predictions.
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)

    for i in range(50):
        print("i",i)
        for j in range(50):
            print("j",j)
            # Every step trains on a freshly drawn balanced batch
            # (5000 images per class).
            x_sample, y_sample = process(5000)
            session.run(train_op, feed_dict={x:x_sample, t: y_sample})
            # Cost on the same batch, measured after the update.
            train_cost = session.run(cost, feed_dict={x: x_sample, t: y_sample})
            train_costs.append(train_cost)

    # Predictions on a fresh balanced sample of the TRAINING split.
    x_train_sample, y_train_sample = process(5000)
    y_train_predicted, y_train_predicted_prob = session.run(
        [predictor_y, predictor_y_prob], feed_dict={x: x_train_sample}
    )

    # Predictions on the HELD-OUT split. Previously these were computed on
    # another training sample, so label_csv_test was never used and the
    # reported "test" metrics were really training metrics.
    # A fixed-seed subsample keeps the memory footprint comparable to one
    # training batch. x_sample / y_sample are left pointing at this test
    # sample because the following cells score y_test_predicted against
    # y_sample.
    test_labels = label_csv_test.sample(
        n=min(10000, len(label_csv_test)), random_state=1
    )
    x_test_images, y_sample = import_data(test_labels[["id", "label"]])
    x_sample = standardize(np.array(x_test_images).reshape(-1, 50, 50, 1))
    y_sample = y_sample.reshape(-1, 1)
    y_test_predicted, y_test_predicted_prob = session.run(
        [predictor_y, predictor_y_prob], feed_dict={x: x_sample}
    )

In [None]:
# Fraction of rows where the rounded predictions in y_test_predicted differ
# from the labels in y_sample.
err = error_rate(p=y_sample, t=y_test_predicted)
err

In [None]:
# ROC curve of the predicted probabilities against the y_sample labels,
# with the area under the curve shown in the legend.
FPR, TPR, _ = roc_curve(y_sample, y_test_predicted_prob)
AUC = auc(FPR, TPR)

fig, ax = plt.subplots()
ax.plot(FPR, TPR, label='ROC curve (area = %0.2f)' % AUC)
# Diagonal reference line (FPR == TPR).
ax.plot([0, 1], [0, 1], 'r--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.02])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
plt.show()