In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Used to change filepaths
from pathlib import Path

# We set up matplotlib, pandas, and the display function
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import display
import pandas as pd

# import numpy to use in this cell
# ... YOUR CODE FOR TASK 1 ...
import numpy as np
# import Image from PIL so we can use it later
# ... YOUR CODE FOR TASK 1 ...
from PIL import Image
# Generate test_data: a 100 x 100 x 3 array of random draws from a Beta(1, 1) distribution.
# No random seed is set here, so the values differ on every run.
test_data = np.random.beta(1,1,size=(100, 100, 3))
# Render the random array as an image with matplotlib.
plt.imshow(test_data)

In [None]:
# Open one bee image from the dataset with PIL.
img = Image.open('../input/bee_imgs/bee_imgs/001_043.png')

# PIL reports the size as a (width, height) tuple.
img_size = img.size

print("The image size is: {}".format(img_size))

# Ending the cell with the image object makes the notebook render it.
img

## Image Manipulation

In [None]:
# Crop to the box (left, upper, right, lower) = (25, 25, 75, 75).
# Pillow documents the box as a 4-tuple, so pass a tuple rather than a list.
img_cropped = img.crop((25, 25, 75, 75))
display(img_cropped)

# Rotate the image by 45 degrees. `expand` is an on/off flag (grow the output
# so the whole rotated image fits), so pass True instead of an arbitrary
# truthy number.
img_rotated = img.rotate(45, expand=True)
display(img_rotated)

# Flip the image left to right (mirror it horizontally).
img_flipped = img.transpose(Image.FLIP_LEFT_RIGHT)
display(img_flipped)

## Images as arrays: plotting the red, green and blue channels

In [None]:
# Convert the PIL image into a NumPy array and report its shape.
img_data = np.array(img)
img_data_shape = img_data.shape

print("Our NumPy array has the shape: {}".format(img_data_shape))

# Show the full-color image first.
plt.imshow(img_data)
plt.show()

# Then show the red, green and blue channels (indices 0, 1, 2 of the last
# axis) one after another, each with its matching reversed colormap.
for channel_ix, channel_cmap in enumerate((plt.cm.Reds_r, plt.cm.Greens_r, plt.cm.Blues_r)):
    plt.imshow(img_data[:, :, channel_ix], cmap=channel_cmap)
    plt.show()

## Exploring the Color Channels

In [None]:
def plot_kde(channel, color):
    """Draw a kernel density estimate of the values in one image channel.

    `channel` must be a 2d array; it is flattened to one dimension first.
    `color` must be a color string, e.g. 'r', 'g', or 'b'.

    Returns whatever the pandas density-plot call returns.
    """
    flat_values = pd.Series(channel.flatten())
    density_axes = flat_values.plot.density(c=color)
    return density_axes

# Color strings for the three channels, in the order they are indexed below.
channels = ['r', 'g', 'b']


def plot_rgb(image_data):
    """Overlay the density curves of the first three channels of `image_data`.

    Channel k (taken along the last axis) is drawn with the color string
    channels[k]; the figure is shown once all curves have been added.
    """
    for channel_ix in range(len(channels)):
        plot_kde(image_data[:, :, channel_ix], channels[channel_ix])

    plt.show()

# Density curves of the color channels of the first image loaded above.
plot_rgb(img_data)


## Honey bee RGB

In [None]:
# load 001_134.png from the bee_imgs folder as honey
honey = Image.open('../input/bee_imgs/bee_imgs//001_134.png')

# display the honey bee image
display(honey)

# NumPy array of the honey bee image data
honey_data = np.array(honey)

# plot the rgb densities for the honey bee image
plot_rgb(honey_data)

## Bumble bee RGB

In [None]:
# load 001_140.png from the bee_imgs folder as bumble
# NOTE(review): this file sits in the same folder as the honey bee example above;
# confirm that it really shows a bumble bee.
bumble = Image.open('../input/bee_imgs/bee_imgs//001_140.png')

# display the bumble bee image
display(bumble)

# NumPy array of the bumble bee image data
bumble_data = np.array(bumble)

# plot the rgb densities for the bumble bee image
plot_rgb(bumble_data)

## CNN for Honey Bee Health Classification

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#to import images
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score


import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Folder holding the bee images; file names are appended to it directly.
directory = "../input/bee_imgs/bee_imgs/"
# Load one picture resized to 64x64 (small images for faster training).
picture3 = image.load_img(directory+"041_073.png", target_size=(64,64))
# Last expression of the cell, so the notebook renders the picture.
picture3

In [None]:
# Label table; its "file" column lists the image file names to load.
df = pd.read_csv("../input/bee_data.csv")

# Load every listed image resized to 64x64, keeping the image objects so they
# can be displayed later.
X_pics = [
    image.load_img(directory+file_name,target_size=(64,64))
    for file_name in df["file"]
]

# One array per image, with the pixel values divided by 255.
X = [np.array(image.img_to_array(pic)) / 255.0 for pic in X_pics]

In [None]:
# Sanity check of the loading order: display the third loaded picture (index 2).
# NOTE(review): this matches the single picture loaded earlier only if
# "041_073.png" is the third entry of df["file"] - confirm against the CSV.
X_pics[2] #third picture

In [None]:
# Class balance of the health labels.
print(df.health.value_counts())

# Distinct health labels, kept in order of first appearance.
target_ids = list(dict.fromkeys(df.health))

In [None]:
# One-hot encode the health classes: the k-th label below maps to a length-6
# vector with a 1 at position k and 0 elsewhere.
health_labels = ["healthy",
                 "few varrao, hive beetles",
                 "Varroa, Small Hive Beetles",
                 "ant problems",
                 "hive being robbed",
                 "missing queen"]
y_keys = dict(zip(health_labels, np.eye(len(health_labels), dtype=int)))

# Target vector for every row of the label table.
y = [y_keys[label] for label in df.health]

In [None]:
#helper function
#pass the rows of a single target class; returns randomly chosen pictures for them
def random_imgs(df,num_images,X_pics):
    """Pick `num_images` pictures at random (fixed seed) for the rows of `df`.

    The index labels of the sampled rows are used directly as positions into
    `X_pics`, so `df` must keep the index it had when `X_pics` was built.

    Returns the selected pictures as a list.
    """
    sampled_rows = df["file"].sample(n=num_images, random_state=1)
    return [X_pics[row_label] for row_label in sampled_rows.index]

In [None]:
# Four sample pictures for each of five health classes
# (the "missing queen" class is not sampled here).
healthy = random_imgs(df[df["health"]=="healthy"],4,X_pics)
hive_beetles = random_imgs(df[df["health"] == "few varrao, hive beetles"],4,X_pics)
ant_probs = random_imgs(df[df["health"] == "ant problems"],4,X_pics)
hive_robbed = random_imgs(df[df["health"] == "hive being robbed"],4,X_pics)
varroa = random_imgs(df[df["health"] == "Varroa, Small Hive Beetles"],4,X_pics)

In [None]:

#Helper function: show a list of images side by side in a single row.
def plot_bees(img_lst,title):
    """Display the images in `img_lst` next to each other in one row.

    img_lst : non-empty sequence of images accepted by `Axes.imshow`.
    title   : text placed above the first (left-most) image only.

    The number of columns follows len(img_lst), so the helper is no longer
    tied to exactly four pictures; with four pictures the figure is the same
    1 x 4 layout as before.
    """
    n_images = len(img_lst)
    # squeeze=False keeps `axes` two-dimensional even for a single image,
    # so the row lookup below works for any positive number of pictures.
    fig, axes = plt.subplots(nrows=1, ncols=n_images, figsize=(8,8), squeeze=False)
    for axis, picture in zip(axes[0], img_lst):
        axis.imshow(picture)
    # Only the first panel carries the title.
    axes[0][0].set_title(title)

    plt.show()
    
#plot_bees(healthy,"healthy")

In [None]:
# One row of sample pictures per sampled health class, titled with the class label.
plot_bees(healthy,"healthy")
plot_bees(hive_beetles,"few varrao, hive beetles")
plot_bees(ant_probs,"ant problems")
plot_bees(hive_robbed,"hive being robbed")
plot_bees(varroa,"Varroa, Small Hive Beetles")

### Convolution Network

In [None]:
#Keras CNN
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from keras.layers import BatchNormalization
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras import callbacks


from sklearn.model_selection import train_test_split
# NOTE(review): `history` is not referenced by any other cell visible in this
# notebook; the training cell keeps the object returned by fit() instead.
history = callbacks.History() #need to be defined first

In [None]:
#LeNet-style pattern: repeated conv -> pool blocks followed by dense layers
def train_cnn():
    """Build and compile the CNN classifier (no fitting is done here).

    Layout for 64x64 RGB input:
      * three blocks of Conv(3x3) -> BatchNormalization -> ReLU -> MaxPooling(2x2)
        with 11, 21 and 41 filters,
      * Flatten -> Dense(200, relu) -> Dropout(0.2) -> Dense(6, softmax).

    Returns:
        The compiled Sequential model (RMSprop with lr=0.0001,
        categorical cross-entropy loss, accuracy metric).
    """
    #To combat overfitting and ease optimization, batch normalization is applied
    #PRIOR to the activation in every convolutional block.
    model = Sequential()

    #First block: 11 feature maps from 3x3 kernels on the 64x64x3 (RGB) input.
    #The kernel size is passed as a tuple: with the Keras 2 signature a bare
    #third positional argument would be read as the stride, not the kernel width.
    model.add(Convolution2D(11, (3,3), input_shape=(64,64,3)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2,2),padding="SAME"))

    #Second block: 21 feature maps. The convolution itself stays linear so that
    #ReLU is applied once, after batch normalization (as in the first block),
    #instead of both before and after it.
    model.add(Convolution2D(21, (3,3)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2,2),padding="SAME"))

    #Third block: more filters (41) for better detection.
    model.add(Convolution2D(41, (3,3)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2,2),padding="SAME"))

    #Flatten the feature maps to feed the fully connected layers
    model.add(Flatten())

    #Dense section: one hidden layer of 200 units, dropout, then one output
    #unit per health class.
    model.add(Dense(200, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(6, activation="softmax"))

    #Smaller learning rate to optimize better; the default has periodic dips.
    #RMSprop is the optimizer class itself rather than the legacy lowercase alias.
    #The `lr` keyword is kept to match the Keras version this notebook targets.
    model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss="categorical_crossentropy",metrics=["accuracy"])

    return model


In [None]:
#splitting into train, test and validation datasets
#Step 1: hold out 20% of all samples as the test set, stratified by class.
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2,stratify=y,random_state=1)
#Step 2: carve a validation set (10% of the remaining samples) out of the
#training portion, so that X_val / y_val exist for the fit() call.
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1,
                                                  stratify=y_train_full, random_state=1)


In [None]:
# Build the CNN and train it for 50 epochs, evaluating on the validation data
# after each epoch. X_val / y_val must already have been produced by the
# data-splitting step before this cell runs.
model1 = train_cnn()
history1 = model1.fit(np.array(X_train),np.array(y_train),validation_data=(np.array(X_val),np.array(y_val)),
                      verbose=True,shuffle=True,epochs=50)

In [None]:
def _first_history_series(history, *candidates):
    """Return history.history[key] for the first key in `candidates` that exists."""
    for key in candidates:
        if key in history.history:
            return history.history[key]
    raise KeyError("none of {} found in history; available keys: {}".format(
        candidates, sorted(history.history)))


def model_plot(history,epochs,title,y_range=[0.5,1.0],save=0 ):
    """Plot training/validation loss, then training/validation accuracy.

    history : object whose `.history` dict holds per-epoch lists under
              "loss" and "val_loss", plus the accuracy under either
              "acc"/"val_acc" or "accuracy"/"val_accuracy" (the key name
              depends on the Keras version).
    epochs  : number of epochs; used as the length of the x axis.
    title   : figure title, also the prefix of the saved file names.
    y_range : y-axis limits of the accuracy plot.
    save    : when equal to 1, each figure is written to
              "<title>_Losses.jpg" / "<title>_Accuracy.jpg" before it is shown.

    Raises KeyError when neither spelling of an accuracy key is present.
    """
    epoch_axis = list(range(epochs))

    # Loss curves: one explicit plot call per series.
    plt.plot(epoch_axis, history.history["loss"])
    plt.plot(epoch_axis, history.history["val_loss"])
    plt.legend(["Train Loss","Val Loss"])
    plt.title(title)

    if save == 1:
        plt.savefig(title+"_Losses.jpg")
    plt.show()

    # Accuracy curves: accept both the old and the new metric key names.
    train_acc = _first_history_series(history, "acc", "accuracy")
    val_acc = _first_history_series(history, "val_acc", "val_accuracy")
    plt.plot(epoch_axis, train_acc)
    plt.plot(epoch_axis, val_acc)
    plt.legend(["Train_acc","Val_acc"])
    plt.title(title)
    plt.ylim(y_range)

    if save == 1:
        plt.savefig(title+"_Accuracy.jpg")
    plt.show()

In [None]:
# Loss and accuracy curves of the baseline run; the x axis covers every epoch recorded in history1.
model_plot(history1,epochs=len(history1.epoch),title="baseline_cnn")