In [None]:
import os

from skimage import io

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output

# Shell out to `ls` and echo the contents of the input directory as UTF-8 text.
print(
    check_output(["ls", "../input"]).decode(encoding="utf8")
)

# Any results you write to the current directory are saved as output.

In [None]:
# Locations of the Planet Kaggle inputs, all resolved underneath ../input.
PLANET_KAGGLE_ROOT = os.path.abspath("../input/")
PLANET_KAGGLE_TIF_DIR = os.path.join(PLANET_KAGGLE_ROOT, 'train-tif-v2')
PLANET_KAGGLE_TEST_TIF_DIR = os.path.join(PLANET_KAGGLE_ROOT, 'test-tif-v2')
PLANET_KAGGLE_LABEL_CSV = os.path.join(PLANET_KAGGLE_ROOT, 'train_v2.csv')

# Fail fast and name every path that is absent, so a wrongly mounted dataset
# is diagnosed here rather than by a later cell that tries to read from it.
_missing_inputs = [
    path
    for path in (
        PLANET_KAGGLE_ROOT,
        PLANET_KAGGLE_TIF_DIR,
        PLANET_KAGGLE_TEST_TIF_DIR,
        PLANET_KAGGLE_LABEL_CSV,
    )
    if not os.path.exists(path)
]
assert not _missing_inputs, 'Missing input path(s): {}'.format(_missing_inputs)

In [None]:
# Read the training label table from the CSV and preview its first five rows.
labels_df = pd.read_csv(filepath_or_buffer=PLANET_KAGGLE_LABEL_CSV)
labels_df.head(n=5)

In [None]:
# Split every row's whitespace-separated tag string once and reuse the
# resulting lists for each indicator column built below.
tag_lists = labels_df['tags'].str.split()

# Collect the distinct tags that occur anywhere in the label table.
tagSet = set()
for tags in tag_lists:
    tagSet.update(tags)

# Add one 0/1 indicator column per tag. Iterating in sorted order keeps the
# column order identical across kernel restarts; the iteration order of a set
# of strings is not guaranteed to be the same from one Python process to the next.
for tag in sorted(tagSet):
    labels_df[tag] = tag_lists.apply(lambda tags, tag=tag: int(tag in tags))

labels_df.head()
    

In [None]:
# Keep the image name plus the clear / partly_cloudy / cloudy / haze
# indicator columns, then summarise them.
cloud_df = labels_df.loc[:, ['image_name', 'clear', 'partly_cloudy', 'cloudy', 'haze']]
cloud_df.describe()

In [None]:
# Work on a random 2000-row subset. The fixed random_state makes the draw
# repeatable, so restarting the kernel and re-running selects the same rows.
cloud_df_samp = cloud_df.sample(n=2000, random_state=0)
cloud_df_samp.describe()

In [None]:
def loadImage(imageName, suffix='tif', dirPath=PLANET_KAGGLE_TIF_DIR):
    """Read ``<dirPath>/<imageName>.<suffix>`` with ``io.imread``.

    Returns the loaded image. When no file exists at the resolved absolute
    path, prints a 'Load failed' message and returns ``None`` instead.
    """
    fullPath = os.path.abspath(os.path.join(dirPath, '.'.join([imageName, suffix])))

    # Guard clause: report the missing file and hand back None.
    if not os.path.exists(fullPath):
        print('Load failed: could not find image {}'.format(fullPath))
        return None

    return io.imread(fullPath)
    
# Pick one training image by row position, load it, and show its tag string.
imageName = cloud_df['image_name'].iloc[9870]
sampleImage = loadImage(imageName)
labels_df.loc[labels_df['image_name'] == imageName, 'tags']

In [None]:
# Show the dimensions of the array returned by loadImage for the sample image.
sampleImage.shape

In [None]:
def image2grey_avg(image):
    """Convert a three-channel image to greyscale by averaging its channels.

    Parameters
    ----------
    image : numpy.ndarray
        3-D array whose first two axes are the pixel raster and whose third
        axis holds exactly three colour values.

    Returns
    -------
    numpy.ndarray
        2-D array holding the mean of the three channel values of each pixel.

    Raises
    ------
    AssertionError
        If the third axis does not have length 3.
    """
    assert image.shape[2] == 3, 'expected exactly 3 channels on axis 2'
    # Reduce along the channel axis in a single vectorised call instead of
    # invoking np.mean once per pixel through np.apply_along_axis.
    return np.mean(image, axis=2)
    
# Average the first three channels of the sample image into a greyscale
# raster and display it.
avg_grey = image2grey_avg(sampleImage[:, :, 0:3])
plt.imshow(X=avg_grey, cmap='Greys')

In [None]:
def dot_lum(rgb):
    """Return the weighted sum 0.21*c0 + 0.72*c1 + 0.07*c2 over the last axis.

    ``rgb`` may be a single three-value pixel or a whole array whose last
    axis has length 3: ``np.dot`` with a 1-D second argument contracts the
    last axis of the first argument, so an image is handled in one call.
    """
    return np.dot(rgb, (.21, .72, .07))

def image2grey_lum(image):
    """Convert a three-channel image to greyscale with a weighted channel sum.

    Unlike a plain average, the three channels contribute with the weights
    0.21, 0.72 and 0.07 applied by ``dot_lum``.

    Parameters
    ----------
    image : numpy.ndarray
        3-D array whose first two axes are the pixel raster and whose third
        axis holds exactly three colour values.

    Returns
    -------
    numpy.ndarray
        2-D array holding the weighted sum of each pixel's channel values.

    Raises
    ------
    AssertionError
        If the third axis does not have length 3.
    """
    assert image.shape[2] == 3, 'expected exactly 3 channels on axis 2'
    # One vectorised dot product over the whole image replaces a Python-level
    # call per pixel through np.apply_along_axis.
    return dot_lum(image)
    
# Convert the first three channels of the sample image to greyscale with the
# weighted channel sum and display the result.
lum_grey = image2grey_lum(sampleImage[:, :, 0:3])
plt.imshow(X=lum_grey, cmap='Greys')

In [None]:
def compress_image(width, height, image):
    """Downsample ``image`` to a ``(width, height)`` grid of block means.

    Axis 0 of ``image`` is cut into ``width`` contiguous bands and axis 1 into
    ``height`` bands. Each output cell is the mean of every value inside the
    corresponding block; if ``image`` has further trailing axes (for example
    colour channels) their values are averaged into the same cell.

    Band edges are spread as evenly as possible over the image, so no block is
    empty (and no cell is NaN) even when the image size is not a multiple of
    the grid size. When it is an exact multiple, the blocks are equal-sized
    tiles.

    Parameters
    ----------
    width : int
        Number of output rows (bands along axis 0 of ``image``).
    height : int
        Number of output columns (bands along axis 1 of ``image``).
    image : numpy.ndarray
        Array with at least two axes.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(width, height)``.

    Raises
    ------
    ValueError
        If ``width`` or ``height`` is not positive or exceeds the matching
        image dimension (which would leave some blocks empty).
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    if not (0 < width <= n_rows and 0 < height <= n_cols):
        raise ValueError(
            'cannot compress a {}x{} image to a {}x{} grid'.format(
                n_rows, n_cols, width, height))

    # Integer arithmetic keeps the edges exact: edge i = floor(i * size / bins).
    row_edges = (np.arange(width + 1) * n_rows) // width
    col_edges = (np.arange(height + 1) * n_cols) // height

    retMat = np.empty((width, height))
    for x in range(width):
        band = image[row_edges[x]:row_edges[x + 1]]
        for y in range(height):
            retMat[x, y] = np.mean(band[:, col_edges[y]:col_edges[y + 1]])

    return retMat

# Block-average the sample image down to 16x16 in four variants: the first
# three channels together, channel 3 alone (called "nir" here), and the two
# greyscale rasters computed earlier.
compressed_image = compress_image(16, 16, sampleImage[:,:,:3])
compressed_nir = compress_image(16, 16, sampleImage[:,:,3])
compressed_image_avg_grey = compress_image(16, 16, avg_grey)
compressed_image_lum_grey = compress_image(16, 16, lum_grey)

# One titled figure per thumbnail, so the four plots can be told apart.
for thumb_title, thumb in (
    ('Channels 0-2, 16x16 block means', compressed_image),
    ('Channel 3 (nir), 16x16 block means', compressed_nir),
    ('avg_grey, 16x16 block means', compressed_image_avg_grey),
    ('lum_grey, 16x16 block means', compressed_image_lum_grey),
):
    plt.figure()
    plt.imshow(thumb, cmap='Greys')
    plt.title(thumb_title)

In [None]:
# Flatten the compressed thumbnail into a single column and check its shape.
v = compressed_image.reshape(-1, 1)
v.shape

In [None]:
# Preview the first five rows of the sampled label table.
cloud_df_samp.head(n=5)

In [None]:
def feat_ext_nir(x):
    """Build the feature vector for one image from its fourth channel.

    Parameters
    ----------
    x : str
        Image name as accepted by ``loadImage``.

    Returns
    -------
    numpy.ndarray
        1-D array with the 16 * 16 = 256 block means of channel index 3
        (the channel this notebook calls "nir").

    Raises
    ------
    FileNotFoundError
        If ``loadImage`` could not find the image and returned ``None``.
    """
    img = loadImage(x)
    if img is None:
        raise FileNotFoundError('No image file found for {!r}'.format(x))
    compressed_img = compress_image(16, 16, img[:,:,3])
    # Return this image's own thumbnail, not the notebook-level
    # `compressed_image` computed for the sample image.
    return compressed_img.ravel()

cloud_nir_names = cloud_df_samp['image_name'].values

# The names form a 1-D array, so there is no axis 1 for np.apply_along_axis to
# walk; call the extractor once per name and stack the rows instead. Indexing
# the result like the sample keeps features and labels aligned row for row.
# NOTE: this reads one image file per sampled row, so the cell is slow.
cloud_feat_nir = pd.DataFrame(
    np.stack([feat_ext_nir(name) for name in cloud_nir_names]),
    index=cloud_df_samp.index,
)
cloud_feat_nir.columns = ['nir_{}'.format(i) for i in cloud_feat_nir.columns]
cloud_feat_nir.shape

In [None]:
# Preview the first five feature rows. Slicing is used instead of .head()
# because it works whether the features are a NumPy array or a DataFrame.
cloud_feat_nir[:5]

In [None]:
# `join_axes` was removed from pd.concat in pandas 1.0. Put the features on
# the sample's index first, so the column-wise concat lines up row for row
# and the result keeps the sample's row order.
cloud_train_nir = pd.concat(
    [cloud_df_samp, pd.DataFrame(cloud_feat_nir, index=cloud_df_samp.index)],
    axis=1,
)
cloud_train_nir.describe()

In [None]:
# Preview the first five rows of the combined label + feature table.
cloud_train_nir.head(n=5)