In [1]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display
#%matplotlib inline

import pandas as pd
import numpy as np

from PIL import Image

from skimage.feature import hog
from skimage.color import rgb2gray

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

from sklearn.metrics import roc_curve, auc

In [13]:
def get_image(row_id, root="./images/"):
    """
    Converts an image number into the file path where the image is located,
    opens the image, and returns the image as a numpy array.
    """
    filename = "{}.jpg".format(row_id)
    file_path = os.path.join(root, filename)
    img = Image.open(file_path)
    return np.array(img)


def create_features(img):
    # flatten three channel color image
    color_features = img.flatten()
    # convert image to greyscale
    grey_image = rgb2gray(img)
    # get HOG features from greyscale image
    hog_features = hog(grey_image, block_norm='L2-Hys', pixels_per_cell=(16, 16))
    # combine color and hog features into a single array
    flat_features = np.hstack(color_features)
    return flat_features


def create_feature_matrix(label_dataframe):
    features_list = []

    for img_id in label_dataframe.index:
        # load image
        img = get_image(img_id)
        # get features for image
        image_features = create_features(img)
        print(image_features)
        features_list.append(image_features)

    # convert list of arrays into a matrix
    feature_matrix = np.array(features_list)
    return feature_matrix

In [11]:
labels = pd.read_csv("./stringing-genus.csv", index_col=0)
print(labels)

        genus
index        
1           1
1-0         1
1-2         1
1-3         1
1-4         1
...       ...
159-19      1
164         1
192-0       1
192-13      1
192-19      1

[187 rows x 1 columns]


In [14]:
# run create_feature_matrix on our dataframe of images
feature_matrix = create_feature_matrix(labels)

# get shape of feature matrix
print('Feature matrix shape is: ', feature_matrix.shape)

[191 195 230 ...  10   8  21]
[  7  77 146 ...  47  53  39]
[32 24  5 ... 61 46 25]
[236 240 251 ... 103 109 109]
[ 9 12  8 ... 21 17 11]
[137 105  90 ...  85  62  54]
[192 188 185 ... 154 146 125]
[236 228 215 ... 233 234 229]
[68 51 23 ... 63 55 34]
[ 19  22  31 ... 182 164 160]
[255 255 255 ... 206 206 206]
[138 131 125 ... 136 132 131]
[68 51 23 ... 61 53 32]
[148 148 148 ... 189 187 188]
[172 104  39 ... 205 135  83]
[222 230 232 ... 193 187 187]
[70 73 78 ... 85 75 74]
[255 255 255 ... 179 181 176]
[161 132  98 ... 252 253 239]
[154 153 161 ... 119 122 113]
[102  99  92 ...  83  78  72]
[  6  81 148 ...   0   5   0]
[255 255 255 ... 255 255 255]
[177 127  58 ...  25  55 177]
[192 147  79 ... 185 185 187]
[255 255 255 ... 255 255 255]
[249 249 249 ... 248 248 248]
[255 255 255 ... 255 255 255]
[227 228 223 ... 234 235 230]
[190 190 190 ... 192 192 190]
[255 255 255 ... 255 255 255]
[229 230 225 ... 231 232 227]
[64 55 50 ... 13 14  6]
[136 128 125 ... 135 131 130]
[131 117 108 ...

  grey_image = rgb2gray(img)


[145 128 121 ...  61  59 255]
[255 255 255 ... 255 255 255]
[  6   6   6 ... 142  96  21]
[ 14  16  15 ... 170 174 175]
[52 38 53 ... 41 33 44]
[170 169 164 ... 127 122 128]
[80  2 15 ... 86 66 57]
[212 193 199 ... 147 117  89]
[255 255 255 ... 255 255 255]
[ 96  95  41 ... 231 221 209]
[174 170 159 ... 220 212 199]
[ 99 100  43 ... 214 192 169]
[239 239 241 ... 235 235 235]
[201 149   1 ...  26  50 156]
[34 26  7 ... 77 60 40]
[181 201 236 ... 194 172 115]
[133 109 107 ...  86  85  90]
[48 29 25 ...  9 27 77]
[ 59  53  65 ... 200 167   4]
[191 192 210 ...   6  35  79]
[218 218 216 ... 200 197 190]
[ 10  10  10 ...  27  64 106]
[ 13  17  20 ... 208 198 188]
[206 195 167 ...  31  21  19]
[132 129 136 ... 130 135 129]
[255 255 255 ... 159 124  84]
[176 141 113 ... 156 127 109]
[34 34 34 ... 96 63 48]
[175 165 156 ... 188 178 168]
[255 255 255 ... 255 255 255]
[ 94  97  90 ... 197 207 196]
[ 97  84  68 ... 174 176 171]
[0 0 0 ... 0 0 0]
[181 183 182 ... 137 132 128]
[215 103  91 ... 161  

  grey_image = rgb2gray(img)


[187   9 187 ...   9   9   9]
[ 57 101 138 ...  98  88  79]
[255 255 255 ... 255 255 255]
[0 0 0 ... 0 0 0]
[209 213 216 ... 216 213 206]
[141  99  59 ...  45  37  34]
[255 255 255 ... 255 255 255]
[126 124 111 ... 171 171 159]
[221 221 221 ... 221 217 218]
[80 61 80 ... 16 11  5]
[122  55  39 ...  23  16  24]
[19 10  5 ... 50 39 19]
[192 160 111 ... 119  90  58]
[230 231 235 ... 193 187 187]
[196 194 199 ...  76  73  66]
[100 144 179 ... 128 109 102]
[ 28  26  27 ... 143 124 107]
[ 3 10  3 ... 92 56 30]
[  0   0   0 ... 196 206  78]
[199 186 167 ... 231 223 202]
[200 188 176 ... 161 170 255]
[ 15  15  17 ...  49  40 255]
[194 198 184 ... 218 228 220]
[218 211 167 ... 122 110 110]
[ 53  69  68 ... 222 220 205]
[234 230 222 ... 212 208 255]
[56 47 42 ... 47 41 41]
[133 119 108 ... 100  98  77]
[255 253 246 ...  15  13  16]
[ 23  23  33 ... 179 169 160]
[255 255 255 ... 255 255 255]
[225 217 214 ... 226 219 211]
Feature matrix shape is:  (187,)


  feature_matrix = np.array(features_list)


In [15]:
# define standard scaler
ss = StandardScaler()
# run this on our feature matrix
bees_stand = ss.fit_transform(feature_matrix)

pca = PCA(n_components=500)
# use fit_transform to run PCA on our standardized matrix
bees_pca = ss.fit_transform(bees_stand)
# look at new shape
print('PCA matrix shape is: ', bees_pca.shape)

ValueError: setting an array element with a sequence.