# General Image Classification

The input is a DataFrame holding the pixel values of images. Each image takes up 144 rows by 120 pixel columns of the DataFrame, and each pixel cell stores its channel values as a string joined with `|`.
The columns are [index, target, img_id, px0, ..., px119].

In [None]:
# List of pixel columns for the DataFrame: one column per pixel across the
# 120-pixel image width, named 'px0' through 'px119'. Generated rather than
# typed out so the names cannot drift out of sequence.
imgCols = ['px{}'.format(i) for i in range(120)]

In [None]:
def Convert_Image_DF(img):
    """Encode an image as a DataFrame of '|'-joined pixel strings.

    img is iterated as rows of pixels (for example the array returned by
    matplotlib.image.imread()). The channel values of every pixel are
    converted with str() and joined with '|' into a single cell.

    Returns a DataFrame with one row per image row, labelled with the
    column names in imgCols.
    """
    encoded_rows = [
        ["|".join(str(channel) for channel in pixel) for pixel in pixel_row]
        for pixel_row in img
    ]

    return pd.DataFrame(data=encoded_rows, columns=imgCols)

def Convert_DF_Image(df):
    """Decode a DataFrame of '|'-joined pixel strings into nested lists.

    Every cell of df must be a string of channel values separated by '|'.

    * Four values: in this data set a four-value pixel is already scaled
      between 0 and 1, so the values are parsed as numpy.float32 without
      dividing by 255, and the trailing transparency value is dropped so
      the pixel matches the three-value form.
    * Any other count: each value is parsed as an int and divided by 255.

    Returns a list with one entry per DataFrame row; each entry is a list
    with one channel list per column.
    """
    image = []

    # name=None yields plain tuples of the cell values, which avoids the
    # per-row Series that iterrows() builds.
    for cells in df.itertuples(index=False, name=None):
        pixel_row = []

        for cell in cells:
            channels = cell.split("|")

            if len(channels) == 4:
                # Keep only the first three values (drop transparency).
                pixel = [np.float32(value) for value in channels[:3]]
            else:
                pixel = [int(value) / 255 for value in channels]

            pixel_row.append(pixel)

        image.append(pixel_row)

    return image

In [None]:
# Load the training frame
train_data = pd.read_csv(training_path, index_col = "index")

# Walk the frame once, one group of rows per image. sort=False keeps the
# images in order of first appearance, so labels and train_image stay
# aligned with each other without re-filtering the whole frame per image.
labels = []
train_image = []

for _, image_rows in train_data.groupby("img_id", sort=False):
    # Collapse the image's target column to a single value. Presumably every
    # row of an image carries the same target, so the mean is that target.
    labels.append(image_rows.target.mean())

    # Only care about the encoded image data portion of the dataframe
    train_image.append(Convert_DF_Image(image_rows[imgCols]))

# Separate target from predictors
y = to_categorical(labels)
X = np.array(train_image)

# Divide data into training and validation subsets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Read the per-image input shape and the number of target classes from the
# training arrays instead of hard-coding them, so the first and last layers
# always match the data that was loaded. For 144x120 RGB images with two
# targets this gives (144, 120, 3) and 2.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]

# Add Layers
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# Last Dense must match up with the amount of unique targets you have
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Fit the model, validating against the held-out split after each epoch
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [None]:
# Load the testing data
test_data = pd.read_csv(testing_path, index_col = "index")

# Walk the frame once, one group of rows per image. sort=False keeps the
# images in order of first appearance; the ids are remembered so that each
# prediction can be written back to the rows of the image it belongs to.
image_ids = []
test_image = []

for image_id, image_rows in test_data.groupby("img_id", sort=False):
    image_ids.append(image_id)

    # Only care about the encoded image data portion of the dataframe
    test_image.append(Convert_DF_Image(image_rows[imgCols]))

final_test_X = np.array(test_image)

# Create predictions from testing data. Sequential.predict_classes() is no
# longer available in current Keras/TensorFlow releases; taking the argmax
# over the softmax output of predict() gives the predicted class index.
predictions = np.argmax(model.predict(final_test_X), axis=-1)

# Disperse the predictions along the original Dataframe
for image_id, prediction in zip(image_ids, predictions):
    image_mask = test_data.img_id == image_id
    test_data.loc[image_mask, "target"] = prediction
    print("Num: ", prediction)
    print(test_data.loc[image_mask, "target"])

# Write the per-row targets to predictions_path
predict_frame = pd.DataFrame(test_data.target, columns=["target"], index = test_data.index)
predict_frame.to_csv(predictions_path)