In [1]:
#import torch
from torch.utils.data import Dataset, DataLoader
#import cv2

import pandas as pd
import numpy as np
import os
import keras.preprocessing.image as kimage
from keras.models import load_model
#import pickle

#from common import *
#from torch.autograd import Variable
#import torch.nn as nn
#import torch.nn.functional as F
#from excited_inception_v3 import SEInception3
#from inception_v3 import Inception3
#from xception import Xception
from tqdm import tqdm_notebook

Using TensorFlow backend.


In [2]:
# Input files; both paths are resolved relative to the working directory.
FILELIST_CSV = "FileList.csv"
CATEGORIES_CSV = "categories.csv"

# Category ids come from a header-less csv. All cells are flattened into one
# sequence and converted to strings; process_buffer uses them as column labels.
category_ids = list(
    map(str, np.array(pd.read_csv(CATEGORIES_CSV, header=None).values).flatten())
)

# Alternative category sources that were tried earlier (disabled):
#   CATEGORIES_PKL='class_order.pkl'  ->  pickle.load(open(CATEGORIES_PKL, "rb"))
#   INRESV2_CATEGORIES="categories_incepResv2.csv"  ->  read exactly like CATEGORIES_CSV

# When True, the prediction loop also feeds a copy of every batch that is
# reversed along its third axis, duplicating the ids accordingly.
INCLUDE_FLIPPED_IMAGES = True

In [3]:
# Rolling buffers used by the prediction loop. They are placeholders only:
# the first processed batch replaces both of them.
buffer_pred = np.array([], dtype=np.float32)  # buffer to keep temporary predictions
# Product ids are integers; a float32 buffer could not represent ids above
# 2**24 exactly, so the placeholder uses an integer dtype.
buffer_id = np.array([], dtype=np.int64)  # buffer to keep temporary ids

# HDF5 file that receives the aggregated predictions.
# NOTE(review): absolute, machine-specific location - adjust before running elsewhere.
result_predictions = os.path.join(
    "/home/cvpr/Desktop/Nabin/cdiscount/ensemble", "ensemble.h5"
)

In [4]:
import tables

# Handle on the results file, opened in append mode ("a"). The handle stays
# open across cells and is closed at the end of the prediction loop.
hdf5_file = tables.open_file(result_predictions, mode="a")

In [5]:
# One extendable float32 array per aggregation (mean / median / max), all
# created under the file root with the same layout: rows are appended along
# the first axis and every row has 5270 columns.
# NOTE(review): 5270 is presumably the number of categories and 1768182 the
# expected number of products - confirm against the data.
predictions_mean, predictions_median, predictions_max = [
    hdf5_file.create_earray(
        hdf5_file.root,
        node_name,
        tables.Float32Atom(),
        shape=(0, 5270),
        expectedrows=1768182,
    )
    for node_name in ('keras_xcp_mean', 'keras_xcp_median', 'keras_xcp_max')
]

In [6]:
class CDiscountKeras(Dataset):
    '''
    Dataset over the images listed in a csv file.

    The csv is read with pandas and must provide the columns
    "file_name", "id" and "img_num". Every item is a tuple
    (image, id, img_num) where the image has been loaded with the
    keras image helpers, resized to img_size and scaled by transform1.
    '''

    def __init__(self, csv_file, img_size=(180, 180)):
        '''
        Parameters:
            csv_file: a csv file with all details about images
            img_size: a tuple (image rows, image columns)
        '''
        super(CDiscountKeras, self).__init__()
        self.csv_file = pd.read_csv(csv_file)
        listing = self.csv_file
        # keep the needed columns as plain arrays for cheap positional access
        self.img_files = listing["file_name"].values
        self.ids = listing["id"].values
        self.img_nums = listing["img_num"].values
        self.img_size = img_size

    def __getitem__(self, index):
        '''
        Returns the tuple (scaled image array, id, img_num) for the
        row at position index.
        '''
        pixels = kimage.img_to_array(
            kimage.load_img(self.img_files[index], target_size=self.img_size)
        )
        return self.transform1(pixels), self.ids[index], self.img_nums[index]

    def __len__(self):
        '''Number of image files listed in the csv.'''
        return len(self.img_files)

    def transform1(self, x, reverse_mode=False):
        '''
        Rescales values between [0, 255] and [0, 1].

        Parameter:
            reverse_mode: boolean, default is False
                          False: [0, 255]-->[0, 1]
                          True:  [0, 1]-->[0, 255]
        '''
        return x * 255. if reverse_mode else x/255.

    def transform2(self, x, reverse_mode=False):
        '''
        Rescales values between [0, 255] and [-1, 1].

        Parameter:
            reverse_mode: boolean, default is False
                          False: [0, 255]-->[-1, 1]
                          True:  [-1, 1]-->[0, 255]
        '''
        if not reverse_mode:
            return ((x / 255.0)-0.5)*2.0
        return ((x / 2.0)+0.5)*255.0

In [7]:
# Dataset and batched loader over every image listed in FILELIST_CSV.
cdiscount_test_data = CDiscountKeras(FILELIST_CSV)
dataloader = DataLoader(dataset=cdiscount_test_data, num_workers=16, batch_size=256)

# A single iterator is created here and consumed by the prediction loop;
# its length gives the number of batches that loop will request.
data = iter(dataloader)
total_batches = len(data)

In [8]:
# Load the saved Keras model from 'xception_v2.hdf5' (path is relative to the
# working directory). The prediction loop later calls model.predict on it.
model=load_model('xception_v2.hdf5')

In [9]:
def process_buffer(data=None, index=None):
    '''
    Aggregates buffered predictions per id and appends them to the HDF5 arrays.

    Parameters:
        data:  2-D array of predictions, one row per predicted image and
               one column per entry of category_ids
        index: ids matching the rows of data; rows sharing an id are merged

    The rows are grouped by id and reduced with mean, median and max; the
    three results are appended to predictions_mean, predictions_median and
    predictions_max respectively. The ids themselves are not written.
    Returns None.
    '''
    frame = pd.DataFrame(data=data, index=index, columns=category_ids, dtype=np.float32)

    # merge rows with same index (product id); one grouping serves all reducers
    per_product = frame.groupby(frame.index)
    reduced = [per_product.mean(), per_product.median(), per_product.max()]

    targets = (predictions_mean, predictions_median, predictions_max)
    for target, table in zip(targets, reduced):
        target.append(table.values)

In [10]:
#total_batches=200
# Predict every batch and stream per-product aggregates into the HDF5 file.
#
# Predictions are collected in buffer_pred / buffer_id and flushed through
# process_buffer only when a batch starts with image number 0, so that the
# rows of one id are never split between two flushes.
bar = tqdm_notebook(total=total_batches)
try:
    for batch_number in range(total_batches):

        # built-in next() works for every iterator; the iterator's own
        # .next() method only exists on Python 2 style iterators
        current_batch = next(data)

        # current_batch is (images, ids, image numbers) as returned by the dataset
        first_image_number = current_batch[2][0]

        ids = np.array(current_batch[1].numpy())
        X_data = np.array(current_batch[0].numpy())
        if INCLUDE_FLIPPED_IMAGES:
            # add a copy reversed along the third axis
            # (assumes batches are (N, rows, cols, channels), i.e. a horizontal
            # flip - TODO confirm) and duplicate the ids to match
            ids = np.concatenate((ids, ids), axis=0)
            X_data = np.concatenate((X_data, X_data[:, :, ::-1, :]), axis=0)

        ## Predictions of model
        predictions = model.predict(X_data, verbose=0)

        if batch_number == 0:
            # first batch: simply put predictions in the buffers
            buffer_pred = predictions
            buffer_id = ids
        elif first_image_number == 0:
            # the batch starts with image number 0:
            # 1. first process the items of the buffer
            process_buffer(data=buffer_pred, index=buffer_id)
            # 2. then keep only the current predictions and ids
            buffer_pred = predictions
            buffer_id = ids
        else:
            # otherwise simply append predictions and ids to the buffers
            buffer_pred = np.concatenate((buffer_pred, predictions), axis=0)
            buffer_id = np.concatenate((buffer_id, ids), axis=0)

        # last batch: flush whatever is still buffered
        if batch_number == total_batches - 1:
            process_buffer(data=buffer_pred, index=buffer_id)

        del ids, predictions, X_data, current_batch
        bar.update()
finally:
    # always close the file, even when the loop fails or is interrupted,
    # so rows appended so far are written out and the handle is released
    hdf5_file.close()

A Jupyter Widget