## Beyond Bags of Features: Spatial Pyramid Matching for Recognizing Natural Scene Categories

In [0]:

!ls -lha kaggle.json

!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6

-rw-r--r-- 1 root root 64 Dec  5 15:11 kaggle.json
Uninstalling kaggle-1.5.6:
  Successfully uninstalled kaggle-1.5.6
Requirement already up-to-date: pip in /usr/local/lib/python3.6/dist-packages (19.3.1)
Collecting kaggle==1.5.6
[?25l  Downloading https://files.pythonhosted.org/packages/62/ab/bb20f9b9e24f9a6250f95a432f8d9a7d745f8d24039d7a5a6eaadb7783ba/kaggle-1.5.6.tar.gz (58kB)
[K     |████████████████████████████████| 61kB 1.9MB/s 
Building wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.6-cp36-none-any.whl size=72859 sha256=23c62fb8b455d6f15a715b075ab68a9a4c90c0ec515a5593c0ce096f76b41217
  Stored in directory: /root/.cache/pip/wheels/57/4e/e8/bb28d035162fb8f17f8ca5d42c3230e284c6aa565b42b72674
Successfully built kaggle
Installing collected packages: kaggle
Successfully installed kaggle-1.5.6


In [0]:
# 캐글연동을 위한 토큰 입력
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json


# 버전이 1.5.6 이 아니면, 진행할 수 없다
! kaggle -v

Kaggle API 1.5.6


In [0]:
! kaggle competitions download -c 2019-ml-finalproject
! unzip 2019-ml-finalproject.zip

In [0]:
! yes | pip3 uninstall opencv-python
! yes | pip3 uninstall opencv-contrib-python
! yes | pip3 install opencv-python==3.4.2.16
! yes | pip3 install opencv-contrib-python==3.4.2.16
! yes | pip3 install kmc2

Uninstalling opencv-python-4.1.2.30:
  Would remove:
    /usr/local/lib/python3.6/dist-packages/cv2/*
    /usr/local/lib/python3.6/dist-packages/opencv_python-4.1.2.30.dist-info/*
Proceed (y/n)?   Successfully uninstalled opencv-python-4.1.2.30
Uninstalling opencv-contrib-python-4.1.2.30:
  Would remove:
    /usr/local/lib/python3.6/dist-packages/opencv_contrib_python-4.1.2.30.dist-info/*
Proceed (y/n)?   Successfully uninstalled opencv-contrib-python-4.1.2.30
Collecting opencv-python==3.4.2.16
[?25l  Downloading https://files.pythonhosted.org/packages/fa/7d/5042b668a8ed41d2a80b8c172f5efcd572e3c046c75ae029407e19b7fc68/opencv_python-3.4.2.16-cp36-cp36m-manylinux1_x86_64.whl (25.0MB)
[K     |████████████████████████████████| 25.0MB 4.3MB/s 
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: opencv-python
Successfully installed opencv-python-3.4.2.16
Collecting opencv-contrib-pyth

## 라이브러리

In [0]:
import cv2
import os
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.metrics import classification_report
import scipy.cluster.vq as vq
import pandas as pd
import kmc2
from sklearn.cluster import MiniBatchKMeans


## 데이터 로드

In [0]:
df_data=pd.read_csv('./Label2Names.csv',header=None)

DATA_ROOT_TRAIN="./train"
train_des=list()
train_labels=list()

for cls in tqdm(os.listdir(DATA_ROOT_TRAIN)):
  img_list=os.listdir(DATA_ROOT_TRAIN+'/'+cls)
  img_list.sort()
  

  if cls=='BACKGROUND_Google':
    label=102
  else:
    label=(df_data.index[df_data[1]==cls]+1).tolist()[0]

  for img in img_list:
    image=cv2.imread(DATA_ROOT_TRAIN+'/'+cls+'/'+img)
    image=cv2.resize(image,(256,256))
    train_des.append(image)
    train_labels.append(label)

100%|██████████| 102/102 [00:04<00:00, 23.62it/s]


In [0]:
train_des=np.array(train_des)
train_labels=np.array(train_labels)

In [0]:
DATA_ROOT_TEST="./testAll_v2"
test_des=list()
img_list=os.listdir(DATA_ROOT_TEST)
img_list.sort()

for img in tqdm(img_list):
  image=cv2.imread(DATA_ROOT_TEST+'/'+img)
  image=cv2.resize(image,(256,256))
  test_des.append(image)

100%|██████████| 1692/1692 [00:02<00:00, 780.59it/s]


In [0]:
DSIFT_STEP_SIZE=8
def extract_DenseSift_descriptors(img):
  sift=cv2.xfeatures2d.SIFT_create()
  dsift_step_size=DSIFT_STEP_SIZE
  keypoints=[cv2.KeyPoint(x,y,dsift_step_size)
    for y in range(0,img.shape[0],dsift_step_size)
      for x in range(0,img.shape[1],dsift_step_size)]
  keypoints, descriptors=sift.compute(img,keypoints)
  return descriptors

In [0]:
def input_vector_encoder(feature,codebook):
  code,_=vq.vq(feature,codebook)
  word_hist,bin_edges=np.histogram(code,bins=range(codebook.shape[0]+1),normed=True)
  return word_hist

In [0]:

def build_spatial_pyramid(image,descriptor,level):
  step_size=DSIFT_STEP_SIZE
  h=image.shape[0]//step_size
  w=image.shape[1]//step_size
  idx_crop=np.array(range(len(descriptor))).reshape(h,w)
  size=idx_crop.itemsize
  height,width=idx_crop.shape
  bh,bw=2**(3-level),2**(3-level)
  shape=(height//bh,width//bw,bh,bw)
  strides=size*np.array([width*bh,bw,width,1])

  crops=np.lib.stride_tricks.as_strided(
      idx_crop,shape=shape,strides=strides
  )
  des_idxs=[col_block.flatten().tolist() for row_block in crops
            for col_block in row_block]
  pyramid=[]
  for idxs in des_idxs:
    pyramid.append(np.asarray([descriptor[idx] for idx in idxs]))
  return pyramid

In [0]:
def spatial_pyramid_matching(image,descriptor,codebook,level):
  pyramid=[]
  if level==0:
    pyramid+=build_spatial_pyramid(image,descriptor,level=0)
    code=[input_vector_encoder(crop,codebook) for crop in pyramid]
    return np.asarray(code).flatten()
  if level==1:
    pyramid+=build_spatial_pyramid(image,descriptor,level=0)
    pyramid+=build_spatial_pyramid(image,descriptor,level=1)
    code=[input_vector_encoder(crop,codebook) for crop in pyramid]
    code_level_0=0.5*np.asarray(code[0]).flatten()
    code_level_1=0.5*np.asarray(code[1:]).flatten()
    return np.concatenate((code_level_0,code_level_1))
  if level==2:
    pyramid+=build_spatial_pyramid(image,descriptor,level=0)
    pyramid+=build_spatial_pyramid(image,descriptor,level=1)
    pyramid+=build_spatial_pyramid(image,descriptor,level=2)
    code=[input_vector_encoder(crop,codebook) for crop in pyramid]
    code_level_0=0.25*np.asarray(code[0]).flatten()
    code_level_1=0.25*np.asarray(code[1:5]).flatten()
    code_level_2=0.5*np.asarray(code[5:]).flatten()
    return np.concatenate((code_level_0,code_level_1,code_level_2))

In [0]:
def histogramIntersection(M,N):
  m=M.shape[0]
  n=N.shape[0]

  result=np.zeros((m,n))
  for i in range(m):
    for j in range(n):
      temp=np.sum(np.minimum(M[i],N[j]))
      result[i][j]=temp
  return result

## 1. Dense sift기술자 추출

In [0]:
from time import time

t0=time()

xtrain=[]
for img in train_des:
  x=extract_DenseSift_descriptors(img)
  xtrain.append(x)

x_train_dex=np.vstack((descriptor for descriptor in xtrain))

xtest=[]
for img in test_des:
  x=extract_DenseSift_descriptors(img)
  xtest.append(x)

x_test_dex=np.vstack((descriptor for descriptor in xtest))

print(time()-t0)

  # Remove the CWD from sys.path while we load stuff.


456.36689496040344


## 2. Kmeans군집화로 코드북 생성

In [0]:
t0=time()

codebooksize=400
seeding=kmc2.kmc2(np.array(x_train_dex).reshape(-1,128),codebooksize)
Kmeans=MiniBatchKMeans(codebooksize,init=seeding).fit(np.array(x_train_dex).reshape(-1,128))
codebook=Kmeans.cluster_centers_

print(time()-t0)

  """
  init_size=init_size)


808.218183517456


## 3. 기술자와 코드북을 이용해서 이미지당 피라미드(level=1) 히스토그램 생성

In [0]:
t0=time()

x_train=[spatial_pyramid_matching(train_des[i],xtrain[i],codebook,level=1) for i in range(len(train_des))]
x_test=[spatial_pyramid_matching(test_des[i],xtest[i],codebook, level=1) for i in range(len(test_des))] 

print(time()-t0)


  This is separate from the ipykernel package so we can avoid doing imports until


308.98332715034485


In [0]:
x_train=np.asarray(x_train)
x_test=np.asarray(x_test)

## 4. 학습데이터에 대해 histogramIntersection 커널을 가진 SVM분류기로 학습

In [0]:
t0=time()

from sklearn.svm import SVC
gramMatrix=histogramIntersection(x_train,x_train)

C_range=10.0**np.arange(-3,3)
gamma_range=10.0**np.arange(-3,3)
param_grid=dict(gamma=gamma_range.tolist(),C=C_range.tolist())

clf=GridSearchCV(SVC(kernel='precomputed'),param_grid,cv=5,n_jobs=-2)
clf.fit(gramMatrix,train_labels)

time()-t0

936.9802219867706

## 테스트 데이터에 대한 라벨 값 예측

In [0]:
predictMatrix=histogramIntersection(x_test,x_train)
label=clf.predict(predictMatrix)


## 제출형식

In [0]:
result=np.array(label).reshape(-1,1)
img_list=np.array(img_list).reshape(-1,1)

In [0]:
total_result=np.hstack([img_list,result])

In [0]:
df=pd.DataFrame(total_result,columns=["Id","Category"])
df.to_csv('results-hrkim-v3.csv',index=False,header=True)

In [0]:
pd.read_csv('results-hrkim-v3.csv')

Unnamed: 0,Id,Category
0,image_0001.jpg,45
1,image_0002.jpg,89
2,image_0003.jpg,72
3,image_0004.jpg,34
4,image_0005.jpg,14
...,...,...
1687,image_1688.jpg,17
1688,image_1689.jpg,61
1689,image_1690.jpg,29
1690,image_1691.jpg,38


In [0]:
! kaggle competitions submit -c 2019-ml-finalproject -f results-hrkim-v3.csv -m "Final_Term_Project"


100% 29.6k/29.6k [00:05<00:00, 5.12kB/s]
Successfully submitted to 2019.Fall.PatternRecognition 

+ 성능: 0.55673~0.56855
