### 초기설정

In [2]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=4d47fc7fc797f440cbbf7a66eba863a6dc066858a1587a36fbef6f9647a0518d
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [1]:
from efficientnet_pytorch import EfficientNet
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# from keras.applications.vgg16 import VGG16 
# from keras.models import Model
# from keras.applications.vgg16 import preprocess_input 
import os
import logging
import torch
import pickle
from tqdm import tqdm
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
# Directory holding the input .npy files
data_path = '/content/drive/MyDrive/Colab/Sketch_RNN_Together/Dataset/test'
# Where the extracted feature matrix is saved
feat_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/features.npy"
# Where the matching sample names are saved
filename_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/filenames.npy"
# Text file with one class name per line
cluster_list_path = '/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/cluster_list.txt'

# Read the class names with a context manager so the handle is closed, and drop
# blank lines: a trailing newline would otherwise add an empty '' class and
# inflate len(cluster_list), which is later used as the number of clusters.
with open(cluster_list_path, 'r') as cluster_file:
    cluster_list = [line.strip() for line in cluster_file if line.strip()]

# p = r"/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/sktch_log.pkl"

In [None]:
# Build data_list: the names of all .npy files directly inside data_path.
# The working directory is switched to data_path, so these bare file names
# resolve when they are opened relative to the current directory.
os.chdir(data_path)
with os.scandir(data_path) as entries:
    # os.scandir yields entries in arbitrary order; sort so that slices such
    # as data_list[-10:] select the same files on every run.
    data_list = sorted(
        entry.name for entry in entries if entry.name.endswith('.npy')
    )

### Feature Extraction

In [None]:
# Preprocessing pipeline applied to every PIL image before it is fed to the
# network: resize to 224, convert to a tensor, then normalise each channel
# with the mean / std triples below.
tfms = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)

def feature_extraction(image, model):
    """Run one flattened sketch through ``model`` and return its feature map.

    Args:
        image: array that can be reshaped to 28x28 (one bitmap).
            NOTE(review): presumably uint8 pixel values -- confirm against
            the dataset, since ``Image.fromarray`` depends on the dtype.
        model: object exposing ``extract_features(batch)``.

    Returns:
        Whatever ``model.extract_features`` returns for the single-image
        batch; computed without autograd tracking.
    """
    bitmap = Image.fromarray(image.reshape(28, 28)).convert("RGB")
    # Add the leading batch dimension expected by the network.
    img = tfms(bitmap).unsqueeze(0)
    # Pure inference: skip building the autograd graph for every image.
    # Callers that still call .detach() on the result keep working.
    with torch.no_grad():
        features = model.extract_features(img)
    return features

In [None]:
# EfficientNet-B0 feature extraction over the last 10 files of data_list,
# using at most the first 2000 images of each file.
model = EfficientNet.from_pretrained('efficientnet-b0')
# Switch to inference mode: a freshly constructed nn.Module is in training
# mode, where BatchNorm normalises with per-batch statistics and updates its
# running averages, which would make the extracted features depend on the
# order in which images are processed.
model.eval()

data = {}           # sample name -> flattened feature vector
cluster_list = []   # class names, one per processed file
for npy in tqdm(data_list[-10:]):
    class_name = npy.split('.')[0]
    cluster_list.append(class_name)
    image_list = np.load(npy)
    for i, image in enumerate(image_list[:2000]):
        feat = feature_extraction(image, model)
        feat = feat.detach().numpy().reshape(-1)
        # Sample names are "<class>_<index within file>".
        name = class_name + '_' + str(i)
        data[name] = feat
        if i % 200 == 0:
            # Periodic checkpoint so a crash does not lose everything.
            np.save(feat_path, np.array(list(data.values())))
            np.save(filename_path, np.array(list(data.keys())))

# Final save with every extracted sample.
np.save(feat_path, np.array(list(data.values())))
np.save(filename_path, np.array(list(data.keys())))

  0%|          | 0/10 [00:00<?, ?it/s]

Loaded pretrained weights for efficientnet-b0


100%|██████████| 10/10 [1:24:34<00:00, 507.47s/it]


### Dimension Reduction

In [None]:
#PCA
# feat = np.load(feat_path)
# from sklearn.decomposition import PCA
# pca = PCA(n_components=2, random_state=0)
# pca.fit(feat)
# x = pca.transform(feat)

In [None]:
# t-SNE: embed the saved feature vectors into 2 dimensions.
from sklearn.manifold import TSNE

feat = np.load(feat_path)
tsne = TSNE(
    n_components=2,
    init='pca',
    random_state=0,
)
x = tsne.fit_transform(feat)

### Clustering

In [None]:
def clustering_method(cmd, cluster_num, input):
  """Cluster ``input`` with the algorithm named by ``cmd`` and return labels.

  Args:
    cmd: one of 'kmeans', 'minibatch', 'dbscan', 'birch'.
    cluster_num: number of clusters requested; not used by 'dbscan', which
      is configured with fixed ``eps`` / ``min_samples`` instead.
    input: samples to cluster, passed straight to the estimator's ``fit``.

  Returns:
    The fitted estimator's ``labels_`` attribute.

  Raises:
    ValueError: if ``cmd`` is not one of the supported method names
      (previously this fell through and silently returned None).
  """
  supported = ('kmeans', 'minibatch', 'dbscan', 'birch')
  if cmd not in supported:
    raise ValueError(
        "unknown clustering method %r; expected one of %s"
        % (cmd, ', '.join(supported)))

  if cmd == 'kmeans':
    from sklearn.cluster import KMeans
    # n_jobs was deprecated in scikit-learn 0.23 and removed in 1.0, where
    # passing it raises TypeError; it is therefore no longer supplied.
    estimator = KMeans(n_clusters=cluster_num, random_state=0)
  elif cmd == 'minibatch':
    from sklearn.cluster import MiniBatchKMeans
    estimator = MiniBatchKMeans(n_clusters=cluster_num, random_state=0, batch_size=6)
  elif cmd == 'dbscan':
    from sklearn.cluster import DBSCAN
    estimator = DBSCAN(eps=1.8, min_samples=2)
  else:  # 'birch'
    from sklearn.cluster import Birch
    estimator = Birch(n_clusters=cluster_num)

  estimator.fit(input)
  return estimator.labels_

In [None]:
# Birch clustering of the 2-D embedding, one cluster per known class name.
labels = clustering_method('birch', len(cluster_list), x)

import plotly.express as px
from plotly.offline import plot

# Scatter of embedding columns 0 and 1, coloured by assigned cluster label.
fig = px.scatter(
    x,
    x=0,
    y=1,
    color=labels,
)
plot(fig)

### Testing

In [None]:
from collections import Counter

from sklearn.metrics import f1_score


def _class_of(sample_name):
    """Return the class part of a "<class>_<index>" sample name.

    Sample names are built by appending '_' and the image index to the class
    name, so only the LAST underscore is a separator; splitting on the first
    one would truncate class names that themselves contain underscores.
    """
    return sample_name.rsplit('_', 1)[0]


filenames = np.load(filename_path)

# Group sample names by assigned cluster: {cluster_id: [sample_name, ...]}
groups = {}
for f, cluster in zip(filenames, labels):
    groups.setdefault(cluster, []).append(f)

# Map every cluster to its majority class so that accuracy can be computed.
cluster_dict = {}
for cluster, members in groups.items():
    votes = Counter(_class_of(member) for member in members)
    # max() returns the first maximal element, so ties resolve to the class
    # that appears earliest in cluster_list; classes with no members count 0.
    cluster_dict[cluster] = max(cluster_list, key=lambda name: votes[name])

# Accuracy: compare each sample's true class with its cluster's majority class.
pred = []
gt = []
for cluster, members in groups.items():
    for food in members:
        pred.append(cluster_dict[cluster])
        # NOTE(review): the ground truth additionally drops everything after
        # the first '-', while the majority vote above does not. If class
        # names can contain '-', pred and gt use different label spaces --
        # confirm which form is intended.
        gt.append(_class_of(food).split('-')[0])

print("F1 ACC: " + str(f1_score(gt, pred, average='micro') * 100))