### Imports

In [1]:
import os
import math
from six.moves import cPickle as cp

import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
sns.set()

In [2]:
import mixtureofconcave as subm
import determinantal as logsubm
import plottingtools

### Plottingtools

In [None]:
def plotall(direc, n):
    """Show images 1.jpg ... n.jpg from ``direc`` as a grid on one 20x20 figure.

    Parameters
    ----------
    direc : str
        Prefix that is concatenated directly with "<i>.jpg", so it must
        already end with a path separator.
    n : int
        Number of images to draw; files are numbered from 1 to n.
    """
    # plt.subplot needs integer grid dimensions; np.floor alone returns a float.
    ncol = max(1, int(np.floor(np.sqrt(n))))
    # Ceiling division: just enough rows for n images, without a spare empty row.
    nrow = max(1, -(-n // ncol))

    plt.figure(figsize=(20,20))
    for i in range(n):
        plt.subplot(nrow, ncol, i+1)
        img = plt.imread(direc + "{}.jpg".format(i+1))
        plt.axis("off")
        plt.imshow(img)

In [None]:
def plotsumm(direc, n, S):
    """Draw the images selected by ``S`` side by side in a single row.

    ``S`` holds 0-based indices: entry ``s`` is read from ``direc + "<s+1>.jpg"``.
    ``n`` is accepted for call compatibility but is not used here.
    """
    n_selected = len(S)

    # Two inches of width per picture, two inches of height overall.
    plt.figure(figsize=(2 * n_selected, 2))
    for slot, item in enumerate(S, start=1):
        plt.subplot(1, n_selected, slot)
        fname = "{}.jpg".format(item + 1)
        picture = plt.imread(direc + fname)
        plt.axis("off")
        plt.imshow(picture)
        

In [None]:
# plotall(data_dir+"set{}/".format(idx+1), 100)
# plotsumm(data_dir+"set{}/".format(idx+1), 100, np.random.choice(100, 10))

### Corel Images

Download the feature files from the UCI repository:
https://archive.ics.uci.edu/ml/machine-learning-databases/CorelFeatures-mld/

(Note: these are very low-level features.)

In [None]:
# Corel feature files are expected in ./corelpictures/ under the current working directory.
data_dir = "{}/corelpictures/".format(os.getcwd())

In [None]:
# Note : linelist[62480] from LayoutHistogram is problematic. Contains 1424 lines fudged in 1.
# Only the first n_rows lines of every file are parsed, which stays clear of that line.
n_rows = 50000

# Maps feature-type name -> float array with one row per parsed line.
feadict = {}
for featype in ["ColorHistogram", "ColorMoments", "LayoutHistogram", "CoocTexture"]:

    # The context manager closes the file even if reading raises.
    with open(data_dir + featype + ".asc") as featfile:
        linelist = featfile.readlines()

    # Sanity check: number of lines in the file and length of its first line.
    print(len(linelist), len(linelist[0]))

    # Drop the first whitespace-separated token of each line
    # (presumably a record id -- TODO confirm) and parse the rest as floats.
    fd = np.array([line.split()[1:] for line in linelist[:n_rows]]).astype(float)

    feadict[featype] = fd

### Stanford Dogs

In [3]:
# Stanford Dogs .mat files are expected in ./stanforddogs/ under the current working directory.
data_dir = "{}/stanforddogs/".format(os.getcwd())

In [4]:
from scipy import io

# loadmat returns the dictionary of variables stored in each .mat file.
trdata = io.loadmat(data_dir + "train_data.mat")
tedata = io.loadmat(data_dir + "test_data.mat")

# Per-image string entries: unwrap the nested MATLAB cells into flat str arrays.
trainimgs = np.array([cell.astype(str) for cell in trdata["train_info"][0][0][0][:, 0]])[:, 0]
testimgs = np.array([cell.astype(str) for cell in tedata["test_info"][0][0][0][:, 0]])[:, 0]
allimgs = np.concatenate((trainimgs, testimgs))

# Feature matrices, train rows stacked on top of test rows.
traindata = trdata["train_fg_data"]
testdata = tedata["test_fg_data"]
alldata = np.concatenate((traindata, testdata), axis=0)

# Integer class label of every image, in the same train-then-test order.
traincats = trdata["train_info"][0][0][2][:, 0].astype(int)
testcats = tedata["test_info"][0][0][2][:, 0].astype(int)
allcats = np.concatenate((traincats, testcats))

print("train data", traindata.shape)
print("test data", testdata.shape)
n_classes_found = len(np.unique(allcats))
print("all data:", allimgs.shape, "images with", alldata.shape, "features in", n_classes_found, "classes")

train data (12000, 5376)
test data (8580, 5376)
all data: (20580,) images with (20580, 5376) features in 120 classes


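### Get learnt weights

**TODO:** this section has no cells in the notebook as shown, yet the cells below use `X`, `wlearnt` and `idx`. Load or define them here so the notebook runs from a fresh kernel.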

### Summarize

In [None]:
# Rows of X are the items to summarise, columns are their features.
n_samples, m_features = X.shape

In [None]:
#%% Grouping for collection5 (idx=4)
# Both hand-made groupings label the same 15 consecutive runs of items and
# differ only in the group id given to each run, so they are stored
# run-length encoded and expanded with np.repeat (100 labels each).
_run_lengths = [8, 1, 13, 7, 11, 14, 2, 4, 9, 7, 5, 6, 7, 4, 2]

# Fine grouping: 11 groups (ids 0..10).
assignA = np.repeat([0, 8, 1, 2, 1, 3, 4, 5, 4, 5, 6, 7, 8, 9, 10], _run_lengths)

# Coarse grouping: 8 groups (ids 0..7).
assignB = np.repeat([0, 6, 1, 2, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7, 7], _run_lengths)

# Random grouping: 100 labels drawn from 0..6, seeded so reruns agree.
np.random.seed(0)
assignR = np.random.choice(7, 100)

In [None]:
#%% Quotas for collection5 (idx=4), assignA

p_groups = len(np.unique(assignA))
# One colour per group. Scaling arange(p_groups) gives exactly p_groups entries;
# arange(0, 256, step) returned an extra colour whenever 256 is not a multiple
# of p_groups (12 colours for the 11 groups used here).
memcolors = np.array(["#11{:02X}dd".format(pp) for pp in np.arange(p_groups) * (256//p_groups)])

# One-hot membership matrix: one row per item, one column per group.
# Relies on the labels being exactly 0 .. p_groups-1.
Memvec = np.eye(p_groups)[assignA].astype(int)
print(np.sum(Memvec, axis=0))  # number of items in each group

# One quota entry per group (passed with Memvec to the quota-aware selection below).
quo = np.array([1,1,1,1,1,0,1,1,1,1,0])
assert len(quo) == p_groups, "quo needs exactly one entry per group"

# Number of items to select.
k_budget = 15

In [None]:
#%% Quotas for collection5 (idx=4), assignB

p_groups = len(np.unique(assignB))
# One colour per group. Scaling arange(p_groups) gives exactly p_groups entries
# for any group count; arange(0, 256, step) returns an extra colour whenever
# 256 is not a multiple of p_groups.
memcolors = np.array(["#11{:02X}dd".format(pp) for pp in np.arange(p_groups) * (256//p_groups)])

# One-hot membership matrix: one row per item, one column per group.
# Relies on the labels being exactly 0 .. p_groups-1.
Memvec = np.eye(p_groups)[assignB].astype(int)
print(np.sum(Memvec, axis=0))  # number of items in each group

# One quota entry per group (passed with Memvec to the quota-aware selection below).
quo = np.array([1,0,1,1,1,1,1,0])
assert len(quo) == p_groups, "quo needs exactly one entry per group"

# Number of items to select.
k_budget = 10

In [None]:
#%% Quotas for collection5 (idx=4), assignR

p_groups = len(np.unique(assignR))
# One colour per group. Scaling arange(p_groups) gives exactly p_groups entries;
# arange(0, 256, step) returned an extra colour whenever 256 is not a multiple
# of p_groups (8 colours for 7 groups).
memcolors = np.array(["#11{:02X}dd".format(pp) for pp in np.arange(p_groups) * (256//p_groups)])

# One-hot membership matrix: one row per item, one column per group.
# Relies on the labels being exactly 0 .. p_groups-1.
Memvec = np.eye(p_groups)[assignR].astype(int)
print(np.sum(Memvec, axis=0))  # number of items in each group

# One quota entry per group (passed with Memvec to the quota-aware selection below).
quo = np.array([1,1,0,2,1,0,1])
assert len(quo) == p_groups, "quo needs exactly one entry per group"

# Number of items to select.
k_budget = 15

In [None]:
# Greedy selection without quotas, and greedy selection given the group quotas.
S_s, objectives_s = subm.greedygains_submod(None, X, wlearnt, k_budget)
S_sq, objectives_sq = subm.greedyDMquota_submod(None, X, wlearnt, Memvec, quo, k_budget)
# Random baseline of k_budget *distinct* items. np.random.choice samples with
# replacement by default, which could pick the same item more than once.
S_r = np.random.choice(n_samples, k_budget, replace=False)
objective_sr = subm.submodgains(X, np.sum(X[S_r,:], axis=0), 0, None, wlearnt)

# Report each selection: its objective value followed by the chosen images.
print("Objective without quota", objectives_s[-1])
plotsumm(data_dir+"set{}/".format(idx+1), n_samples, np.sort(S_s))
print("Objective with quota", objectives_sq[-1])
plotsumm(data_dir+"set{}/".format(idx+1), n_samples, np.sort(S_sq))
print("Objective of random selection", objective_sr)
plotsumm(data_dir+"set{}/".format(idx+1), n_samples, np.sort(S_r))

# One panel per selection, drawn by plottingtools.vizbalance_MQ from the
# membership matrix, the quotas and the selected indices.
plt.figure(figsize=(15,5))
plt.subplot(1,3,1)
plottingtools.vizbalance_MQ(np.arange(n_samples), Memvec, "darkcyan", quo, S_s, "darkorange", "SPP")
plt.subplot(1,3,2)
plottingtools.vizbalance_MQ(np.arange(n_samples), Memvec, "darkcyan", quo, S_sq, "yellowgreen", "SPP-DMQ")
plt.subplot(1,3,3)
plottingtools.vizbalance_MQ(np.arange(n_samples), Memvec, "darkcyan", quo, S_r, "indianred", "Random")

In [None]:
# In sorted order: unconstrained selection first, quota selection second.
print(np.sort(S_s), np.sort(S_sq), sep="\n")

In [None]:
# In unsorted order
# The group id of each selected item is read back from its one-hot row in
# Memvec, so the printed labels always match the grouping the selections were
# computed with. (assignR used to be hard-coded here, which is only right
# when the assignR quota cell was the last one run.)
print(S_s)
print(np.argmax(Memvec[S_s], axis=1))
plotsumm(data_dir+"set{}/".format(idx+1), n_samples, S_s)
print(S_sq)
print(np.argmax(Memvec[S_sq], axis=1))
plotsumm(data_dir+"set{}/".format(idx+1), n_samples, S_sq)

In [None]:
# Show the first 100 images of the current collection for reference.
plotall("{}set{}/".format(data_dir, idx + 1), 100)