# Ternary Classifier Predictions

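Loads the predictions saved from a previously trained ternary classifier (diagram / sensor / unsure) and plots, for each arXiv category, the percentage of images assigned to each class per year. The resulting figures are saved to disk.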

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rc
import numpy as np
import sqlite3
import pickle
import json
import math
import random
import pandas as pd

In [None]:
# Pickle file holding the saved classifier predictions.
image_pkl_filename = "ternary_classifier_predictions.pkl"

# Deserialize the predictions (pickle can execute code: only load trusted files).
with open(image_pkl_filename, mode="rb") as pkl_file:
    image_data = pickle.load(pkl_file)

In [None]:
# Preview only the first few entries instead of dumping the whole structure.
image_data[:5]

In [None]:
# Full slice of the loaded predictions (a shallow copy if `image_data` is a list).
# `test_data` is reassigned further down from `arr[index]`.
test_data = image_data[:]

In [None]:
# NumPy view of the predictions so rows/columns can be indexed (`arr[:, :1]`, `arr[index]`).
# NOTE(review): if the rows are ragged, recent NumPy versions need `dtype=object` here - confirm
# against the pickle contents.
arr = np.array(image_data)

In [None]:
# Preview only the first few rows instead of dumping the whole array.
arr[:5]

In [None]:
# Key arXiv categories to highlight, kept in sorted order.
selected_cats = sorted([
    "cs.CV", "stat.ML", "nlin.CG", "cs.GR",
    "cs.AI", "astro-ph", "astro-ph.GA", "astro-ph.IM",
    "cond-mat.str-el", "cs.LG", "cs.IT", "math-ph",
])
# Array form of the same names.
scats = np.array(selected_cats)
scats

In [None]:
# First column of every row, i.e. the category name of each entry.
# Kept as a plain list because `cats.index(...)` is used further down.
cats = [row[0] for row in arr]
cats

In [None]:
# `cats` is a plain Python list, which has no `.shape`; report its length instead.
len(cats)

In [None]:
# Shape of the selected-category array (one entry per chosen category).
scats.shape

In [None]:
# find indexes of these categories
# `scats == cats` compares two sequences of different lengths, so it cannot produce
# per-category matches. Test each category name for membership in the selected set
# instead; the result holds the matching positions in `cats`, in ascending order.
index = np.flatnonzero(np.isin(cats, scats))

In [None]:
# Position in `cats` of each selected category, in the order of `scats`.
# `list.index` raises ValueError if a selected category is missing from the data.
index = []

for selected in scats:
    position = cats.index(selected)
    print(position)
    index.append(position)

In [None]:
# Inspect the row positions currently held in `index`.
index

In [None]:
# random indexes
# Seeded generator so that Restart & Run All draws the same rows every time.
# NOTE(review): this overwrites the `index` built from the selected categories above.
rng = np.random.default_rng(42)
# Sampling without replacement cannot draw more rows than exist.
sample_size = min(15, arr.shape[0])
index = rng.choice(arr.shape[0], sample_size, replace=False)

In [None]:
# Rows of `arr` at the positions in `index`; `test_data` is what the plotting code below reads.
test_data = arr[index]

In [None]:
# Inspect the selected rows before plotting.
test_data

### Generate plot

Loop over all of the data and retrieve the category, year and predictions. Calculate the totals for each class prediction and convert to percentages for diagram, sensor and unsure. Plot each category to a subplot with bars for each year within that category. Delete additional plots, add title and legend, then save.

In [None]:
# Grid layout: one subplot per category, filled row by row.
xdim = 15
ydim = 12

# Fail early with a clear message rather than an IndexError part-way through plotting.
if len(test_data) > xdim * ydim:
    raise ValueError(
        "{} categories do not fit in a {}x{} grid".format(len(test_data), ydim, xdim)
    )

# squeeze=False keeps `ax` two-dimensional whatever the grid dimensions are.
fig, ax = plt.subplots(ydim, xdim, squeeze=False)
fig.subplots_adjust(hspace=0.2, wspace=0.2)
fig.set_size_inches(40, 30)

# NOTE: pyplot-level tick calls act on the current axes only, not on every subplot.
plt.yticks([])
plt.xticks([])

# Running totals of each class over every category and year plotted.
dt = 0
st = 0
ut = 0

for i, cat in enumerate(test_data):
    # Row-major position of this category in the grid.
    y, x = divmod(i, xdim)

    diagrams = []
    sensors = []
    unsures = []
    sum_totals = []

    # cat[3] holds one entry of class totals per bar: diagram, sensor, unsure.
    for class_totals in cat[3]:
        sum_totals.append(sum(class_totals))

        diagrams.append(class_totals[0])
        sensors.append(class_totals[1])
        unsures.append(class_totals[2])

        dt += class_totals[0]
        st += class_totals[1]
        ut += class_totals[2]

    # Convert counts to percentages; a bar with no predictions at all is drawn
    # as 0% instead of dividing by zero.
    diagram_percentages = [d / t * 100 if t else 0.0 for d, t in zip(diagrams, sum_totals)]
    sensor_percentages = [s / t * 100 if t else 0.0 for s, t in zip(sensors, sum_totals)]
    unsure_percentages = [u / t * 100 if t else 0.0 for u, t in zip(unsures, sum_totals)]

    indexes = list(range(len(diagrams)))

    # Stacked bars: diagram at the bottom, then sensor, then unsure on top.
    axis = ax[y, x]
    axis.bar(indexes, diagram_percentages, color='#26bfb8', edgecolor='white')
    axis.bar(indexes, sensor_percentages, bottom=diagram_percentages, color='#e6d929', edgecolor='white')
    axis.bar(
        indexes,
        unsure_percentages,
        bottom=[d + s for d, s in zip(diagram_percentages, sensor_percentages)],
        color='#e62929',
        edgecolor='white',
    )

    axis.set_xticklabels([])
    axis.set_yticklabels([])

    # The subplot title is the category name.
    axis.title.set_text(cat[0])

fig.suptitle("Ternary classifier predictions on arXiv primary categories", x=0.5, y=0.92, size=28)

# Remove the grid cells that did not receive a category.
for i in range(len(test_data), xdim * ydim):
    y, x = divmod(i, xdim)
    fig.delaxes(ax[y][x])

# Figure-level legend built from proxy rectangles, one per class colour.
colors = {'unsure':'#e62929', 'sensor':'#e6d929', 'diagram':'#26bfb8'}
labels = list(colors.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors[label]) for label in labels]
fig.legend(handles, labels, loc=(0.908,0.08))

fig.savefig("plot_ternary_classifier_predictions.svg", dpi=300, bbox_inches='tight', pad_inches=0.5)

print("dt:",dt)
print("st:",st)
print("ut:",ut)

### Methods paper version

Updated to only show key categories

In [None]:
# Grid layout for the methods-paper figure: 4 x 3 subplots.
xdim = 4
ydim = 3

# Select the key categories here, so the cell does not depend on whatever
# `test_data` last held. On a top-to-bottom run that is a 15-row random sample,
# which overflows the 12 axes of this grid.
# `list.index` raises ValueError if a selected category is missing from `arr`.
category_names = [row[0] for row in arr]
key_data = [arr[category_names.index(name)] for name in sorted(selected_cats)]

# Fail early with a clear message rather than an IndexError part-way through plotting.
if len(key_data) > xdim * ydim:
    raise ValueError(
        "{} categories do not fit in a {}x{} grid".format(len(key_data), ydim, xdim)
    )

# squeeze=False keeps `ax` two-dimensional whatever the grid dimensions are.
fig, ax = plt.subplots(ydim, xdim, squeeze=False)
fig.subplots_adjust(hspace=0.2, wspace=0.2)
fig.set_size_inches(16, 12)

# NOTE: pyplot-level tick calls act on the current axes only, not on every subplot.
plt.yticks([])
plt.xticks([])

# Running totals of each class over the plotted categories; read by the totals cell below.
dt = 0
st = 0
ut = 0

for i, cat in enumerate(key_data):
    # Row-major position of this category in the grid.
    y, x = divmod(i, xdim)

    diagrams = []
    sensors = []
    unsures = []
    sum_totals = []

    # cat[3] holds one entry of class totals per bar: diagram, sensor, unsure.
    for class_totals in cat[3]:
        sum_totals.append(sum(class_totals))

        diagrams.append(class_totals[0])
        sensors.append(class_totals[1])
        unsures.append(class_totals[2])

        dt += class_totals[0]
        st += class_totals[1]
        ut += class_totals[2]

    # Convert counts to percentages; a bar with no predictions at all is drawn
    # as 0% instead of dividing by zero.
    diagram_percentages = [d / t * 100 if t else 0.0 for d, t in zip(diagrams, sum_totals)]
    sensor_percentages = [s / t * 100 if t else 0.0 for s, t in zip(sensors, sum_totals)]
    unsure_percentages = [u / t * 100 if t else 0.0 for u, t in zip(unsures, sum_totals)]

    indexes = list(range(len(diagrams)))

    # Stacked bars: diagram at the bottom, then sensor, then unsure on top.
    axis = ax[y, x]
    axis.bar(indexes, diagram_percentages, color='#26bfb8', edgecolor='white')
    axis.bar(indexes, sensor_percentages, bottom=diagram_percentages, color='#e6d929', edgecolor='white')
    axis.bar(
        indexes,
        unsure_percentages,
        bottom=[d + s for d, s in zip(diagram_percentages, sensor_percentages)],
        color='#e62929',
        edgecolor='white',
    )

    axis.set_xticklabels([])
    axis.set_yticklabels([])

    # The subplot title is the category name.
    axis.title.set_text(cat[0])

# Remove the grid cells that did not receive a category.
for i in range(len(key_data), xdim * ydim):
    y, x = divmod(i, xdim)
    fig.delaxes(ax[y][x])

# Figure-level legend built from proxy rectangles, one per class colour.
colors = {'unsure':'#e62929', 'sensor':'#e6d929', 'diagram':'#26bfb8'}
labels = list(colors.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors[label]) for label in labels]
fig.legend(handles, labels, loc=(0.92,0.075))

print("dt:",dt)
print("st:",st)
print("ut:",ut)

In [None]:
# Write the current figure in both vector (SVG) and raster (PNG) form.
for out_name in (
    "plot_ternary_classifier_predictions_subset.svg",
    "plot_ternary_classifier_predictions_subset.png",
):
    fig.savefig(out_name, dpi=300, bbox_inches='tight', pad_inches=0.1)

In [None]:
# Overall totals from the last plotting cell that ran: the grand total and each
# class's share of it, as a fraction between 0 and 1.
tt = dt + st + ut
dper, sper, uper = (count / tt for count in (dt, st, ut))
for value in (tt, dper, sper, uper):
    print(value)