In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import machine_learning as ml

In [None]:
# Label prefixes. A full label is '<G_INS entry>-<G_CS entry>'; in the scatter
# plot the position of the prefix in this list selects the marker.
# (Presumably instrument identifiers -- confirm with the dataset owner.)
G_INS = [
    'P300',
    'R300',
]

# Label suffixes. The position of the suffix in this list selects the colour
# in the scatter plot. (Presumably class/species codes -- confirm.)
G_CS = [
    'ABA', 'CFR', 'ECO', 'EFA', 'KOX',
    'KPN', 'PMA', 'SAU', 'SHO', 'SMA',
]

In [None]:
from plot import color_pick, marker_pick

def plot_2D_scatter(data: np.ndarray, labels: np.ndarray, fig_size=(20, 10), fig_dpi=300, fig_title=None):
    """Scatter-plot 2-D points, one series per ``G_INS`` x ``G_CS`` label.

    A series is drawn for every label of the form ``'<ins>-<c>'`` built from
    the module-level ``G_INS`` and ``G_CS`` lists (even when no point carries
    that label, so the legend always lists every combination). The index of
    ``c`` in ``G_CS`` is passed to ``color_pick`` and the index of ``ins`` in
    ``G_INS`` to ``marker_pick``. Points whose label is not one of these
    combinations are not drawn.

    Args:
        data: Array-like indexed as ``data[row, col]``; column 0 is used as x
            and column 1 as y.
        labels: Array-like with one label string per row of ``data``.
        fig_size: Figure size, forwarded to ``plt.figure(figsize=...)``.
        fig_dpi: Figure resolution, forwarded to ``plt.figure(dpi=...)``.
        fig_title: Optional axes title; no title is set when ``None``.

    Returns:
        tuple: ``(fig, ax)`` -- the created figure and its single axes. The
        figure is also left as pyplot's current figure, so a following
        ``plt.savefig(...)`` keeps working.

    Raises:
        AssertionError: If ``data`` and ``labels`` differ in length.
    """
    # Coerce to arrays: with a plain list, ``labels == label`` would be a
    # scalar ``False`` and every series would silently come out empty.
    data = np.asarray(data)
    labels = np.asarray(labels)
    assert len(data) == len(labels), (
        f'data has {len(data)} rows but labels has {len(labels)} entries'
    )

    fig = plt.figure(figsize=fig_size, dpi=fig_dpi)
    ax = fig.add_subplot(111)

    for in_idx, ins in enumerate(G_INS):
        for c_idx, c in enumerate(G_CS):
            label = ins + '-' + c
            mask = labels == label  # computed once, reused for x and y
            # ``color=`` (not ``c=``): a single RGB(A) tuple passed as ``c`` is
            # ambiguous when the series happens to contain 3 or 4 points.
            ax.scatter(data[mask, 0], data[mask, 1], color=color_pick(c_idx),
                       marker=marker_pick(in_idx), s=18, label=label)

    # Legend anchored just outside the right edge, aligned to the bottom.
    ax.legend(bbox_to_anchor=(1.01, 0), fontsize=10, loc='lower left', borderaxespad=0)

    if fig_title is not None:
        ax.set_title(fig_title)

    return fig, ax

In [None]:
# Load the dataset index table; the CSV's first column becomes the DataFrame index.
# Later cells read the `data_file` and `label_name` columns of each row.
df = pd.read_csv('../dataset.csv', index_col=0)

In [None]:
# Build the feature matrix: one spectrum per row of `df`.
# Each `data_file` is read as a tab-separated numeric table; row 1 of the
# transposed table (the file's second column) is kept as the spectrum.
spectra = np.array([
    np.loadtxt(path, delimiter='\t', dtype=float).T[1]
    for path in df['data_file']
])

# Labels use '_' in the CSV; the scatter plot matches on '<ins>-<c>' with '-'.
labels = np.array([name.replace('_', '-') for name in df['label_name']])

# Fail early with a clear message rather than inside the embedding step.
assert spectra.ndim == 2, (
    f'expected a 2-D (samples x points) spectra matrix, got shape {spectra.shape}'
)
assert len(spectra) == len(labels)

In [None]:
# Reduce the spectra to 2 components with the project's `ml.do_tSNE` helper.
# NOTE: despite its name, `pca` holds the result of `do_tSNE`, not of a PCA.
# NOTE(review): presumably one 2-D point per spectrum -- confirm against
# machine_learning.do_tSNE; also check whether it fixes a random seed, since
# none is passed here.
pca = ml.do_tSNE(spectra, components=2)

In [None]:
# Draw the 2-D embedding and write the figure to disk.
plot_2D_scatter(pca, labels)

out_path = Path('../result/tSNE_2D_cs.png')
# savefig does not create missing directories; make sure the target exists so
# the notebook runs end-to-end on a fresh checkout.
out_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out_path, bbox_inches='tight', pad_inches=0.05, dpi=300)