# Compression using t-SNE
For hyperparameter tuning refer: https://distill.pub/2016/misread-tsne/ | Official t-SNE FAQ: https://lvdmaaten.github.io/tsne/
* Will use 2 broad approaches:
    1. Compressing the default dataset to lower dims
    2. Compressing the features extrated dataset to lower dims

In [15]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
from utils import Preprocessing
from sklearn.manifold import TSNE

# Approach 1:

In [2]:
df = pd.read_csv('ds01_withStress.csv')
df.drop('Elapsed time', axis = 1, inplace = True)

In [8]:
df

Unnamed: 0,Driver,ECG,EMG,foot GSR,hand GSR,HR,marker,RESP,stress
0,06,-0.048,0.124,9.051,19.072,90.0,12.36,39.97,1
1,06,-0.052,0.124,9.051,19.072,90.0,12.36,39.97,1
2,06,-0.057,0.124,9.051,19.072,90.0,12.36,39.97,1
3,06,-0.063,0.124,9.051,19.072,90.0,12.36,39.97,1
4,06,-0.067,0.124,9.051,19.072,90.0,12.36,39.97,1
...,...,...,...,...,...,...,...,...,...
48040,17b,-0.022,0.124,5.322,8.557,63.0,10.15,37.48,-1
48041,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1
48042,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1
48043,17b,-0.016,0.124,5.322,8.557,63.0,10.15,37.48,-1


In [23]:
def tsne_helper(n_compo, perpx, lr, driver, iters = 5000,show = True, return_df = False):
    
    tsne_v1 = TSNE(n_components = n_compo, perplexity = perpx, learning_rate = lr, n_iter = iters)
    df_tsne_v1 = tsne_v1.fit_transform(df[df['Driver']==driver].drop('Driver', axis = 1))
    
    if n_compo == 3:
        dftv1 = pd.DataFrame(df_tsne_v1, columns = ['Col1', 'Col2', 'Col3'])
        dftv1['stress'] = df[df['Driver']==driver]['stress']
        if show:
            fig = px.scatter_3d(dftv1, x='Col1', y='Col2', z='Col3',
                  color='stress')
            fig.show()

    elif n_compo == 2:
        dftv1 = pd.DataFrame(df_tsne_v1, columns = ['Col1', 'Col2'])
        dftv1['stress'] = df[df['Driver']==driver]['stress']
        if show:
            fig = px.scatter(df, x="Col1", y="Col2", color = 'stress')
            fig.show()

    if return_df:
        return dftv1, tsne_v1
    
    else:
        return tsne_v1

In [19]:
# t-SNE v1
n_compo = 3
perpx = 50
lr = 200
iters = 3500
driver = '06'
tsne_v1_1 = tsne_helper(n_compo, perpx, lr, driver)

In [21]:
# t-SNE v1.2
n_compo = 3
perpx = 100
lr = 200
iters = 5000
driver = '06'
tsne_v1_2 = tsne_helper(n_compo, perpx, lr, driver)

In [24]:
# t-SNE v1.3
n_compo = 2
perpx = 100
lr = 200
iters = 5000
driver = '06'
tsne_v1_3 = tsne_helper(n_compo, perpx, lr, driver)

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['Driver', 'ECG', 'EMG', 'foot GSR', 'hand GSR', 'HR', 'marker', 'RESP', 'stress'] but received: Col1