In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots 
from sklearn.manifold import TSNE

In [3]:
# Read the sample table from disk and preview its first rows.
samplesPath = 'datasets/samples.csv'
samples = pd.read_csv(samplesPath)
samples.head()

Unnamed: 0,id,data1,data2
0,1,71.69,4.98
1,2,68.82,4.54
2,3,81.37,2.28
3,4,-2.24,2.16
4,5,94.1,11.55


In [4]:
# Marker-only scatter of the `data1` column against the `data2` column.
rawScatter = go.Scatter(
    x=samples['data1'],
    y=samples['data2'],
    mode='markers',
)

fig = go.Figure()
fig.add_trace(rawScatter)
fig.show()

In [5]:
def dNorm(data, eps=1e-7):
    """Min-max scale ``data`` column by column.

    The lower bound used for scaling is ``min - eps`` rather than ``min``.
    With a positive ``eps`` this means the smallest value in a column maps to
    a value slightly above 0, the largest maps to exactly 1, and the
    denominator is never zero even when a column is constant.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Numeric values to scale. NaNs are ignored when locating min and max.
    eps : float, default 1e-7
        Offset subtracted from the column minimum.

    Returns
    -------
    Same type as ``data``
        The scaled values, with the same index and columns as ``data``.
    """
    lower = data.min(axis=0, skipna=True) - eps
    upper = data.max(axis=0, skipna=True)
    return (data - lower) / (upper - lower)

def normData(df, directedCols, undirectedCols, eps=1e-7):
    """Min-max scale two groups of columns in opposite directions.

    Columns in ``directedCols`` are scaled so that larger raw values give
    larger results (column maximum -> 1). Columns in ``undirectedCols`` are
    scaled and then flipped with ``1 - ...`` so that larger raw values give
    smaller results (column minimum -> 1). In both cases ``eps`` widens the
    scaling range by a tiny amount, which keeps the denominator non-zero and
    keeps the results strictly above 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table.
    directedCols : list
        Column labels scaled in the ascending direction.
    undirectedCols : list
        Column labels scaled in the inverted direction.
    eps : float, default 1e-7
        Offset applied to the min (directed) or max (inverted) bound.

    Returns
    -------
    pandas.DataFrame
        The directed columns followed by the inverted columns, side by side.
    """
    # Helpers carry private names so they do not shadow a notebook-level dNorm.
    def _scaleAscending(block):
        lower = block.min(axis=0, skipna=True) - eps
        upper = block.max(axis=0, skipna=True)
        return (block - lower) / (upper - lower)

    def _scaleInverted(block):
        lower = block.min(axis=0, skipna=True)
        upper = block.max(axis=0, skipna=True) + eps
        return 1 - (block - lower) / (upper - lower)

    ascending = _scaleAscending(df[directedCols])
    inverted = _scaleInverted(df[undirectedCols])

    return pd.concat([ascending, inverted], axis=1)

In [6]:
# Scale the same two columns in two ways so the results can be compared:
#   treatSamples    - data1 passed as a directed column, data2 as an undirected column
#   nonTreatSamples - both columns passed through dNorm
treatSamples = normData(
    samples,
    directedCols=['data1'],
    undirectedCols=['data2'],
)
nonTreatSamples = dNorm(samples.loc[:, ['data1', 'data2']])

display(treatSamples.head())
nonTreatSamples.head()

Unnamed: 0,data1,data2
0,0.766787,0.889287
1,0.738024,0.899128
2,0.8638,0.949676
3,0.025857,0.95236
4,0.991381,0.74234


Unnamed: 0,data1,data2
0,0.766787,0.110713
1,0.738024,0.100872
2,0.8638,0.050324
3,0.025857,0.04764
4,0.991381,0.25766


In [7]:
# Only the measurement columns are embedded. `samples` also has an `id` column
# (1, 2, 3, ... in the preview), which looks like a row identifier; feeding it
# to t-SNE would let row order drive the "raw" embedding.
tsneFeatures = ['data1', 'data2']

# Fixed seed so re-running the notebook reproduces the same embeddings.
model = TSNE(random_state=42)

# The same estimator is refitted for each input; after this cell `model`
# holds the state of the last fit only (nonTreatSamples).
tsne1 = model.fit_transform(samples[tsneFeatures])
tsne2 = model.fit_transform(treatSamples)
tsne3 = model.fit_transform(nonTreatSamples)

# Split each 2-column embedding into its two coordinates for plotting.
x1, y1 = tsne1[:, 0], tsne1[:, 1]
x2, y2 = tsne2[:, 0], tsne2[:, 1]
x3, y3 = tsne3[:, 0], tsne3[:, 1]

In [8]:
# Iteration count recorded on `model`; it was last fitted on nonTreatSamples,
# so this reflects that run only, not the two earlier fits.
model.n_iter_

999

In [9]:
# One panel per embedding, laid out left to right in a single row.
panelTitles = ('Raw Data', 'Treat Normalization Data', 'Non-Treat Normalization Data')
fig = make_subplots(rows=1, cols=3, subplot_titles=panelTitles)

X = [x1, x2, x3]
Y = [y1, y2, y3]

for index, (xs, ys) in enumerate(zip(X, Y)):
    panel = go.Scatter(x=xs, y=ys, mode='markers')
    # Subplot columns are 1-based, hence the +1.
    fig.add_trace(panel, row=1, col=index + 1)

fig.update_layout(
    title={'text': 'TSNE model with the same dataset'},
    template='seaborn',
    showlegend=False,
)

fig.show()

In [41]:
def gg():
    """Print 'hee', then hand back the pair (5, 'Hello')."""
    print('hee')
    number = 5
    return number, 'Hello'
# Calling gg() prints 'hee' as a side effect and binds the returned tuple to G.
G = gg()

hee


In [42]:
# Display the tuple that the gg() call stored in G.
G

(5, 'Hello')

In [32]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import silhouette_score

# Candidate t-SNE hyper-parameters for the search.
param_grid = {
    'learning_rate': [50, 100, 150, 200, 500, 1000],
    'perplexity': [5, 10, np.sqrt(1000), 20, 25, 50],
}

# NOTE(review): the search object is only constructed in this cell; it is not fitted here.
# NOTE(review): scikit-learn documents callable scorers as scorer(estimator, X, y),
# whereas silhouette_score takes (X, labels) - confirm this combination works
# before calling tsneGSCV.fit.
tsneGSCV = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=silhouette_score,
    refit=True,
)

In [36]:
# Single t-SNE run on the treated samples with one hand-picked parameter pair.
singleRun = TSNE(learning_rate=50, perplexity=np.sqrt(1000))
data = singleRun.fit_transform(treatSamples)

# Plot the two embedding coordinates against each other.
fig = go.Figure()
fig.add_trace(go.Scatter(x=data[:, 0], y=data[:, 1], mode='markers'))
fig.show()

In [37]:
# Raises AttributeError: the list elements are plain ints, which have no `.index` attribute.
# NOTE(review): if the goal was each element's position, enumerate() provides it - confirm intent.
[i.index for i in [2,4,5]]

AttributeError: 'int' object has no attribute 'index'

In [43]:
learning_rate = [50, 500, 1000]
perplexity = [5, np.sqrt(1000), 50]

# Label every panel with the parameter pair it shows. Titles are listed row by
# row (learning_rate outer, perplexity inner) to match the grid filled below.
panelTitles = [
    f'learning_rate={lr}, perplexity={pp:.3g}'
    for lr in learning_rate
    for pp in perplexity
]

fig = make_subplots(
    rows=len(learning_rate),
    cols=len(perplexity),
    subplot_titles=panelTitles,
)

for row, lr in enumerate(learning_rate):
    for col, pp in enumerate(perplexity):
        # Fit a fresh model per parameter pair; the fixed seed keeps panels
        # comparable with each other and reproducible across runs.
        data = TSNE(learning_rate=lr, perplexity=pp, random_state=42).fit_transform(treatSamples)

        # Subplot rows/columns are 1-based, hence the +1.
        fig.add_trace(
            go.Scatter(x=data[:, 0], y=data[:, 1], mode='markers'),
            row=row + 1,
            col=col + 1,
        )

fig.update_layout(showlegend=False, template='seaborn')

fig.show()