# MDS

In [None]:
#資料集一：請找出台北高鐵站、新竹高鐵站、台中高鐵站、台南高鐵站、高雄高鐵站、花蓮瑞穗、台東池上彼此之間的距離，
#然後用MDS畫在2D平面上。也請使用Google地圖標記其位置。

In [None]:
import numpy as np
import math
from sklearn.manifold import MDS
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [None]:
# Latitude / longitude (decimal degrees) of every location, one row per place.
column, row = 7, 2
each_point_position = np.array(
    [
        (25.04788, 121.51629),  # Taipei HSR station
        (24.80840, 121.04027),  # Hsinchu HSR station
        (24.11235, 120.61613),  # Taichung HSR station
        (22.92482, 120.28569),  # Tainan HSR station
        (22.68811, 120.30909),  # Kaohsiung HSR station
        (23.49816, 121.37869),  # Ruisui, Hualien
        (23.12291, 121.21626),  # Chishang, Taitung
    ],
    dtype=float,
).reshape(column, row)

In [None]:
#經緯度轉換距離
def getDistance(latA, lonA, latB, lonB):
    """Return the distance in kilometres between two latitude/longitude points.

    The great-circle angle is computed on reduced latitudes and then corrected
    for the Earth's flattening.

    Args:
        latA, lonA: latitude and longitude of the first point, in degrees.
        latB, lonB: latitude and longitude of the second point, in degrees.

    Returns:
        The distance in kilometres, rounded to 4 decimal places. Identical
        points give exactly 0.0.
    """
    # Identical inputs would make the flattening correction divide by
    # sin(x / 2) ** 2 == 0, so answer them directly.
    if latA == latB and lonA == lonB:
        return 0.0

    ra = 6378140  # equatorial radius, metres
    rb = 6356755  # polar radius, metres
    flatten = (ra - rb) / ra  # flattening of the ellipsoid

    # Convert degrees to radians.
    radLatA = math.radians(latA)
    radLonA = math.radians(lonA)
    radLatB = math.radians(latB)
    radLonB = math.radians(lonB)

    # Reduced latitudes: tan(p) = (rb / ra) * tan(lat).
    pA = math.atan(rb / ra * math.tan(radLatA))
    pB = math.atan(rb / ra * math.tan(radLatB))

    cos_x = math.sin(pA) * math.sin(pB) + math.cos(pA) * math.cos(pB) * math.cos(radLonA - radLonB)
    # Floating-point rounding can push the value a hair outside [-1, 1],
    # which math.acos rejects with ValueError.
    cos_x = max(-1.0, min(1.0, cos_x))
    x = math.acos(cos_x)
    if x == 0:
        # Points that coincide up to floating-point precision.
        return 0.0

    c1 = (math.sin(x) - x) * (math.sin(pA) + math.sin(pB)) ** 2 / math.cos(x / 2) ** 2
    c2 = (math.sin(x) + x) * (math.sin(pA) - math.sin(pB)) ** 2 / math.sin(x / 2) ** 2
    dr = flatten / 8 * (c1 - c2)
    distance = ra * (x + dr)  # metres
    return round(distance / 1000, 4)

In [None]:
# Pairwise distance matrix (km) between all locations.
# The size comes from the coordinate table instead of a hard-coded 7, and each
# pair is computed once and mirrored, so the matrix is exactly symmetric with a
# zero diagonal.
n_points = len(each_point_position)
different_distances = np.zeros((n_points, n_points))
for i in range(n_points):
    lat_i, lon_i = each_point_position[i]
    for j in range(i + 1, n_points):
        lat_j, lon_j = each_point_position[j]
        pair_distance = getDistance(lat_i, lon_i, lat_j, lon_j)
        different_distances[i, j] = pair_distance
        different_distances[j, i] = pair_distance
different_distances = different_distances.round(3)
different_distances

In [None]:
# `different_distances` already holds pairwise distances, so MDS must be told
# the input is precomputed. With the default dissimilarity='euclidean' it would
# treat each row as a feature vector and embed distances between rows instead.
# Scale by one common factor only: a per-column MinMaxScaler would make the
# matrix asymmetric and distort the distances.
max_distance = different_distances.max()
X_scaled = different_distances / max_distance if max_distance > 0 else different_distances
# NOTE(review): recent scikit-learn releases may spell the precomputed option
# differently - confirm against the installed version.
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
X_2d = mds.fit_transform(X_scaled)

In [None]:
# Show the 2-D coordinates returned by MDS.
X_2d

In [None]:
import plotly.express as px 
from jupyter_dash import JupyterDash 
import dash_core_components as dcc 
import dash_html_components as html 
from dash.dependencies import Input, Output
import pandas as pd
# Plotting frame for the MDS result. It gets its own name because the callback
# below looks the frame up every time it runs, and the generic name `df` is
# rebound to the drink dataset further down the notebook.
mds_df = pd.DataFrame({"x": X_2d[:, 0], "y": X_2d[:, 1]})
mds_df['Class'] = ['台北高鐵站','新竹高鐵站','台中高鐵站','台南高鐵站','高雄高鐵站','花蓮瑞穗','台東池上']

# Build the app: heading, graph placeholder and a colour-scale dropdown.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id='graph'),
    html.Label([
        "colorscale",
        dcc.Dropdown(
            id='colorscale-dropdown', clearable=False,
            value='plasma', options=[
                {'label': c, 'value': c}
                for c in px.colors.named_colorscales()
            ]),
    ]),
])


# Callback that rebuilds the figure from the dropdown value.
@app.callback(
    Output('graph', 'figure'),
    [Input("colorscale-dropdown", "value")]
)
def update_figure(colorscale):
    """Return the scatter plot of the MDS coordinates, coloured by place name.

    NOTE(review): `Class` holds strings, so the continuous colour scale chosen
    in the dropdown probably has no visible effect - confirm.
    """
    return px.scatter(
        mds_df, x="x", y="y", color="Class",
        color_continuous_scale=colorscale,
        render_mode="webgl", title="Tips"
    )


# Run the app and show the result inline in the notebook.
app.run_server(mode='inline')

# t-SNE_資料集一

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn import manifold
from sklearn.preprocessing import MinMaxScaler
# Column names assigned to the loaded table (they replace the CSV's own header).
usecols = ['Class', 'Drink', 'Rank', 'Amount', 'Dissimilarity_value']

df = pd.read_csv('ML_Drink_Dataset.csv')
# `set_axis(..., inplace=True)` was removed in pandas 2.0; assigning to
# `.columns` renames in place on every pandas version and still raises if the
# number of names does not match.
df.columns = usecols
df

# 1-of-k方法一

In [None]:
# Feature table for the 1-of-k experiment: every column except
# Dissimilarity_value and Class.
df_1_k = df.drop(columns=['Dissimilarity_value', 'Class'])

# Replace the Drink values with integer codes.
labelencoder = LabelEncoder()
labelencoder.fit(df['Drink'])
df_1_k['Drink'] = labelencoder.transform(df['Drink'])

In [None]:
# Show the feature table built in the previous cell.
df_1_k

In [None]:
# Min-max scale Amount separately within each drink. Looping over the codes
# actually present replaces seven copy-pasted scalers that assumed exactly the
# codes 0..6 exist.
df_1_k['Amount'] = df_1_k['Amount'].astype(float)  # scaled values are floats
for drink_code in df_1_k['Drink'].unique():
    in_group = df_1_k['Drink'] == drink_code
    scaled = MinMaxScaler().fit_transform(df_1_k.loc[in_group, ['Amount']])
    df_1_k.loc[in_group, 'Amount'] = np.asarray(scaled).ravel()

# One-hot encode Drink, selected by name. df_1_k's columns are
# (Drink, Rank, Amount), so the former positional index 1 encoded Rank instead.
# sparse_threshold=0 forces a dense array for the DataFrame preview and t-SNE.
ct = ColumnTransformer(
    [("Drink", OneHotEncoder(), ['Drink'])],
    remainder='passthrough',
    sparse_threshold=0,
)
data_str_ohe = ct.fit_transform(df_1_k)
pd.DataFrame(data_str_ohe)

In [None]:
# 2-D t-SNE embedding of the encoded features (fixed seed).
tsne_one_hot = manifold.TSNE(n_components=2, init='random', random_state=5, verbose=1)
X_tsne = tsne_one_hot.fit_transform(data_str_ohe)

In [None]:
import plotly.express as px 
from jupyter_dash import JupyterDash 
import dash_core_components as dcc 
import dash_html_components as html 
from dash.dependencies import Input, Output
import pandas as pd
# Plotting frame: t-SNE coordinates plus the Drink column used for colouring.
df_3 = pd.DataFrame({"x": X_tsne[:, 0], "y": X_tsne[:, 1]})
df_3["Class"] = df["Drink"]

# Dropdown listing every named plotly colour scale.
colorscale_dropdown = dcc.Dropdown(
    id="colorscale-dropdown",
    clearable=False,
    value="plasma",
    options=[
        {"label": scale_name, "value": scale_name}
        for scale_name in px.colors.named_colorscales()
    ],
)

# Build the app: heading, graph placeholder and the labelled dropdown.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id="graph"),
    html.Label(["colorscale", colorscale_dropdown]),
])


@app.callback(Output("graph", "figure"), [Input("colorscale-dropdown", "value")])
def update_figure(colorscale):
    """Rebuild the scatter plot of `df_3` from the dropdown value."""
    figure = px.scatter(
        df_3,
        x="x",
        y="y",
        color="Class",
        color_continuous_scale=colorscale,
        render_mode="webgl",
        title="Tips",
    )
    return figure


# Run the app and show the result inline in the notebook.
app.run_server(mode="inline")

In [None]:
# Static scatter of the same embedding, coloured by Rank.
df_2 = pd.DataFrame({
    "Feature_1": X_tsne[:, 0],
    "Feature_2": X_tsne[:, 1],
    "label": df['Rank'],
})
df_2.plot.scatter(x="Feature_1", y="Feature_2", c='label', colormap='viridis')

# 相似度_方法二

In [None]:
labelencoder = LabelEncoder()
# Feature table for the second method: every column except Drink and Class.
df_Dissimilarity = df.drop(columns=['Drink', 'Class'])
pd.DataFrame(df_Dissimilarity)


In [None]:
# 2-D t-SNE embedding of the second feature table, then a scatter coloured by Rank.
tsne_dissimilarity = manifold.TSNE(n_components=2, init='random', random_state=42, verbose=1)
X_tsne_one = tsne_dissimilarity.fit_transform(df_Dissimilarity)
df_1 = pd.DataFrame({
    "Feature_1": X_tsne_one[:, 0],
    "Feature_2": X_tsne_one[:, 1],
    "label": df['Rank'],
})
df_1.plot.scatter(x="Feature_1", y="Feature_2", c='label', colormap='viridis')

In [None]:
import plotly.express as px 
from jupyter_dash import JupyterDash 
import dash_core_components as dcc 
import dash_html_components as html 
from dash.dependencies import Input, Output
import pandas as pd
# Plotting frame: t-SNE coordinates plus the Drink column used for colouring.
df_2 = pd.DataFrame({"x": X_tsne_one[:, 0], "y": X_tsne_one[:, 1]})
df_2["Class"] = df["Drink"]

# Dropdown listing every named plotly colour scale.
colorscale_dropdown = dcc.Dropdown(
    id="colorscale-dropdown",
    clearable=False,
    value="plasma",
    options=[
        {"label": scale_name, "value": scale_name}
        for scale_name in px.colors.named_colorscales()
    ],
)

# Build the app: heading, graph placeholder and the labelled dropdown.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id="graph"),
    html.Label(["colorscale", colorscale_dropdown]),
])


@app.callback(Output("graph", "figure"), [Input("colorscale-dropdown", "value")])
def update_figure(colorscale):
    """Rebuild the scatter plot of `df_2` from the dropdown value."""
    figure = px.scatter(
        df_2,
        x="x",
        y="y",
        color="Class",
        color_continuous_scale=colorscale,
        render_mode="webgl",
        title="Tips",
    )
    return figure


# Run the app and show the result inline in the notebook.
app.run_server(mode="inline")

# t-SNE_資料集二

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn import manifold
from sklearn.preprocessing import MinMaxScaler
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.preprocessing import normalize
#所需的欄位

# Load the two drink datasets used in this section.
df_150 = pd.read_csv('ML_Drink_Dataset_150.csv')
df_300 = pd.read_csv('ML_Drink_Dataset_300.csv')
# Only a cell's final expression is rendered, so df_300 is the frame shown.
df_300

# 1-of-k

In [None]:
# Feature table for the first dataset: drop Class, turn Drink into integer
# codes, then one-hot encode it.
df_150_1_k = df_150.drop(columns=['Class'])
labelencoder = LabelEncoder()
df_150_1_k['Drink'] = labelencoder.fit_transform(df_150_1_k['Drink'])
# Select Drink by name: the former positional index 1 only hits Drink for one
# particular column order. sparse_threshold=0 forces a dense array for the
# DataFrame preview and for t-SNE.
ct = ColumnTransformer(
    [("Drink", OneHotEncoder(), ['Drink'])],
    remainder='passthrough',
    sparse_threshold=0,
)
df_150_1_k_one = ct.fit_transform(df_150_1_k)
pd.DataFrame(df_150_1_k_one)


In [None]:
# 2-D t-SNE embedding of the first encoded dataset (PCA initialisation, fixed seed).
tsne_150 = manifold.TSNE(n_components=2, init='pca', random_state=5, verbose=1)
X_tsne_150 = tsne_150.fit_transform(df_150_1_k_one)

In [None]:
# Feature table for the second dataset: drop Class, turn Drink into integer
# codes, then one-hot encode it.
df_300_1_k = df_300.drop(columns=['Class'])
labelencoder = LabelEncoder()
df_300_1_k['Drink'] = labelencoder.fit_transform(df_300_1_k['Drink'])
# Select Drink by name: the former positional index 1 only hits Drink for one
# particular column order. sparse_threshold=0 forces a dense array for the
# DataFrame preview and for t-SNE.
ct = ColumnTransformer(
    [("Drink", OneHotEncoder(), ['Drink'])],
    remainder='passthrough',
    sparse_threshold=0,
)
df_300_1_k_one = ct.fit_transform(df_300_1_k)
pd.DataFrame(df_300_1_k_one)


In [None]:
# 2-D t-SNE embedding of the second encoded dataset (PCA initialisation, fixed seed).
tsne_300 = manifold.TSNE(n_components=2, init='pca', random_state=5, verbose=1)
X_tsne_300 = tsne_300.fit_transform(df_300_1_k_one)

In [None]:
# Pair each embedding with the Drink column of the dataset it came from.
df_150_2 = pd.DataFrame({
    "Feature_1": X_tsne_150[:, 0],
    "Feature_2": X_tsne_150[:, 1],
    "label": df_150['Drink'],
})
df_300_2 = pd.DataFrame({
    "Feature_1": X_tsne_300[:, 0],
    "Feature_2": X_tsne_300[:, 1],
    "label": df_300['Drink'],
})

In [None]:
# Stack the two embedding tables. ignore_index=True gives every row a unique
# label instead of repeating each input frame's own index values.
result = pd.concat([df_150_2, df_300_2], ignore_index=True)

In [None]:
# Show the combined embedding table.
result

In [None]:
import plotly.express as px 
from jupyter_dash import JupyterDash 
import dash_core_components as dcc 
import dash_html_components as html 
from dash.dependencies import Input, Output
import pandas as pd
# Plotting frame: the combined embedding with the column names the figure uses.
df_4 = result[["Feature_1", "Feature_2", "label"]].rename(
    columns={"Feature_1": "x", "Feature_2": "y", "label": "Class"}
)

# Dropdown listing every named plotly colour scale.
colorscale_dropdown = dcc.Dropdown(
    id="colorscale-dropdown",
    clearable=False,
    value="plasma",
    options=[
        {"label": scale_name, "value": scale_name}
        for scale_name in px.colors.named_colorscales()
    ],
)

# Build the app: heading, graph placeholder and the labelled dropdown.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id="graph"),
    html.Label(["colorscale", colorscale_dropdown]),
])


@app.callback(Output("graph", "figure"), [Input("colorscale-dropdown", "value")])
def update_figure(colorscale):
    """Rebuild the scatter plot of `df_4` from the dropdown value."""
    figure = px.scatter(
        df_4,
        x="x",
        y="y",
        color="Class",
        color_continuous_scale=colorscale,
        render_mode="webgl",
        title="Tips",
    )
    return figure


# Run the app and show the result inline in the notebook.
app.run_server(mode="inline")

# 相似值-------以下是有點亂的

In [None]:
# Feature tables for the similarity experiment: every column except Class and Drink.
dropped_columns = ['Class', 'Drink']
df_150_1_S = df_150.drop(columns=dropped_columns)
df_300_1_S = df_300.drop(columns=dropped_columns)
pd.DataFrame(df_150_1_S)

In [None]:
from numpy import dot
from numpy.linalg import norm

def _cosine_similarity_frame(features):
    """Return the pairwise cosine similarity between the rows of `features`.

    Args:
        features: DataFrame of numeric columns, one sample per row.

    Returns:
        A square DataFrame with one row and one column per sample. It shares
        `features`' index so the two line up in a column-wise concat, and its
        columns are named ``cos_sim_<k>``.
    """
    values = np.asarray(features, dtype=float)
    lengths = np.linalg.norm(values, axis=1)
    similarity = (values @ values.T) / np.outer(lengths, lengths)
    # String column names keep the concatenated frame's labels all of one type.
    # NOTE(review): newer scikit-learn is believed to reject mixed str/int
    # feature names - confirm.
    names = ["cos_sim_{}".format(k) for k in range(len(values))]
    return pd.DataFrame(similarity, index=features.index, columns=names)


# Each dataset is compared with itself and sized from its own row count. The
# second table used to be computed from df_150_1_S by mistake, with sizes
# hard-coded to 4 and 3.
# NOTE: this rebinds `different_distances`, which earlier in the notebook held
# the distance matrix between locations.
different_distances = _cosine_similarity_frame(df_150_1_S)
different_distances_2 = _cosine_similarity_frame(df_300_1_S)

# Append the similarity columns to the original features.
result_3 = pd.concat([df_150_1_S, different_distances], axis=1)
result_4 = pd.concat([df_300_1_S, different_distances_2], axis=1)
result_4

In [None]:
# Show the cosine-similarity table built in the previous cell.
different_distances

In [None]:
# Independent 2-D t-SNE embeddings of the two augmented feature tables,
# both built with the same settings.
tsne_settings = dict(n_components=2, init='random', random_state=5, verbose=1)
X_tsne_150_2 = manifold.TSNE(**tsne_settings).fit_transform(result_3)
X_tsne_300_2 = manifold.TSNE(**tsne_settings).fit_transform(result_4)

In [None]:
# Pair each embedding with the Drink column of the dataset it came from.
df_150_3 = pd.DataFrame({
    "Feature_1": X_tsne_150_2[:, 0],
    "Feature_2": X_tsne_150_2[:, 1],
    "label": df_150['Drink'],
})
df_300_3 = pd.DataFrame({
    "Feature_1": X_tsne_300_2[:, 0],
    "Feature_2": X_tsne_300_2[:, 1],
    "label": df_300['Drink'],
})

In [None]:
# Stack the two embedding tables. ignore_index=True gives every row a unique
# label instead of repeating each input frame's own index values.
result_2 = pd.concat([df_150_3, df_300_3], ignore_index=True)

In [None]:
# Show the combined embedding table.
result_2

In [None]:
import plotly.express as px 
from jupyter_dash import JupyterDash 
import dash_core_components as dcc 
import dash_html_components as html 
from dash.dependencies import Input, Output
import pandas as pd
# Plotting frame: the combined embedding with the column names the figure uses.
df_5 = result_2[["Feature_1", "Feature_2", "label"]].rename(
    columns={"Feature_1": "x", "Feature_2": "y", "label": "Class"}
)

# Dropdown listing every named plotly colour scale.
colorscale_dropdown = dcc.Dropdown(
    id="colorscale-dropdown",
    clearable=False,
    value="plasma",
    options=[
        {"label": scale_name, "value": scale_name}
        for scale_name in px.colors.named_colorscales()
    ],
)

# Build the app: heading, graph placeholder and the labelled dropdown.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id="graph"),
    html.Label(["colorscale", colorscale_dropdown]),
])


@app.callback(Output("graph", "figure"), [Input("colorscale-dropdown", "value")])
def update_figure(colorscale):
    """Rebuild the scatter plot of `df_5` from the dropdown value."""
    figure = px.scatter(
        df_5,
        x="x",
        y="y",
        color="Class",
        color_continuous_scale=colorscale,
        render_mode="webgl",
        title="Tips",
    )
    return figure


# Run the app and show the result inline in the notebook.
app.run_server(mode="inline")