In [1]:
import os, time, glob, socket
import pandas as pd
import numpy as np

# Visualization
import plotly.express as px # for data visualization
import matplotlib.pyplot as plt
import plotly.io as pio
import imageio
from PIL import Image
import io

# Scikit-learn
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split, LeaveOneOut
from sklearn.neighbors import KernelDensity
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import load_digits # for MNIST data
from sklearn.manifold import Isomap # for Isomap dimensionality reduction

In [2]:
# Read the raw project data. The file has no header row, so pandas assigns
# integer column labels.
fname = r"/Users/joycelin/Documents/UW/Course/STAT527/Project/project-data.csv"
dt = pd.read_csv(
    fname,
    sep=",",
    engine="python",
    encoding='utf-8',
    header=None,
)
# Preview the first five rows (5 is the default for head()).
dt.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,2.529516,-1.060382,0.593345,-3.577957,0.769041,-1.780638,-1.434324,0.193027,1.223478,0.681172,...,-0.114835,0.023744,-0.10254,0.388987,0.159817,0.620489,0.141224,0.022058,-0.626743,-0.051491
1,0.480485,1.250334,-2.639274,-0.296482,0.945529,0.242964,-0.192175,0.445318,-2.053904,-0.103153,...,-0.034879,-0.503634,0.795026,-0.533807,-0.474493,-0.144656,-0.052307,0.46632,0.562019,-0.009372
2,-0.542213,-1.708934,-0.369222,-2.360628,2.082012,0.259526,-2.128058,0.946741,-0.664654,0.760146,...,0.195754,-0.368247,0.041147,-0.711409,-0.522927,0.27175,0.160948,-0.259978,0.680244,0.307635
3,-1.972673,-1.560447,-1.109872,-0.869376,0.200576,-1.693065,0.247207,-0.186584,0.794374,0.989349,...,-0.045691,0.224225,0.039724,-0.117851,-0.453361,0.390219,-0.279517,0.035607,0.397154,0.007859
4,-0.481061,1.72924,1.700382,-0.612105,-1.476692,3.007863,1.486375,0.919901,-0.587185,-0.98926,...,-0.156012,-0.023793,-0.147526,0.09657,0.007069,-0.23138,0.276729,-0.217759,0.873191,0.242444


In [3]:
# Convert the loaded table to a plain NumPy array for scikit-learn.
# np.asarray is used instead of DataFrame.to_numpy() so that this cell is
# idempotent: on the first run it yields the same array as to_numpy(), and on
# a re-run (when dt is already an ndarray) it is a no-op instead of raising
# AttributeError.
dt = np.asarray(dt)
dt

array([[ 2.52951582, -1.06038173,  0.59334488, ...,  0.02205803,
        -0.62674267, -0.05149062],
       [ 0.4804851 ,  1.25033412, -2.63927432, ...,  0.46631986,
         0.56201922, -0.00937202],
       [-0.5422129 , -1.70893425, -0.36922228, ..., -0.25997751,
         0.68024441,  0.30763455],
       ...,
       [-1.77362995,  1.8379631 ,  0.11096452, ...,  0.23005182,
         0.02117275,  0.10568809],
       [-0.76591322,  2.36672966,  1.75651949, ..., -0.47785445,
        -0.37310217, -0.27159109],
       [-3.28418168, -0.52142029, -0.54244079, ...,  0.02730655,
        -0.15005253,  0.09866906]])

In [None]:
# Sweep the neighbourhood size and record the Isomap reconstruction error of a
# 3-component embedding for each value.
nbr_grid = range(5, 101, 5)  # 5, 10, ..., 100

reconerror = []
for nNbr in nbr_grid:
    iso = Isomap(n_components=3, n_neighbors=nNbr)
    # Only the fitted model is needed here; the embedding itself is not kept.
    iso.fit(dt)
    curerror = iso.reconstruction_error()
    reconerror.append(curerror)
    print("nNbr =", nNbr, "reconerror =", curerror)

# Persist the sweep so the plot can be rebuilt without refitting.
sweep_table = pd.DataFrame({'nNbr': nbr_grid, 'reconerror': reconerror})
sweep_table.to_csv('/Users/joycelin/Documents/UW/Course/STAT527/Project/reconerror.csv', index=False)

In [None]:
# Plot reconstruction error against the number of neighbours and save the
# figure. The x values must match the grid used when reconerror was computed.
fig_scree, ax_scree = plt.subplots(figsize=(10, 6))
ax_scree.plot(range(5, 101, 5), reconerror, 'o-', color='blue', linewidth=2)
ax_scree.set_xlabel('Number of Neighbors')
ax_scree.set_ylabel('Reconstruction Error')
ax_scree.set_title('Scree Plot')
fig_scree.savefig('/Users/joycelin/Documents/UW/Course/STAT527/Project/images/reconerror_screeplot.png')
plt.show()

In [49]:
### Step 1 - Configure Isomap.
### Note: n_neighbors and n_components are set explicitly here; they are not
### scikit-learn's default values.
embed3 = Isomap(n_components=3, n_neighbors=15)

### Step 2 - Fit on the data and project it down to 3 dimensions
X_trans3 = embed3.fit_transform(dt)

### Step 3 - Print the shape of the embedding as a sanity check
print('The new shape of X: ',X_trans3.shape)

### Step 4 - Save the embedding (to_csv also writes the row index as the first column)
embedding_csv = '/Users/joycelin/Documents/UW/Course/STAT527/Project/isomap_data_dim3.csv'
pd.DataFrame(X_trans3).to_csv(embedding_csv)

The new shape of X:  (12000, 3)


In [71]:
# Load the class labels for the 3-D embedding and flatten them into a 1-D array.
# The first line of the file is consumed as the header (pandas default).
fname = r"/Users/joycelin/Documents/UW/Course/STAT527/Project/isoclass_dim3.csv"
y = (
    pd.read_csv(fname, sep=",", engine="python", encoding='utf-8')
    .to_numpy()
    .ravel()
)
y.shape

(12000,)

In [111]:
# Discrete palette handed to plotly; colours are assigned to the class labels
# in the order they appear in this list.
colors = [
    'red', 'green', 'indigo', 'olive', 'gold', 'blue', 'violet',
    'brown', 'pink', 'gray', 'orange', 'cyan', 'navy', 'teal',
    'lightgreen', 'lightskyblue', 'silver', 'tan', 'yellow', 'purple', 'moccasin',
    'bisque', 'wheat', 'peachpuff', 'navajowhite', 'salmon', 'crimson', 'palevioletred',
    'darksalmon', 'lightcoral', 'hotpink', 'palegoldenrod', 'plum', 'maroon', 'darkkhaki',
    'orchid', 'thistle', 'lightgray', 'lightblue', 'lightyellow', 'lavender', 'linen',
]

# The three axes share one style and differ only in their background colour.
axis_backgrounds = {'xaxis': 'white', 'yaxis': 'white', 'zaxis': 'lightgrey'}
scene_axes = {
    axis_name: dict(backgroundcolor=background,
                    color='black',
                    gridcolor='#f0f0f0',
                    title_font=dict(size=10),
                    tickfont=dict(size=10))
    for axis_name, background in axis_backgrounds.items()
}

# Camera set-up, see https://plotly.com/python/3d-camera-controls/
# Other eye positions that were left commented out in this cell:
#   (1.5, -1.5, 0.1), (-1.5, -1.5, 0.1), (0, 0.1, 2), (0, 0.1, -2)
camera = dict(up=dict(x=0, y=0, z=1),
              center=dict(x=0, y=0, z=-0.2),
              eye=dict(x=1.5, y=1.5, z=0.1))

# 3-D scatter of the Isomap embedding, coloured by class label.
fig = px.scatter_3d(
    None,
    x=X_trans3[:, 0],
    y=X_trans3[:, 1],
    z=X_trans3[:, 2],
    color=y.astype(str),
    color_discrete_sequence=colors,
    height=900,
    width=900,
)

# Chart cosmetics: horizontal legend near the bottom, wider margins, axis styling.
fig.update_layout(
    showlegend=True,
    legend=dict(orientation="h", yanchor="top", y=0.05, xanchor="center", x=0.5),
    scene_camera=camera,
    margin=dict(l=50, r=50, b=50, t=50),
    scene=scene_axes,
)

# Small markers keep the point cloud readable.
fig.update_traces(marker=dict(size=2))

# Export the current view. The original cell also carried commented-out exports
# to isomap3D_k14_2.png, isomap3D_k14_3.png and isomap3D_k14_4.png.
# NOTE(review): the file name says k14 while the embedding cell uses
# n_neighbors = 15 - confirm which value this image corresponds to.
pio.write_image(fig, '/Users/joycelin/Documents/UW/Course/STAT527/Project/images/isomap3D_k14_1.png')

fig.show()

In [48]:
# Build a rotating GIF of the 3-D Isomap embedding.
#
# Fixes relative to the previous version of this cell:
#   * the camera eye is now computed from the loop angle, so the view actually
#     rotates (before, every frame used the same fixed eye position);
#   * the unused first `frames` loop, which only made DataFrame copies that
#     were immediately discarded, is removed;
#   * the sweep stops before 360 degrees so the looping GIF does not show the
#     starting view twice in a row.
fig = px.scatter_3d(None,
                    x=X_trans3[:,0], y=X_trans3[:,1], z=X_trans3[:,2],
                    color=y.astype(str),
                    color_discrete_sequence=colors,  # Assign colors to clusters
                    height=900, width=900
                   )

# Tabular copy of the embedding and labels. It is not used by the figure or
# the GIF below; it is kept only in case other cells reference `df`.
df = pd.DataFrame({'X': X_trans3[:, 0], 'Y': X_trans3[:, 1], 'Z': X_trans3[:, 2], 'Cluster': y.astype(str)})

# The orbit starts from the eye position this cell used before
# (x=-1.5, y=1.5, z=0.5) and circles the z axis at constant height and radius.
start_eye_x, start_eye_y, eye_z = -1.5, 1.5, 0.5
orbit_radius = float(np.hypot(start_eye_x, start_eye_y))
start_angle = float(np.arctan2(start_eye_y, start_eye_x))

# Render one PNG per camera position
frames = []
for frame in range(0, 360, 10):  # Rotate in increments of 10 degrees
    angle = start_angle + np.deg2rad(frame)
    eye = dict(x=float(orbit_radius * np.cos(angle)),
               y=float(orbit_radius * np.sin(angle)),
               z=eye_z)
    fig.update_layout(scene_camera=dict(eye=eye,
                                        center=dict(x=0, y=0, z=-0.2),
                                        up=dict(x=0, y=0, z=1),
                                        projection=dict(type='orthographic')))
    image_bytes = fig.to_image(format='png')
    image_array = np.array(Image.open(io.BytesIO(image_bytes)))
    frames.append(image_array)

# Save the plot as a rotating GIF
# NOTE(review): the file name says k17 while the embedding cell uses
# n_neighbors = 15 - confirm which value this animation corresponds to.
imageio.mimsave('/Users/joycelin/Documents/UW/Course/STAT527/Project/images/isomap3D_k17.gif', frames, fps=10)

## Perturbed Data

In [2]:
# Read the perturbed data set. The file has no header row, so pandas assigns
# integer column labels. Note that this rebinds `dt`.
fname = r"/Users/joycelin/Documents/UW/Course/STAT527/Project/data/pertubdata.csv"
dt = pd.read_csv(
    fname,
    sep=",",
    engine="python",
    encoding='utf-8',
    header=None,
)
# Preview the first five rows (5 is the default for head()).
dt.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,2.533779,-1.057613,0.597402,-3.564305,0.770448,-1.78081,-1.447337,0.201477,1.232219,0.680521,...,-0.116667,0.023516,-0.098278,0.390508,0.16349,0.622664,0.144029,0.025968,-0.624228,-0.0475
1,0.485321,1.251729,-2.628039,-0.288043,0.953708,0.237741,-0.176208,0.446149,-2.04425,-0.112325,...,-0.035348,-0.502443,0.793704,-0.531857,-0.473927,-0.143555,-0.051928,0.465785,0.562639,-0.009336
2,-0.5379,-1.710245,-0.375042,-2.365696,2.082851,0.259119,-2.126124,0.953262,-0.666788,0.75691,...,0.19361,-0.366115,0.043233,-0.711453,-0.519856,0.273942,0.161426,-0.258133,0.677012,0.309625
3,-1.95782,-1.558156,-1.114491,-0.874159,0.194179,-1.69253,0.246817,-0.190664,0.78162,0.986911,...,-0.044163,0.222698,0.041637,-0.11684,-0.454569,0.3871,-0.277755,0.036258,0.394248,0.0075
4,-0.468602,1.73035,1.700582,-0.613767,-1.483812,3.011085,1.488203,0.922072,-0.585487,-0.992135,...,-0.162807,-0.021039,-0.142828,0.095691,0.003041,-0.232639,0.280021,-0.220739,0.871271,0.244079


In [3]:
# Convert the perturbed table to a plain NumPy array for scikit-learn.
# np.asarray is used instead of DataFrame.to_numpy() so that this cell is
# idempotent: on the first run it yields the same array as to_numpy(), and on
# a re-run (when dt is already an ndarray) it is a no-op instead of raising
# AttributeError.
dt = np.asarray(dt)
dt

array([[ 2.5337793 , -1.05761298,  0.59740179, ...,  0.02596819,
        -0.62422837, -0.04749957],
       [ 0.48532087,  1.25172867, -2.62803885, ...,  0.46578464,
         0.56263925, -0.00933577],
       [-0.53789957, -1.71024475, -0.37504175, ..., -0.2581329 ,
         0.67701183,  0.30962486],
       ...,
       [-1.7750162 ,  1.84232413,  0.11087149, ...,  0.23190594,
         0.02590313,  0.10359356],
       [-0.77038341,  2.36572945,  1.75843504, ..., -0.47658329,
        -0.37323246, -0.26861464],
       [-3.2752993 , -0.52355284, -0.52911184, ...,  0.02653282,
        -0.15162761,  0.10058246]])

In [4]:
### Step 1 - Configure Isomap.
### Note: n_neighbors and n_components are set explicitly here; they are not
### scikit-learn's default values.
embed3 = Isomap(n_components=3, n_neighbors=15)

### Step 2 - Fit on the perturbed data and project it down to 3 dimensions
X_trans3 = embed3.fit_transform(dt)

### Step 3 - Print the shape of the embedding as a sanity check
print('The new shape of X: ',X_trans3.shape)

### Step 4 - Save the embedding (to_csv also writes the row index as the first column)
embedding_csv = '/Users/joycelin/Documents/UW/Course/STAT527/Project/data/isomap_perturbeddata_dim3.csv'
pd.DataFrame(X_trans3).to_csv(embedding_csv)

The new shape of X:  (12000, 3)
