<a href="https://colab.research.google.com/github/rutuja-patil24/CMPE-255-Data_Mining/blob/main/Assignment_10/Dimensionality_Reduction_Tabular_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install numpy pandas matplotlib scikit-learn tensorflow umap-learn plotly




In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, fetch_openml
from sklearn.preprocessing import StandardScaler
import plotly.express as px

# Load Iris dataset (tabular data)
iris = load_iris()
iris_data = iris.data
iris_labels = iris.target
iris_feature_names = iris.feature_names

# Standardize Iris dataset
scaler = StandardScaler()
iris_data_scaled = scaler.fit_transform(iris_data)

# Display Iris dataset in DataFrame format
iris_df = pd.DataFrame(iris_data, columns=iris_feature_names)
iris_df['target'] = iris_labels
iris_df['target_name'] = iris_df['target'].map(lambda x: iris.target_names[x])
iris_df.head()


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,target_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [17]:
from sklearn.decomposition import PCA

# Apply PCA
pca = PCA(n_components=2)
iris_pca = pca.fit_transform(iris_data_scaled)

# Create a DataFrame for visualization
pca_df = pd.DataFrame(iris_pca, columns=['PC1', 'PC2'])
pca_df['target'] = iris_labels
pca_df['target_name'] = pca_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(pca_df, x='PC1', y='PC2', color='target_name',
                 title="PCA on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [18]:
from sklearn.decomposition import KernelPCA

# Apply Kernel PCA with RBF kernel
kernel_pca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
iris_kernel_pca = kernel_pca.fit_transform(iris_data_scaled)

# Create DataFrame
kernel_pca_df = pd.DataFrame(iris_kernel_pca, columns=['KPCA1', 'KPCA2'])
kernel_pca_df['target'] = iris_labels
kernel_pca_df['target_name'] = kernel_pca_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(kernel_pca_df, x='KPCA1', y='KPCA2', color='target_name',
                 title="Kernel PCA on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [26]:
from sklearn.decomposition import IncrementalPCA

# Apply Incremental PCA
ipca = IncrementalPCA(n_components=2, batch_size=10)
iris_ipca = ipca.fit_transform(iris_data_scaled)

# Create DataFrame
ipca_df = pd.DataFrame(iris_ipca, columns=['IPCA1', 'IPCA2'])
ipca_df['target'] = iris_labels
ipca_df['target_name'] = ipca_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(ipca_df, x='IPCA1', y='IPCA2', color='target_name',
                 title="Incremental PCA on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [19]:
from sklearn.manifold import TSNE

# Apply t-SNE
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
iris_tsne = tsne.fit_transform(iris_data_scaled)

# Create DataFrame
tsne_df = pd.DataFrame(iris_tsne, columns=['tSNE1', 'tSNE2'])
tsne_df['target'] = iris_labels
tsne_df['target_name'] = tsne_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(tsne_df, x='tSNE1', y='tSNE2', color='target_name',
                 title="t-SNE on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [20]:
import umap

# Apply UMAP
umap_model = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42)
iris_umap = umap_model.fit_transform(iris_data_scaled)

# Create DataFrame
umap_df = pd.DataFrame(iris_umap, columns=['UMAP1', 'UMAP2'])
umap_df['target'] = iris_labels
umap_df['target_name'] = umap_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(umap_df, x='UMAP1', y='UMAP2', color='target_name',
                 title="UMAP on Iris Dataset", labels={'color': 'Target'})
fig.show()



n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define Autoencoder
input_dim = iris_data_scaled.shape[1]
encoding_dim = 2

autoencoder = Sequential([
    Dense(encoding_dim, activation='relu', input_shape=(input_dim,)),
    Dense(input_dim, activation='sigmoid')
])

autoencoder.compile(optimizer='adam', loss='mse')

# Train Autoencoder
autoencoder.fit(iris_data_scaled, iris_data_scaled, epochs=50, batch_size=16, verbose=0)

# Extract Encoder
encoder = Sequential(autoencoder.layers[:1])
iris_autoencoded = encoder.predict(iris_data_scaled)

# Create DataFrame
autoencoder_df = pd.DataFrame(iris_autoencoded, columns=['Encoded1', 'Encoded2'])
autoencoder_df['target'] = iris_labels
autoencoder_df['target_name'] = autoencoder_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(autoencoder_df, x='Encoded1', y='Encoded2', color='target_name',
                 title="Autoencoder on Iris Dataset", labels={'color': 'Target'})
fig.show()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


In [22]:
from sklearn.manifold import Isomap

# Apply ISOMAP
isomap = Isomap(n_neighbors=10, n_components=2)
iris_isomap = isomap.fit_transform(iris_data_scaled)

# Create DataFrame
isomap_df = pd.DataFrame(iris_isomap, columns=['ISOMAP1', 'ISOMAP2'])
isomap_df['target'] = iris_labels
isomap_df['target_name'] = isomap_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(isomap_df, x='ISOMAP1', y='ISOMAP2', color='target_name',
                 title="ISOMAP on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [23]:
from sklearn.manifold import Isomap

# Apply ISOMAP
isomap = Isomap(n_neighbors=10, n_components=2)
iris_isomap = isomap.fit_transform(iris_data_scaled)

# Create DataFrame
isomap_df = pd.DataFrame(iris_isomap, columns=['ISOMAP1', 'ISOMAP2'])
isomap_df['target'] = iris_labels
isomap_df['target_name'] = isomap_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(isomap_df, x='ISOMAP1', y='ISOMAP2', color='target_name',
                 title="ISOMAP on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [24]:
from sklearn.manifold import MDS

# Apply MDS
mds = MDS(n_components=2, random_state=42)
iris_mds = mds.fit_transform(iris_data_scaled)

# Create DataFrame
mds_df = pd.DataFrame(iris_mds, columns=['MDS1', 'MDS2'])
mds_df['target'] = iris_labels
mds_df['target_name'] = mds_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(mds_df, x='MDS1', y='MDS2', color='target_name',
                 title="MDS on Iris Dataset", labels={'color': 'Target'})
fig.show()


In [25]:
from sklearn.manifold import MDS

# Apply MDS
mds = MDS(n_components=2, random_state=42)
iris_mds = mds.fit_transform(iris_data_scaled)

# Create DataFrame
mds_df = pd.DataFrame(iris_mds, columns=['MDS1', 'MDS2'])
mds_df['target'] = iris_labels
mds_df['target_name'] = mds_df['target'].map(lambda x: iris.target_names[x])

# Interactive Plotly scatter plot
fig = px.scatter(mds_df, x='MDS1', y='MDS2', color='target_name',
                 title="MDS on Iris Dataset", labels={'color': 'Target'})
fig.show()
