This notebook compares five dimensionality-reduction methods (PCA, t-SNE, UMAP, TriMap and PaCMAP). For a deeper dive, see [this notebook](https://www.kaggle.com/frankmollard/a-story-about-unsupervised-learning).

In [None]:
import os
from time import time
import psutil
!pip install numba==0.50 
import numpy as np 
import pandas as pd 

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap

!pip install trimap
import trimap

!pip install pacmap==0.4
import pacmap

import seaborn as sns
import matplotlib.pyplot as plt 
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
# Fix the seed first: it feeds NumPy's global generator here and is reused
# later as `random_state` when the data is sampled.
seed = 123
np.random.seed(seed)

# Suppress all warnings raised from here on.
warnings.filterwarnings("ignore")

In [None]:
# Read the training table, then remove every column whose name contains
# "Soil_Type" or "Id" (substring match, case-sensitive).
Data = pd.read_csv("../input/tabular-playground-series-dec-2021/train.csv")
Data = Data.drop(columns=Data.columns[Data.columns.str.contains("Soil_Type|Id")])

In [None]:
def Normalization(DF, cols):
    """Return a copy of ``DF`` with the columns in ``cols`` standardised.

    Each listed column is replaced by ``(x - mean) / std``, where ``mean`` and
    ``std`` are that column's own ``Series.mean()`` and ``Series.std()``.
    Columns not listed are left untouched, and the caller's frame is never
    modified.

    Parameters
    ----------
    DF : pandas.DataFrame
        Input table.
    cols : iterable
        Labels of the columns to standardise. Labels are used as-is, so
        non-string labels (e.g. integers) work as well.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as ``DF``.

    Notes
    -----
    A constant column has zero standard deviation, so its result is NaN.
    """
    DF = DF.copy()
    for c in cols:
        # Index with the label itself; formatting it into a string would turn
        # e.g. the integer label 0 into "0" and raise KeyError.
        column = DF[c]
        DF[c] = (column - column.mean()) / column.std()

    return DF

In [None]:
# Standardise every column except the label on the full table, so the mean and
# std come from all rows, and only then keep a seeded 10 % sample.
# `Data.columns.drop(...)` gives the same labels as `Data.drop(columns=...).columns`
# without copying the whole frame just to read its column index.
# NOTE: `Data` is overwritten, so re-running this cell normalises and samples the
# already-sampled frame again; re-run the loading cell first.
Data = Normalization(DF=Data, cols=Data.columns.drop("Cover_Type"))
Data = Data.sample(frac=0.1, random_state=seed)

In [None]:
# Embedders to compare. The keys become the panel titles and the insertion
# order is the order in which the models are fitted and plotted.
methods = dict(
    PCA=PCA(n_components=2),
    tSNE=TSNE(perplexity=25, n_components=2),
    UMAP=umap.UMAP(min_dist=0.75, n_neighbors=10, n_components=2),
    TriMap=trimap.TRIMAP(n_inliers=5, n_outliers=7, n_random=12, verbose=False),
    PaCMAP=pacmap.PaCMAP(n_neighbors=7, n_dims=2),
)

In [None]:
def viz(df, mdls, rows=5, cols=1):
    """Embed ``df`` with every model in ``mdls`` and draw one scatter panel per model.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the features plus a ``"Cover_Type"`` column.
        ``Cover_Type`` only colours the points; all remaining columns are fed
        to the models.
    mdls : dict
        Panel title -> estimator exposing ``fit_transform``. Models are used
        in the dict's iteration order.
    rows, cols : int
        Shape of the subplot grid. Panels are filled column by column,
        ``rows // cols`` per column; models that do not fit into those panels
        are skipped and surplus panels stay empty.

    Raises
    ------
    KeyError
        If ``df`` has no ``"Cover_Type"`` column.

    Side effects: prints the fitting time and the system RAM usage for each
    model and displays the figure with ``plt.show()``.
    """
    # squeeze=False always yields a 2-D array of axes, so one indexing scheme
    # covers every grid shape (including a single row or a single panel).
    _, axes = plt.subplots(
        nrows=rows, ncols=cols, figsize=(30, 20 * rows), squeeze=False
    )

    # Select the features by name rather than by position, so the label used
    # for colouring can never be part of the model input, whatever the column
    # order of ``df`` is.
    labels = df["Cover_Type"]
    features = df.drop(columns=["Cover_Type"]).values

    # Same traversal as before: column by column, rows // cols panels each.
    slots = [(r, c) for c in range(cols) for r in range(rows // cols)]

    # zip() stops at the shorter sequence, so having fewer models than panels
    # no longer raises IndexError.
    for (r, c), (name, model) in zip(slots, mdls.items()):
        ax = axes[r, c]

        start = time()
        print(model, "Transforming...")
        transformed = model.fit_transform(features)
        end = time()
        print(
            model,
            "Transformed in",
            str(round(end - start)),
            "seconds.",
            "RAM usage:",
            str(psutil.virtual_memory().percent) + "%"
        )

        Reduced = pd.DataFrame(transformed)
        scatter = ax.scatter(
            x=Reduced[0],
            y=Reduced[1],
            c=labels,
            cmap="tab10",
            alpha=0.9
        )
        ax.set_title(name, fontsize=20)
        # One legend entry per distinct colour value of the scatter.
        lgnd = ax.legend(
            *scatter.legend_elements(),
            loc="upper left",
            title="Cover_Type",
            title_fontsize=18,
            fontsize=18
        )
        ax.add_artist(lgnd)
        ax.grid(False)

    plt.show()

In [None]:
# Fit each method on the sampled data; with the default 5x1 grid every method
# gets its own panel.
viz(df=Data, mdls=methods)