In [1]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, LatentDirichletAllocation, FastICA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import pandas as pd
import numpy as np

In [2]:
def pca_graph(X, y):
    """Draw the two-component PCA view of ``X`` with points grouped by ``y``.

    Thin convenience wrapper: forwards ``X`` and ``y`` to ``graph_func``
    together with the ``PCA`` estimator class, the title fragment ``"PCA"``
    and principal-component axis labels. Returns nothing.
    """
    axis_labels = {
        "x_axis": "Principal Component 1",
        "y_axis": "Principal Component 2",
    }
    graph_func(X, y, model=PCA, model_name="PCA", **axis_labels)

In [3]:
def lda_graph(X, y):
    """Draw the two-component LDA view of ``X`` with points grouped by ``y``.

    Thin convenience wrapper: forwards ``X`` and ``y`` to ``graph_func``
    together with the ``LinearDiscriminantAnalysis`` estimator class, the
    title fragment ``"LDA"`` and discriminant-specific axis labels (so the
    figure is labelled in the same way as the one from ``pca_graph``).
    Returns nothing.

    NOTE(review): scikit-learn limits LDA's ``n_components`` to
    ``min(n_classes - 1, n_features)``, so ``y`` presumably needs at least
    three distinct classes for a two-component projection -- confirm.
    """
    graph_func(
        X,
        y,
        model=LinearDiscriminantAnalysis,
        model_name="LDA",
        x_axis="Linear Discriminant 1",
        y_axis="Linear Discriminant 2",
    )

In [4]:
def ldia_graph(X, y):
    """Draw the two-component Latent Dirichlet Allocation view of ``X``.

    Thin convenience wrapper: forwards ``X`` and ``y`` to ``graph_func``
    together with the ``LatentDirichletAllocation`` estimator class, and
    passes an explicit title fragment and axis labels so the figure is not
    left with the generic "Visualisation" / "Axis N" defaults.
    Returns nothing.

    NOTE(review): LatentDirichletAllocation is normally fitted on
    non-negative (count-like) features -- verify ``X`` accordingly.
    """
    graph_func(
        X,
        y,
        model=LatentDirichletAllocation,
        model_name="LDiA",
        x_axis="Topic 1",
        y_axis="Topic 2",
    )

In [5]:
def ica_graph(X, y):
    """Draw the two-component FastICA view of ``X`` with points grouped by ``y``.

    Thin convenience wrapper: forwards ``X`` and ``y`` to ``graph_func``
    together with the ``FastICA`` estimator class, and passes an explicit
    title fragment and axis labels so the figure is not left with the generic
    "Visualisation" / "Axis N" defaults. Returns nothing.
    """
    graph_func(
        X,
        y,
        model=FastICA,
        model_name="ICA",
        x_axis="Independent Component 1",
        y_axis="Independent Component 2",
    )

In [6]:
def graph_func(X, y, model, model_name="Visualisation", x_axis="Axis 1", y_axis="Axis 2"):
    """Project ``X`` onto two components with ``model`` and scatter-plot it by class.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; its index must equal the index of ``y``.
    y : pandas.Series
        Class label for every row of ``X``. It is passed to ``fit`` and
        decides which colour each point gets.
    model : callable
        Estimator class (not an instance). It is called as
        ``model(n_components=2)`` and must provide ``fit(X, y)`` and
        ``transform(X)``.
    model_name : str
        Name inserted into the figure title.
    x_axis, y_axis : str
        Axis labels. When the fitted model exposes
        ``explained_variance_ratio_`` the matching percentage is appended.

    Raises
    ------
    TypeError
        If ``X`` is not a DataFrame or ``y`` is not a Series.
    IndexError
        If the indices of ``X`` and ``y`` differ (including in length).
    """
    if not isinstance(X, pd.DataFrame):
        raise TypeError("Only DataFrame obj is accepted for X")
    if not isinstance(y, pd.Series):
        raise TypeError("Only Series obj is accepted for y")
    # Index.equals copes with indices of different length; an element-wise
    # `==` would raise ValueError there instead of the intended IndexError.
    if not X.index.equals(y.index):
        raise IndexError("Indices of X and y must match. NaN values are to be expected otherwise")

    reducer = model(n_components=2)
    # Keep X's index on the projection so rows stay aligned with y even when
    # the index is not a plain 0..n-1 range.
    projected = pd.DataFrame(
        np.asarray(reducer.fit(X, y).transform(X)),
        columns=['axis 1', 'axis 2'],
        index=X.index,
    )

    # Only some estimators (e.g. PCA, LDA) report explained variance; others
    # (e.g. FastICA, LatentDirichletAllocation) get the plain axis labels.
    ratios = getattr(reducer, "explained_variance_ratio_", None)
    if ratios is not None and len(ratios) >= 2:
        x_label = '%s (%.2f%%)' % (x_axis, ratios[0] * 100)
        y_label = '%s (%.2f%%)' % (y_axis, ratios[1] * 100)
    else:
        x_label, y_label = x_axis, y_axis

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel(x_label, fontsize=15)
    ax.set_ylabel(y_label, fontsize=15)
    ax.set_title('2 Component %s' % model_name, fontsize=20)

    # np.asarray makes extension arrays (e.g. categoricals) sortable; missing
    # labels are dropped because they can never match a row below.
    targets = np.sort(np.asarray(y.dropna().unique()))

    # One distinct colour per class; fall back to a sampled colormap when
    # there are more classes than named colours so no class is dropped.
    palette = ['r', 'g', 'b', 'y', 'm', 'c', 'k']
    if len(targets) > len(palette):
        cmap = plt.get_cmap("gist_rainbow")
        palette = [tuple(rgba) for rgba in cmap(np.linspace(0, 1, len(targets)))]

    for target, color in zip(targets, palette):
        # Mask on y itself, so the result does not depend on y.name.
        mask = (y == target).to_numpy()
        ax.scatter(
            projected.loc[mask, 'axis 1'],
            projected.loc[mask, 'axis 2'],
            color=color,
            s=50,
            label=str(target),
        )
    if len(targets):
        ax.legend()
    ax.grid()