In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px


# Paths of the tab-separated data files; PCA is run on each file in turn.
# Every file is read without a header row: all columns but the last are
# treated as features and the last column as the class label.
files=["/content/pca_a.txt","/content/pca_b.txt", "/content/pca_c.txt"]         # Please change the file path here
# files=["/content/pca_demo.txt"]
for filename in files:
  # Load the current file into a dataframe.
  dt = pd.read_csv(filename, delimiter="\t", header=None)

  # Keep the label column (last) separate from the feature columns.
  disease = dt.iloc[:, -1]
  x = dt.iloc[:, :-1]

  # Min-max normalise every feature column.
  # DataFrame.min()/max() are used instead of np.min()/np.max(): on
  # pandas >= 2.0 the NumPy wrappers reduce a DataFrame over both axes and
  # return a scalar, which breaks per-column scaling.
  col_min = x.min()
  col_range = x.max() - col_min
  # A constant column has zero range; divide it by 1 so it becomes 0, not NaN.
  col_range = col_range.where(col_range != 0, 1)
  x = (x - col_min) / col_range

  # --- PCA implementation ---
  # Mean-adjust the data column by column (x.mean() is column-wise).
  adjusted_x_train = x - x.mean()

  # Covariance matrix of the features (after transposing, rows are variables).
  covariance_matrix = np.cov(adjusted_x_train.T, bias=False)

  # The covariance matrix is symmetric, so eigh is the appropriate solver:
  # it returns real eigenvalues/eigenvectors, so no imaginary parts have to
  # be stripped afterwards.
  eigenvalue, eigenvector = np.linalg.eigh(covariance_matrix)

  # Pick the eigenvectors belonging to the two largest eigenvalues.
  top_two = np.argsort(eigenvalue)[::-1][:2]
  w = eigenvector[:, top_two]

  # Project the centred data onto the two principal directions
  # (resulting columns are named 0 and 1) and attach the labels.
  X_train_pca = adjusted_x_train.dot(w)
  X_train_pca['Disease'] = disease

  # Visualise the two-dimensional projection as a scatter plot.
  # The legend label is keyed on 'Disease', the actual colour column name.
  fig = px.scatter(X_train_pca, x=0, y=1, color='Disease', labels= { "0": "Principal component 1", "1":"Principal component 2", "Disease":"Disease Classification"},title = "Principal Component Analysis - Disease Classification"+filename)
  fig.show()



In [None]:
#SVD implementation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

#import package for performing SVD based transformation
from scipy import linalg

# Paths of the tab-separated data files; SVD is run on each file in turn.
# Every file is read without a header row: all columns but the last are
# treated as features and the last column as the class label.
files=["/content/pca_a.txt","/content/pca_b.txt", "/content/pca_c.txt"]         # Please change the file path here
# files=["/content/pca_demo.txt"]

for filename in files:
  # Load the current file into a dataframe.
  dt = pd.read_csv(filename, delimiter="\t", header=None)

  # Keep the label column (last) separate from the feature columns.
  disease = dt.iloc[:, -1]
  x = dt.iloc[:, :-1]

  # Min-max normalise every feature column.
  # DataFrame.min()/max() are used instead of np.min()/np.max(): on
  # pandas >= 2.0 the NumPy wrappers reduce a DataFrame over both axes and
  # return a scalar, which breaks per-column scaling.
  col_min = x.min()
  col_range = x.max() - col_min
  # A constant column has zero range; divide it by 1 so it becomes 0, not NaN.
  col_range = col_range.where(col_range != 0, 1)
  x = (x - col_min) / col_range

  # Thin SVD of the normalised data; only the left singular vectors are used.
  U, _, _ = linalg.svd(x, full_matrices=False)

  # Keep the first two columns of U as the two-dimensional coordinates
  # (they are plotted as-is, without scaling by the singular values).
  svd_components = pd.DataFrame(U[:, 0:2])
  svd_components['Disease'] = disease

  # Scatter plot of the two components, coloured by label.
  # The legend label is keyed on 'Disease', the actual colour column name.
  fig = px.scatter(svd_components, x=0, y=1, color='Disease', labels= { "0": "Component 1", "1":"Component 2", "Disease":"Disease Classification"},title = "Singular Value Decomposition - Disease Classification"+filename)
  fig.show()

 

In [None]:
#t-sne Implementation
#importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from pandas import DataFrame

#importing the package for tsne
from sklearn.manifold import TSNE

# Paths of the tab-separated data files; t-SNE is run on each file in turn.
# Every file is read without a header row: all columns but the last are
# treated as features and the last column as the class label.
files=["/content/pca_a.txt","/content/pca_b.txt", "/content/pca_c.txt"]         # Please change the file path here
# files=["/content/pca_demo.txt"]

# Fixed seed so the t-SNE embedding is the same on every run.
TSNE_SEED = 42

for filename in files:
  # header=None keeps the first line as data; without it the first sample
  # would be consumed as column names and dropped from the analysis.
  dt = pd.read_csv(filename, delimiter="\t", header=None)

  # Keep the label column (last) separate from the feature columns.
  disease = dt.iloc[:, -1]
  x = dt.iloc[:, :-1]

  # Min-max normalise every feature column.
  # DataFrame.min()/max() are used instead of np.min()/np.max(): on
  # pandas >= 2.0 the NumPy wrappers reduce a DataFrame over both axes and
  # return a scalar, which breaks per-column scaling.
  col_min = x.min()
  col_range = x.max() - col_min
  # A constant column has zero range; divide it by 1 so it becomes 0, not NaN.
  col_range = col_range.where(col_range != 0, 1)
  Normalized_x = (x - col_min) / col_range

  # Fit t-SNE and embed the normalised data in two dimensions.
  embedding = TSNE(n_components=2, random_state=TSNE_SEED).fit_transform(Normalized_x)

  # Wrap the embedding in a dataframe (columns 0 and 1) and attach the labels.
  tsne_components = DataFrame(embedding, columns=[0, 1])
  tsne_components['Disease'] = disease

  # Scatter plot of the two components, coloured by label.
  # The legend label is keyed on 'Disease', the actual colour column name.
  fig = px.scatter(tsne_components, x=0, y=1, color='Disease', labels= { "0": "Component 1", "1":"Component 2", "Disease":"Disease Classification"},title = "t-distributed Stochastic Neighbor Embedding - Disease Classification"+filename)
  fig.show()
