# Principal Component Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Render every float with two decimals in the DataFrame outputs below.
def _format_two_decimals(value):
    return '%.2f' % value


pd.set_option('display.float_format', _format_two_decimals)

# Load the CSV that sits next to the notebook and preview its first rows.
df = pd.read_csv("./winequality-red.csv")
df.head()

In [None]:
# Summary statistics, transposed so each column of df becomes one row.
df.describe().transpose()

In [None]:
# Keep the 'quality' column as the target and drop it from the features.
y = df.loc[:, 'quality']
df = df.drop(columns=['quality'])

In [None]:
def compute_pca(df, n_components):
    """Project ``df`` onto its leading principal components.

    The columns are standardized with ``StandardScaler``, the covariance
    matrix of the standardized data is eigendecomposed, and the data is
    projected onto the eigenvectors that have the largest eigenvalues.

    Args:
        df: 2-D array-like (rows are samples, columns are features), as
            accepted by ``StandardScaler.fit_transform``.
        n_components: Number of principal components to keep. Must satisfy
            ``1 <= n_components <= number of columns``.

    Returns:
        A ``numpy.ndarray`` with one row per input row and ``n_components``
        columns, ordered by decreasing eigenvalue. The sign of each column
        is arbitrary, as for any eigenvector-based projection.

    Raises:
        ValueError: If ``n_components`` is outside the valid range.
    """
    # Standardize the data (zero mean, unit variance per column).
    X_std = StandardScaler().fit_transform(df)

    n_features = X_std.shape[1]
    if not 1 <= n_components <= n_features:
        raise ValueError(
            f"n_components must be between 1 and {n_features}, "
            f"got {n_components}"
        )

    # Covariance between columns; atleast_2d keeps the single-feature case
    # (where np.cov returns a 0-d array) usable by the decomposition below.
    cov_mat = np.atleast_2d(np.cov(X_std, rowvar=False))

    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and orthonormal eigenvectors, which the general-purpose
    # eig does not.
    eig_vals, eig_vecs = np.linalg.eigh(cov_mat)

    # eigh returns eigenvalues in ascending order; take the largest first.
    order = np.argsort(eig_vals)[::-1]
    matrix_w = eig_vecs[:, order[:n_components]]

    # Project the standardized data onto the selected subspace.
    return X_std.dot(matrix_w)

In [None]:
def compute_pca(X, n_components=2):
    """Reduce ``X`` to ``n_components`` columns via PCA, printing each step.

    The data is standardized with ``StandardScaler``, the covariance matrix
    of its columns is eigendecomposed with ``np.linalg.eigh``, and the
    standardized data is projected onto the eigenvectors belonging to the
    largest eigenvalues. The standardized data (first ten rows), the
    covariance matrix, and the eigenvalues/eigenvectors are printed along
    the way.

    Args:
        X: 2-D array-like accepted by ``StandardScaler.fit_transform``.
        n_components: Number of leading eigenvectors to project onto.

    Returns:
        The projected data, one row per input row.
    """
    # Standardize every column.
    scaler = StandardScaler(with_std=True)
    standardized = scaler.fit_transform(X)
    print("Matrix X After Normalization:- \n", standardized[:10])

    # Covariance between columns (rowvar=False: columns are the variables).
    cov = np.cov(standardized, rowvar=False)
    print("\nCovariance matrix:-\n", cov)

    # Eigendecomposition of the symmetric covariance matrix.
    values, vectors = np.linalg.eigh(cov)
    print(f"\nEigenvalues\n{values}\n\nEigenvectors\n{vectors}\n")

    # Reorder the eigenvector columns by decreasing eigenvalue, then keep
    # only the first n_components of them.
    descending = np.argsort(values)[::-1]
    leading_vectors = vectors[:, descending][:, :n_components]

    # (W^T X^T)^T: project the standardized rows onto the kept eigenvectors.
    return leading_vectors.T.dot(standardized.T).T

In [None]:
# df = compute_pca(df, 2)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
splits = train_test_split(df, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

# Number of feature columns the model will receive.
X_train.shape[1]

In [None]:
# Fully connected (dense) neural network using TensorFlow/Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# The target is the raw 'quality' column, which is not a 0/1 label, so a
# single sigmoid unit with binary cross-entropy cannot fit it. Treat it as
# a multi-class problem instead: one softmax output per integer label.
# NOTE(review): assumes `quality` holds non-negative integer scores, as in
# the wine-quality CSV - confirm before relying on this.
num_classes = int(y.max()) + 1

model = Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])
model.compile(
    optimizer='adam',
    # Sparse variant: labels are integer class ids, not one-hot vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Keras ignores validation_split whenever validation_data is supplied, so
# only validation_data is passed; this matches what the call did before.
# NOTE(review): the held-out test split doubles as the validation set here.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)