In [146]:
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LinearRegression



class Data(object):
    """Load a semicolon-separated numeric file and split it into train/test sets.

    Each non-blank line of the file must hold at least seven values that
    ``float()`` can parse: six features (``x1`` .. ``x6``) followed by the
    target ``y``. No header row is expected.

    Attributes:
        dataset: 1-D NumPy structured array with fields ``x1`` .. ``x6`` and ``y``.
    """

    # Field names of the structured array, in file-column order.
    _FEATURES = ('x1', 'x2', 'x3', 'x4', 'x5', 'x6')
    _TARGET = 'y'

    def __init__(self, path):
        """Parse the file at ``path`` and keep the result in ``self.dataset``."""
        self.dataset = self.build_dataset(path)

    @staticmethod
    def build_dataset(path):
        """Read ``path`` into a structured array with one record per line.

        Args:
            path: Location of a UTF-8 text file using ``;`` as field separator.

        Returns:
            np.ndarray: 1-D structured array with float fields ``x1`` .. ``x6``, ``y``.

        Raises:
            IndexError: If a non-blank line has fewer than seven fields.
            ValueError: If a field cannot be converted with ``float()``.
        """
        structure = [(name, float) for name in Data._FEATURES + (Data._TARGET,)]
        n_cols = len(structure)

        def parse(line):
            # Split once per line; columns beyond the seventh are ignored.
            fields = line.split(';')
            return tuple(float(fields[k]) for k in range(n_cols))

        with open(path, encoding="utf8") as data_csv:
            # Blank lines (e.g. a trailing newline at end of file) are skipped
            # instead of failing on float('').
            rows = (parse(line) for line in data_csv if line.strip())
            data = np.fromiter(rows, dtype=structure)

        return data

    def split(self, porc, seed=None):
        """Randomly split the dataset into train and test partitions.

        Rows are shuffled with a single permutation applied to both X and y,
        so every feature row stays paired with its target.

        Args:
            porc: Fraction of rows assigned to the training set;
                ``int(porc * n_rows)`` rows go to train, the rest to test.
            seed: Optional seed for a reproducible shuffle. When ``None`` the
                global NumPy random state is used.

        Returns:
            tuple: ``(X_train, X_test, y_train, y_test)`` where the X arrays
            have six columns and the y arrays are 1-D.
        """
        X = np.column_stack([self.dataset[name] for name in self._FEATURES])
        y = self.dataset[self._TARGET]

        n_rows = X.shape[0]
        if seed is None:
            idx = np.random.permutation(n_rows)
        else:
            idx = np.random.RandomState(seed).permutation(n_rows)

        cut = int(porc * n_rows)
        train_idx, test_idx = idx[:cut], idx[cut:]

        return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

class Imp_NaN(object):
    """Fill the ``np.nan`` entries of a feature matrix.

    Attributes:
        X: The matrix handed to the constructor; every imputation method
            fits on it and returns the transformed result.
    """

    def __init__(self, X):
        self.X = X

    def _fit_and_fill(self, imputer):
        """Fit ``imputer`` on ``self.X`` and return the transformed matrix."""
        return imputer.fit_transform(self.X)

    def nan_mice(self):
        """Impute with ``IterativeImputer`` driven by a ``LinearRegression``.

        Uses at most 10 iterations and ``random_state=0``.
        """
        iterative = IterativeImputer(
            estimator=LinearRegression(),
            missing_values=np.nan,
            max_iter=10,
            random_state=0,
        )
        return self._fit_and_fill(iterative)

    def nan_mean(self):
        """Impute with ``SimpleImputer`` using the ``'mean'`` strategy."""
        averaging = SimpleImputer(missing_values=np.nan, strategy='mean')
        return self._fit_and_fill(averaging)

class pca_a_mano(object):
    """Hand-written PCA based on the eigendecomposition of the covariance matrix.

    Attributes:
        X: Data matrix with one sample per row and one feature per column.
        components: Number of leading principal directions that ``pca`` returns.
    """

    def __init__(self, X, n_comp = 3):
        self.X = X
        self.components = n_comp

    def pca(self):
        """Return the leading principal directions of ``self.X``.

        Returns:
            np.ndarray: Real array of shape ``(n_features, k)`` whose columns are
            unit-norm eigenvectors of the sample covariance matrix, ordered by
            decreasing eigenvalue, with ``k = min(self.components, n_features)``.
            The sign of each column is arbitrary.
        """
        X = np.asarray(self.X)
        X_centered = X - np.mean(X, axis=0)

        # np.cov returns a 0-d value for a single feature; eigh needs a 2-D matrix.
        S = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # The covariance matrix is symmetric, so use eigh: unlike eig it always
        # yields real eigenvalues and orthonormal eigenvectors, even when S is
        # rank-deficient (e.g. fewer samples than features).
        eigvalue, eigvector = np.linalg.eigh(S)

        # eigh sorts eigenvalues in ascending order; PCA wants the largest first.
        order = np.argsort(eigvalue)[::-1]
        return eigvector[:, order[:self.components]]

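# Example (kept commented out): project the data onto the first principal
# component computed by hand. pca() returns the eigenvectors, so the
# projection is the centered data times those eigenvectors.
#components = pca_a_mano(X, n_comp = 1).pca()
#z_pca = (X - np.mean(X, axis = 0)).dot(components)
#print('Proyeccion a la 1er componente (a mano):\n', z_pca)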

In [147]:
# Semicolon-separated input file, resolved relative to the working directory.
path = 'clase3v2.csv'

# Data.__init__ parses the file and stores the structured array in `dataset.dataset`.
dataset = Data(path)

In [148]:
# porc=0.8 -> 80% of the rows are used for training, the remaining 20% for testing.
X_train, X_test, y_train, y_test = dataset.split(0.8)

In [149]:
# Fill the NaNs of the training features with the iterative imputer.
# Only the last expression of a cell is displayed, so the shape shown in the
# output is that of the imputed matrix.
X_train_mice = Imp_NaN(X_train).nan_mice()
X_train_mice.shape

(80, 6)

In [150]:
# Fill the NaNs of the training features with SimpleImputer's 'mean' strategy.
X_train_mean=Imp_NaN(X_train).nan_mean()

In [151]:
# Inspect the shape of the mean-imputed training matrix.
X_train_mean.shape

(80, 6)

In [152]:
# pca() returns one column per retained component (default n_comp=3),
# so the result has shape (n_features, 3).
pca_a_mano(X_train_mean).pca().shape

(6, 3)