In [99]:
import numpy as np
import random
import math
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from timeit import default_timer as timer

# Llenado de datos

In [93]:
def fill_data(ndata, nfeat, nindep, max_val, max_err, max_mult, rng=None):
    """Build a synthetic dataset whose trailing columns depend on the leading ones.

    The first ``nindep`` columns are independent uniform draws from
    ``[0, max_val)``.  Every remaining column ``j`` is computed as
    ``data[:, j % nindep] * mult + err``, where ``mult`` is a single value per
    column drawn from ``[1, max_mult)`` and ``err`` is per-element noise drawn
    from ``[0, max_err)``.

    Args:
        ndata: Number of rows (samples).
        nfeat: Total number of columns (features).
        nindep: Number of leading independent columns.
        max_val: Upper bound for the independent values.
        max_err: Upper bound for the additive noise on dependent columns.
        max_mult: Upper bound for the per-column multiplier.
        rng: Optional seed or ``numpy.random.Generator``.  ``None`` (the
            default) draws fresh OS entropy, so existing positional calls keep
            producing a different dataset on every call.

    Returns:
        A float ``numpy.ndarray`` of shape ``(ndata, nfeat)``.

    Raises:
        ValueError: If columns are requested but ``nindep`` is smaller than 1,
            because dependent columns would have no source column.
    """
    if nfeat > 0 and nindep < 1:
        raise ValueError("nindep must be at least 1 when nfeat > 0")

    # One generator for the whole call: re-seeding before every single draw
    # (as was done previously) is slow and defeats any attempt at
    # reproducibility.
    rng = np.random.default_rng(rng)

    data = np.zeros((ndata, nfeat))

    # Independent block: all columns drawn in one vectorised call.
    n_free = max(0, min(nindep, nfeat))
    data[:, :n_free] = rng.uniform(0, max_val, size=(ndata, n_free))

    # Dependent block: column j is a noisy multiple of column j % nindep,
    # which always indexes into the independent block filled above.
    for j in range(n_free, nfeat):
        mult = rng.uniform(1, max_mult)
        err = rng.uniform(0, max_err, size=ndata)
        data[:, j] = data[:, j % nindep] * mult + err
    return data

In [94]:
def compute_error(database, recover, scale=None):
    """Return the mean absolute reconstruction error, normalised by ``scale``.

    Computes ``sum(|database - recover|) / (ndata * nfeat * scale)``.

    Args:
        database: 2-D array with the original data.
        recover: 2-D array with the reconstructed data; must have the same
            shape as ``database``.
        scale: Reference magnitude used to turn the mean absolute error into
            a relative one.  When omitted, a module-level ``absmax_val`` is
            used if one exists (the name this function historically read from
            the notebook namespace); otherwise the largest absolute value in
            ``database`` is used.  Pass it explicitly for reproducible results.

    Returns:
        The normalised mean absolute error as a Python float.

    Raises:
        ValueError: If the two arrays do not have the same shape.
        ZeroDivisionError: If ``database`` is empty or ``scale`` is zero.
    """
    database = np.asarray(database, dtype=float)
    recover = np.asarray(recover, dtype=float)
    ndata, nfeat = database.shape
    if recover.shape != database.shape:
        # Guard against silent NumPy broadcasting of mismatched inputs.
        raise ValueError(
            "shape mismatch: database %s vs recover %s"
            % (database.shape, recover.shape)
        )

    if scale is None:
        try:
            # Legacy behaviour: honour a notebook-level global if it is defined.
            scale = absmax_val
        except NameError:
            # Self-contained fallback so a fresh kernel does not crash.
            # `initial` keeps the reduction defined for empty input.
            scale = np.abs(database).max(initial=0.0)

    # sqrt(x**2) is |x|; sum all deviations in one vectorised pass.
    total = float(np.abs(database - recover).sum())
    return total / (ndata * nfeat * float(scale))

In [96]:
# Synthetic-dataset configuration.
# Sizes: number of rows, total columns, and leading independent columns.
ndata, nfeat, nindep = 5000, 15, 5
# Bounds for the independent values, the additive noise and the multiplier.
max_val, max_err, max_mult = 10, 0.5, 2

# Build the dataset that the PCA / NMF cells below decompose.
database = fill_data(ndata, nfeat, nindep, max_val, max_err, max_mult)

## PCA

In [106]:
# Sweep the number of PCA components from 0 to nfeat - 1, reporting the
# normalised reconstruction error and the wall-clock time of each lap.
# The row where the component count equals nindep is flagged with an arrow.
start = timer()
for i in range(nfeat):
    lapstart = timer()
    pca = PCA(n_components=i)
    reduced_data = pca.fit_transform(database)
    recover = pca.inverse_transform(reduced_data)
    error = compute_error(database, recover)
    lapend = timer()

    # Same-width filler keeps the timing column aligned on unflagged rows.
    marker = "    <---------" if i == nindep else "              "
    print("%2.2f" % (error*100) + " %" + marker + "    Time --> %.2f" % (lapend - lapstart) + " s")
end = timer()
print("\nTiempo --> %.2f" % (end - start) + " s")

32.54 %                  Time --> 0.25 s
27.00 %                  Time --> 0.26 s
20.05 %                  Time --> 0.25 s
12.44 %                  Time --> 0.26 s
7.07 %                  Time --> 0.27 s
0.72 %    <---------    Time --> 0.26 s
0.64 %                  Time --> 0.26 s
0.56 %                  Time --> 0.25 s
0.47 %                  Time --> 0.26 s
0.35 %                  Time --> 0.26 s
0.26 %                  Time --> 0.26 s
0.20 %                  Time --> 0.24 s
0.15 %                  Time --> 0.24 s
0.12 %                  Time --> 0.25 s
0.07 %                  Time --> 0.26 s

Tiempo --> 3.83 s


In [103]:
# Single PCA fit that keeps exactly nindep components, then prints the raw
# (non-percentage) normalised reconstruction error.
pca = PCA(n_components=nindep)
reduced_data = pca.fit_transform(database)
recover = pca.inverse_transform(reduced_data)

error = compute_error(database, recover)
print(error)

0.007219864838643564


# NMF

In [108]:
# The last terms are very slow to compute, so this run is limited to 200
# iterations (no max_iter is passed, so the estimator's own default cap
# applies; the original note gives it as 200).
# Sweeps the NMF component count from 1 to nfeat - 1 and flags the row where
# it equals nindep.
start = timer()
for i in range(1, nfeat):
    lapstart = timer()
    model = NMF(n_components=i, init='random', random_state=0)
    reduced_data = model.fit_transform(database)
    recover = model.inverse_transform(reduced_data)
    error = compute_error(database, recover)
    lapend = timer()

    # Same-width filler keeps the timing column aligned on unflagged rows.
    marker = "    <---------" if i == nindep else "              "
    print("%2.2f" % (error*100) + " %" + marker + "  Time --> %.2f" % (lapend - lapstart) + " s")

end = timer()
print("\nTiempoTotal --> %.2f" % (end - start) + " s")

28.17 %                Time --> 0.25 s




22.17 %                Time --> 0.35 s
16.27 %                Time --> 0.35 s




8.84 %                Time --> 0.38 s




0.75 %    <---------  Time --> 0.43 s




0.75 %                Time --> 0.49 s




0.75 %                Time --> 0.50 s




0.76 %                Time --> 0.53 s




0.74 %                Time --> 0.68 s




0.74 %                Time --> 0.65 s




0.67 %                Time --> 0.76 s




0.69 %                Time --> 0.85 s




0.69 %                Time --> 0.87 s




0.68 %                Time --> 0.92 s

TiempoTotal --> 8.02 s


In [109]:
# The last terms are very slow to compute; here the solver is allowed to run
# to the end by raising the iteration cap to 10000.
# Sweeps the NMF component count from 1 to nfeat - 1 and flags the row where
# it equals nindep.
start = timer()
for i in range(1, nfeat):
    lapstart = timer()
    model = NMF(n_components=i, init='random', random_state=0, max_iter=10000)
    reduced_data = model.fit_transform(database)
    recover = model.inverse_transform(reduced_data)
    error = compute_error(database, recover)
    lapend = timer()

    # Same-width filler keeps the timing column aligned on unflagged rows.
    marker = "    <---------" if i == nindep else "              "
    print("%2.2f" % (error*100) + " %" + marker + "  Time --> %.2f" % (lapend - lapstart) + " s")

end = timer()
print("\nTiempoTotal --> %.2f" % (end - start) + " s")

28.17 %                Time --> 0.25 s
22.18 %                Time --> 0.69 s
16.27 %                Time --> 0.36 s
8.84 %                Time --> 0.39 s
0.75 %    <---------  Time --> 0.54 s
0.75 %                Time --> 0.73 s
0.68 %                Time --> 5.10 s
0.60 %                Time --> 10.25 s
0.50 %                Time --> 9.66 s
0.48 %                Time --> 8.86 s
0.34 %                Time --> 12.97 s
0.33 %                Time --> 10.59 s
0.30 %                Time --> 9.06 s
0.29 %                Time --> 10.86 s

TiempoTotal --> 80.32 s


In [110]:
# Single NMF fit with exactly nindep components and a 10000-iteration cap,
# then prints the raw (non-percentage) normalised reconstruction error.
model = NMF(
    n_components=nindep,
    init='random',
    random_state=0,
    max_iter=10000,
)
reduced_data = model.fit_transform(database)
recover = model.inverse_transform(reduced_data)

error = compute_error(database, recover)
print(error)

0.00748678938269072
