# PCA y Regresión Lineal Múltiple con espectros NIR para Predecir Propiedades del Plástico.

## Importación de librerías

In [1]:
import gc

In [2]:
import pandas as pd
import numpy as np


In [3]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error

In [4]:
from sklearn.linear_model import LinearRegression

## Lectura de datos espectrales

In [5]:
df_nir_train = pd.read_excel('MATRIZ FINAL.xlsx', sheet_name='calibracion')
df_nir_val = pd.read_excel('MATRIZ FINAL.xlsx', sheet_name='validacion')

In [6]:
print(df_nir_train.shape, df_nir_val.shape)

(266, 1110) (118, 1110)


## Eliminar columna inútil

In [7]:
df_nir_train.drop('Unnamed: 0', axis=1, inplace=True)
df_nir_val.drop('Unnamed: 0', axis=1, inplace=True)

In [8]:
print(df_nir_train.shape, df_nir_val.shape)

(266, 1109) (118, 1109)


In [9]:
X_train = df_nir_train.iloc[:,:1103]
X_test = df_nir_val.iloc[:,:1103]
Y_train = df_nir_train.iloc[:,1103:]
Y_test = df_nir_val.iloc[:,1103:]

## Escalado de datos

In [10]:
Y_train

Unnamed: 0,traccion,Energia,Tensil,Elongacion,1%secante,flexion
0,221237,43,5241,8.2,240306,249878
1,193847,50,5051,9.7,218331,224118
2,182146,50,4928,10.2,207553,212136
3,182164,51,4930,10.1,210538,218948
4,194220,48,4997,9.5,212068,222927
...,...,...,...,...,...,...
261,118297,55,4241,12.9,170587,177680
262,100581,52,3771,13.2,137127,145418
263,100492,53,3770,13.3,136687,143557
264,108062,54,4041,13.3,145452,153215


In [11]:
standard_scaler = StandardScaler()
standard_scaler.fit(X_train)
X_train_scaled = standard_scaler.transform(X_train)
X_test_scaled = standard_scaler.transform(X_test)

standard_scaler.fit(Y_train)
Y_train_scaled = standard_scaler.transform(Y_train)
Y_test_scaled = standard_scaler.transform(Y_test)

In [12]:
Y_train_scaled

array([[ 0.49986232, -0.04044156,  0.61624115, -0.46741273,  0.67344567,
         0.53356789],
       [ 0.03059172,  0.91275057,  0.37245938,  0.12880747,  0.25855003,
         0.14506543],
       [-0.16988053,  0.91275057,  0.21464276,  0.32754754,  0.05505765,
        -0.03564251],
       ...,
       [-1.56885172,  1.32126148, -1.27114299,  1.55973594, -1.28291706,
        -1.06992474],
       [-1.43915554,  1.45743178, -0.9234332 ,  1.55973594, -1.11743082,
        -0.92426648],
       [-1.51717885,  1.32126148, -1.14411985,  1.59948396, -1.14071028,
        -0.91512702]])

## Reducción de dimensionalidad con PCA

Pondremos 3 componentes porque ya previamente probamos que de la cuarta en adelante tienen menos del 1% de varianza.

In [42]:
pca = PCA(n_components=0.99)
pca.fit(X_train_scaled)
X_nir_pca = pca.transform(X_train_scaled)

In [43]:
pca.n_components_

4

In [44]:
np.sum(pca.explained_variance_ratio_)*100

99.33610219134823

In [45]:
# pca = PCA(n_components=3)
# pca.fit(X_train)
X_train_pca = pca.transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [46]:
np.sum(pca.explained_variance_ratio_)*100

99.33610219134823

## Separación de variables de salida

In [47]:
traccion = Y_train_scaled.T[0].reshape(-1,1)
energia = Y_train_scaled.T[1].reshape(-1,1)
tensil = Y_train_scaled.T[2].reshape(-1,1)
elongacion = Y_train_scaled.T[3].reshape(-1,1)
secante = Y_train_scaled.T[4].reshape(-1,1)
flexion = Y_train_scaled.T[5].reshape(-1,1)

In [48]:
traccion_test = Y_test_scaled.T[0].reshape(-1,1)
energia_test = Y_test_scaled.T[1].reshape(-1,1)
tensil_test = Y_test_scaled.T[2].reshape(-1,1)
elongacion_test = Y_test_scaled.T[3].reshape(-1,1)
secante_test = Y_test_scaled.T[4].reshape(-1,1)
flexion_test = Y_test_scaled.T[5].reshape(-1,1)

In [49]:
regression = LinearRegression()

In [50]:
Y_train.columns

Index(['traccion', 'Energia', 'Tensil', 'Elongacion', '1%secante', 'flexion'], dtype='object')

### Regresión Lineal para Tracción

In [51]:
Y_train_scaled.T[0].shape

(266,)

In [52]:
X_train_pca.shape

(266, 4)

In [53]:
regression.fit(X_train_pca, traccion)
print("r-cuadrado en train:", regression.score(X_train_pca, traccion))
print("r-cuadrado en test:", regression.score(X_test_pca, traccion_test))

r-cuadrado en train: 0.3026152778549782
r-cuadrado en test: 0.3367576685183159


### Regresión Lineal para Energía

In [54]:
regression.fit(X_train_pca, energia)
print("r-cuadrado en train:", regression.score(X_train_pca, energia))
print("r-cuadrado en test:", regression.score(X_test_pca, energia_test))

r-cuadrado en train: 0.5236114205335658
r-cuadrado en test: 0.5794675419167649


In [55]:
regression.fit(X_train_pca, energia)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, energia))
print(r2_score(energia, y))
print(mean_squared_error(energia, y))

0.5236114205335658
0.5236114205335658
0.47638857946643437


In [56]:
gc.collect()

5412

### Tensil

In [57]:
regression.fit(X_train_pca, traccion)
print("r-cuadrado en train:", regression.score(X_train_pca, tensil))
print("r-cuadrado en test:", regression.score(X_test_pca, tensil_test))

r-cuadrado en train: 0.14954576111514473
r-cuadrado en test: 0.2037446293366244


In [58]:
regression.fit(X_train_pca, tensil)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, tensil))
print(r2_score(tensil, y))
print(mean_squared_error(tensil, y))

0.23794626312221456
0.23794626312221456
0.7620537368777854


### Elongación

In [59]:
regression.fit(X_train_pca, elongacion)
print("r-cuadrado en train:", regression.score(X_train_pca, elongacion))
y = regression.predict(X_train_pca)
print("r-cuadrado en test:", regression.score(X_test_pca, elongacion_test))


r-cuadrado en train: 0.403796990048078
r-cuadrado en test: 0.44969411156269956


In [60]:
regression.fit(X_train_pca, elongacion)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, elongacion))
print(r2_score(elongacion, y))
print(mean_squared_error(elongacion, y))

0.403796990048078
0.403796990048078
0.596203009951922


### Secante

In [61]:
regression.fit(X_train_pca, secante)
print("r-cuadrado en train:", regression.score(X_train_pca, secante))
print("r-cuadrado en test:", regression.score(X_test_pca, secante_test))

r-cuadrado en train: 0.26374455931215146
r-cuadrado en test: 0.34695510322332224


In [62]:
regression.fit(X_train_pca, secante)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, secante))
print(r2_score(secante, y))
print(mean_squared_error(secante, y))

0.26374455931215146
0.26374455931215146
0.7362554406878485


### Flexión

In [63]:
regression.fit(X_train_pca, flexion)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, flexion))
print(r2_score(flexion, y))
print(mean_squared_error(flexion, y))

0.14881009146763557
0.14881009146763557
0.8511899085323644


## Regresión Polinomial para Tracción

In [64]:
from sklearn.preprocessing import PolynomialFeatures

# Crear instancias de PolynomialFeatures para generar características polinomiales
poly_features = PolynomialFeatures(degree=2)

# Transformar los datos de entrenamiento y prueba en características polinomiales
X_train_poly = poly_features.fit_transform(X_train_pca)
X_test_poly = poly_features.transform(X_test_pca)

# Crear un modelo de regresión lineal
regression = LinearRegression()

# Entrenar el modelo con los datos de entrenamiento polinomiales
regression.fit(X_train_poly, traccion)

# Predecir los valores de tracción en el conjunto de entrenamiento y prueba
y_train_pred = regression.predict(X_train_poly)
y_test_pred = regression.predict(X_test_poly)

# Calcular el coeficiente de determinación (R-cuadrado) en el conjunto de entrenamiento y prueba
r2_train = r2_score(traccion, y_train_pred)
r2_test = r2_score(traccion_test, y_test_pred)

# Imprimir los resultados
print("R-cuadrado en el conjunto de entrenamiento:", r2_train)
print("R-cuadrado en el conjunto de prueba:", r2_test)


R-cuadrado en el conjunto de entrenamiento: 0.5072367950192493
R-cuadrado en el conjunto de prueba: 0.4599124903201305


In [65]:
X_train_pca.shape

(266, 4)

In [74]:
import tensorflow as tf

# Definir la arquitectura de la red neuronal
model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation='tanh', input_shape=(4,)),
    tf.keras.layers.Dense(4, activation='tanh'),
    tf.keras.layers.Dense(1)
])

# Compilar el modelo
model.compile(optimizer='rmsprop', loss='mse', metrics=['mean_squared_error'])

# Entrenar la red neuronal
model.fit(X_train_pca, traccion, epochs=220, verbose=2)

# Evaluar el modelo en el conjunto de entrenamiento y prueba
train_loss = model.evaluate(X_train_pca, traccion)
test_loss = model.evaluate(X_test_pca, traccion_test)

# Imprimir los resultados
print("Pérdida en el conjunto de entrenamiento:", train_loss)
print("Pérdida en el conjunto de prueba:", test_loss)


Epoch 1/220
9/9 - 4s - loss: 1.6970 - mean_squared_error: 1.6970 - 4s/epoch - 394ms/step
Epoch 2/220
9/9 - 0s - loss: 1.1223 - mean_squared_error: 1.1223 - 151ms/epoch - 17ms/step
Epoch 3/220
9/9 - 0s - loss: 1.0462 - mean_squared_error: 1.0462 - 142ms/epoch - 16ms/step
Epoch 4/220
9/9 - 0s - loss: 1.0391 - mean_squared_error: 1.0391 - 158ms/epoch - 18ms/step
Epoch 5/220
9/9 - 0s - loss: 1.0180 - mean_squared_error: 1.0180 - 125ms/epoch - 14ms/step
Epoch 6/220
9/9 - 0s - loss: 1.0012 - mean_squared_error: 1.0012 - 156ms/epoch - 17ms/step
Epoch 7/220
9/9 - 0s - loss: 0.9754 - mean_squared_error: 0.9754 - 111ms/epoch - 12ms/step
Epoch 8/220
9/9 - 0s - loss: 0.9720 - mean_squared_error: 0.9720 - 155ms/epoch - 17ms/step
Epoch 9/220
9/9 - 0s - loss: 0.9551 - mean_squared_error: 0.9551 - 149ms/epoch - 17ms/step
Epoch 10/220
9/9 - 0s - loss: 0.9487 - mean_squared_error: 0.9487 - 126ms/epoch - 14ms/step
Epoch 11/220
9/9 - 0s - loss: 0.9392 - mean_squared_error: 0.9392 - 152ms/epoch - 17ms/step

KeyboardInterrupt: 

In [73]:
print("r-cuadrado train:", r2_score(traccion, model.predict(X_train_pca)))
print("r-cuadrado test:", r2_score(traccion_test, model.predict(X_test_pca)))

r-cuadrado train: 0.18207044348678347
r-cuadrado test: 0.18627860575847688
