# PCA y Regresión Lineal Múltiple con espectros NIR para Predecir Propiedades del Plástico.

## Importación de librerías

In [64]:
import gc

In [1]:
import pandas as pd
import numpy as np


In [9]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error

In [38]:
from sklearn.linear_model import LinearRegression

## Lectura de datos espectrales

In [22]:
df_nir_train = pd.read_excel('MATRIZ FINAL.xlsx', sheet_name='calibracion')
df_nir_val = pd.read_excel('MATRIZ FINAL.xlsx', sheet_name='validacion')

In [23]:
print(df_nir_train.shape, df_nir_val.shape)

(266, 1110) (118, 1110)


In [24]:
df_nir_train.drop('Unnamed: 0', axis=1, inplace=True)
df_nir_val.drop('Unnamed: 0', axis=1, inplace=True)

In [25]:
print(df_nir_train.shape, df_nir_val.shape)

(266, 1109) (118, 1109)


In [26]:
X_train = df_nir_train.iloc[:,:1103]
X_test = df_nir_val.iloc[:,:1103]
Y_train = df_nir_train.iloc[:,1103:]
Y_test = df_nir_val.iloc[:,1103:]

## Escalado de datos

In [28]:
Y_train

Unnamed: 0,traccion,Energia,Tensil,Elongacion,1%secante,flexion
0,221237,43,5241,8.2,240306,249878
1,193847,50,5051,9.7,218331,224118
2,182146,50,4928,10.2,207553,212136
3,182164,51,4930,10.1,210538,218948
4,194220,48,4997,9.5,212068,222927
...,...,...,...,...,...,...
261,118297,55,4241,12.9,170587,177680
262,100581,52,3771,13.2,137127,145418
263,100492,53,3770,13.3,136687,143557
264,108062,54,4041,13.3,145452,153215


In [29]:
standard_scaler = StandardScaler()
standard_scaler.fit(X_train)
X_train_scaled = standard_scaler.transform(X_train)
X_test_scaled = standard_scaler.transform(X_test)

standard_scaler.fit(Y_train)
Y_train_scaled = standard_scaler.transform(Y_train)
Y_test_scaled = standard_scaler.transform(Y_test)

In [37]:
Y_train_scaled#[0]

array([[ 0.49986232, -0.04044156,  0.61624115, -0.46741273,  0.67344567,
         0.53356789],
       [ 0.03059172,  0.91275057,  0.37245938,  0.12880747,  0.25855003,
         0.14506543],
       [-0.16988053,  0.91275057,  0.21464276,  0.32754754,  0.05505765,
        -0.03564251],
       ...,
       [-1.56885172,  1.32126148, -1.27114299,  1.55973594, -1.28291706,
        -1.06992474],
       [-1.43915554,  1.45743178, -0.9234332 ,  1.55973594, -1.11743082,
        -0.92426648],
       [-1.51717885,  1.32126148, -1.14411985,  1.59948396, -1.14071028,
        -0.91512702]])

## Reducción de dimensionalidad con PCA

Pondremos 3 componentes porque ya previamente probamos que de la cuarta en adelante tienen menos del 1% de varianza.

In [33]:
pca = PCA(n_components=3)
pca.fit(X_train)
X_train_pca = pca.transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [35]:
pca.explained_variance_ratio_*100

array([76.71609604, 20.79538914,  1.24439013])

## Separación de variables de salida

In [62]:
traccion = Y_train_scaled.T[0].reshape(-1,1)
energia = Y_train_scaled.T[1].reshape(-1,1)
tensil = Y_train_scaled.T[2].reshape(-1,1)
elongacion = Y_train_scaled.T[3].reshape(-1,1)
secante = Y_train_scaled.T[4].reshape(-1,1)
flexion = Y_train_scaled.T[5].reshape(-1,1)

In [39]:
regression = LinearRegression()

In [42]:
Y_train.columns

Index(['traccion', 'Energia', 'Tensil', 'Elongacion', '1%secante', 'flexion'], dtype='object')

### Regresión Lineal para Tracción

In [47]:
Y_train_scaled.T[0].shape

(266,)

In [46]:
X_train_pca.shape

(266, 3)

In [72]:
regression.fit(X_train_pca, traccion)
print(regression.score(X_train_pca, traccion))
y = regression.predict(X_train_pca)
print(r2_score(traccion, y))
print(mean_squared_error(traccion, y))

0.1457016441101885
0.1457016441101885
0.8542983558898115


### Regresión Lineal para Energía

In [71]:
regression.fit(X_train_pca, energia)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, energia))
print(r2_score(energia, y))
print(mean_squared_error(energia, y))

0.49189633019367285
0.49189633019367285
0.5081036698063273


In [65]:
gc.collect()

1155

### Tensil

In [68]:
regression.fit(X_train_pca, tensil)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, tensil))
print(r2_score(tensil, y))
print(mean_squared_error(tensil, y))

0.12163727927395829
0.12163727927395829
0.8783627207260417


### Elongación

In [73]:
regression.fit(X_train_pca, elongacion)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, elongacion))
print(r2_score(elongacion, y))
print(mean_squared_error(elongacion, y))

0.2782245646747912
0.2782245646747912
0.7217754353252088


### Secante

In [74]:
regression.fit(X_train_pca, secante)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, secante))
print(r2_score(secante, y))
print(mean_squared_error(secante, y))

0.1159226819190673
0.1159226819190673
0.8840773180809327


### Flexión

In [75]:
regression.fit(X_train_pca, flexion)
y = regression.predict(X_train_pca)
print(regression.score(X_train_pca, flexion))
print(r2_score(flexion, y))
print(mean_squared_error(flexion, y))

0.06663115811873255
0.06663115811873255
0.9333688418812675
