In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split


In [2]:
# Load the processed milk dataset; the first CSV column is used as the row index.
data = pd.read_csv(
    "../data/processed/data_milk.csv",
    index_col=0,
)
# Preview the first rows as the cell's rich output.
data.head()


Unnamed: 0,pH,Temprature,Taste,Odor,Fat,Turbidity,Colour,Grade
0,6.6,35,1,0,1,0,254,high
1,6.6,36,0,1,0,1,253,high
2,8.5,70,1,1,1,1,246,low
3,9.5,34,1,1,0,1,255,low
4,6.6,37,0,0,0,0,255,medium


In [3]:
# Separate the target column from the remaining columns.
# NOTE(review): Xr keeps every non-target column, so it would include
# "Unnamed: 0" if that is a real column in the frame — confirm.
y = data["Grade"]
Xr = data.drop(columns=["Grade"])


In [4]:
# Pairwise scatter plots of the raw feature columns, coloured by the Grade column.
# NOTE(review): 'Fat ' carries a trailing space — presumably it mirrors the CSV
# header exactly; confirm before normalising it.
features = ['pH', 'Temprature', 'Taste', 'Odor', 'Fat ', 'Turbidity', 'Colour']

matrix_options = dict(
    dimensions=features,
    color="Grade",
    height=800,
    template='plotly_dark',
)
fig = px.scatter_matrix(data, **matrix_options)

# Hide the diagonal panels and set the figure title.
fig.update_traces(diagonal_visible=False).update_layout(title="Projections from features")

# Report the layout size; no width is passed above, only a height.
layout_size = (fig.layout.width, fig.layout.height)
print(f"Actual dimension {layout_size}")
fig.show()

# Export a static copy of the figure for the reports folder.
fig.write_image('../reports/Projections from features.png', scale=1)

Actual dimension (None, 800)


In [6]:
# Project the feature columns onto the first three principal components.
# (px and PCA are already imported in the notebook's first cell.)
pca = PCA(n_components=3)
components = pca.fit_transform(data[features])

# Axis captions keyed by component position (as strings), each showing the
# percentage of variance explained by that component.
labels = {
    str(i): f"PC {i+1} ({var:.1f}%)"
    for i, var in enumerate(pca.explained_variance_ratio_ * 100)
}

# Pairwise scatter plots of the three components, coloured by the Grade column.
fig = px.scatter_matrix(
    components,
    labels=labels,
    dimensions=range(3),
    color=data['Grade'],
    height=600,
    template='plotly_dark'
)
fig.update_traces(diagonal_visible=False)
# Title typo fixed ("Projectios" -> "Projections"); it is rendered into the saved PNG.
fig.update_layout(title="PCA Projections")
fig.show()

# Export a static copy of the figure for the reports folder.
pio.write_image(fig,
                '../reports/Projections from PCA.png',
                scale=1)


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.



In [7]:
# 3D view of the first three principal components.
# (px and PCA are already imported in the notebook's first cell.)
#
# Fit on data[features], the same explicit column list the other projection
# cells use, rather than on Xr: Xr is "every column except Grade" and would
# therefore also feed any stray non-feature column (e.g. an "Unnamed: 0"
# row-id column, if present) into the PCA.
pca = PCA(n_components=3)
components = pca.fit_transform(data[features])

# Share of the total variance retained by the three components, in percent.
total_var = pca.explained_variance_ratio_.sum() * 100

# The explained variance is placed in the one title that is actually rendered;
# previously a later update_layout(title=...) call overwrote it, so the
# computed value never appeared in the figure.
fig = px.scatter_3d(
    components, x=0, y=1, z=2, color=y,
    title=f'PCA Projections 3D (Total Explained Variance: {total_var:.2f}%)',
    labels={'0': 'PC 1', '1': 'PC 2', '2': 'PC 3'},
    template='plotly_dark'
)
fig.show()

# Export a static copy of the figure for the reports folder.
pio.write_image(fig,
                '../reports/Projections from PCA 3D.png',
                scale=1)


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.



In [8]:
# Embed the feature columns into two dimensions with t-SNE.
# TSNE is imported with the other dependencies in the notebook's first cell;
# random_state is fixed so reruns use the same seed.
tsne = TSNE(n_components=2, random_state=0)
projections = tsne.fit_transform(data[features])

# Scatter of the two embedding axes, coloured by the Grade column.
fig = px.scatter(
    projections, x=0, y=1,
    color=data['Grade'], labels={'color': 'Grade'},
    template='plotly_dark',
    width=500
)
# Title typo fixed ("Projectios" -> "Projections"); it is rendered into the saved PNG.
fig.update_layout(title="t-SNE Projections")
fig.show()

# Export a static copy of the figure for the reports folder.
pio.write_image(fig,
                '../reports/Projections from t-SNE.png',
                scale=1)


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.



In [9]:
# Embed the feature columns into three dimensions with t-SNE (fixed seed).
tsne = TSNE(random_state=0, n_components=3)
projections = tsne.fit_transform(data[features])

# 3D scatter of the embedding axes, coloured by the Grade column.
fig = px.scatter_3d(
    projections,
    x=0,
    y=1,
    z=2,
    color=data['Grade'],
    labels={'color': 'Grade'},
    template='plotly_dark',
    width=500,
)
# Set the marker size and the figure title.
fig.update_traces(marker_size=8).update_layout(title="t-SNE Projections 3D")
fig.show()

# Export a static copy of the figure for the reports folder.
fig.write_image('../reports/Projections from t-SNE 3D.png', scale=1)


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.

