# PCA Dimensionality Reduction with Wine Dataset 

In [None]:
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling

%matplotlib inline

from IPython.display import display
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Load and pre-process data

In [None]:
# Fetch the wine dataset that ships with scikit-learn
wine = datasets.load_wine()

# Report the array shapes together with the feature and target names
for label, value in (
    ("Data", wine.data.shape),
    ("Features names", wine.feature_names),
    ("Target", wine.target.shape),
    ("Target names", wine.target_names),
):
    print(label, value)

In [None]:
# Wrap the feature matrix in a DataFrame whose columns carry the feature names
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)

# Show the first rows, followed by the per-column summary statistics
for table in (df.head(), df.describe()):
    display(table)

# Build a profiling report for the DataFrame and write it to an HTML file
# (the report is saved to disk here, not rendered inline in the notebook)
profile = pandas_profiling.ProfileReport(df)
profile.to_file(output_file="profiling.html")

### Visualizations

In [None]:
# To get a better understanding of how the dimensions interact, compare a 3D
# scatter of the first three raw features with a 3D scatter of the first three
# PCA dimensions.

# Target variable, used to colour the points in both plots
y = wine.target

# Features reduced through PCA. The data is standardised first so that every
# feature enters PCA with zero mean and unit variance.
X_norm = StandardScaler().fit_transform(wine.data)
X_reduced = PCA(n_components=3).fit_transform(X_norm)

# Plot first three features
fig = plt.figure(1, figsize=(6, 4))

# Create the 3D axes through the figure so they are attached to it; a bare
# Axes3D(fig) is no longer added to the figure automatically by recent
# Matplotlib releases, which would leave the figure empty.
ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
ax.scatter(
    # Columns 0, 1 and 2 are the first three features, matching the title and
    # axis labels below.
    wine.data[:, 0],
    wine.data[:, 1],
    wine.data[:, 2],
    c=y,
    cmap=plt.cm.Set1,
    edgecolor="k",
    s=40,
)

# Hide tick labels: only the relative layout of the points matters here.
# The xaxis/yaxis/zaxis attributes replace the deprecated w_xaxis/w_yaxis/
# w_zaxis aliases.
ax.set_title("First three features")
ax.set_xlabel("1st feature")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd feature")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd feature")
ax.zaxis.set_ticklabels([])

# Plot features obtained with PCA
fig = plt.figure(2, figsize=(6, 4))

ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
ax.scatter(
    X_reduced[:, 0],
    X_reduced[:, 1],
    X_reduced[:, 2],
    c=y,
    cmap=plt.cm.Set1,
    edgecolor="k",
    s=40,
)

ax.set_title("First three PCA directions")
ax.set_xlabel("1st eigenvector")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.zaxis.set_ticklabels([])

plt.show()

In [None]:
# Draw two 2D scatter plots, each on its own figure and coloured by the
# target label: the first two raw features, then the first two PCA components.
for title, points, axis_name in (
    ("First two features", wine.data, "feature"),
    ("First two PCA components", X_reduced, "eigenvector"),
):
    plt.figure()
    plt.title(title)
    plt.scatter(points[:, 0], points[:, 1], c=wine.target, cmap="Set1")
    plt.xlabel(f"1st {axis_name}")
    plt.ylabel(f"2nd {axis_name}")