In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Column names are supplied explicitly via `names=`, so pandas treats every
# line of the file as data (no header row is consumed).
cols = [
    'class name',
    'left-weight',
    'left-distance',
    'right-weight',
    'right-distance',
]
df = pd.read_csv('data/balance-scale.txt', sep=',', names=cols)
print(f"Shape: {df.shape}")
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) using pandas' default
# describe() behaviour; rendered because it is the cell's last expression.
df.describe()

In [None]:
# Print column dtypes, non-null counts and memory usage to check for missing
# values and unexpected types before modelling.
df.info()

In [None]:
# Class balance: one bar per value of 'class name'.
# `data=` is passed by keyword: seaborn only accepts the frame as the first
# positional argument from 0.12 onwards, while the keyword form works on
# older and newer releases alike.
# `hue` repeats the x variable purely to give each class its own colour.
sns.countplot(data=df, x='class name', hue='class name')
plt.title("Count Plot ['B', 'R', 'L']")
plt.show()

In [None]:
# Separate the four numeric-looking measurement columns (model inputs) from
# the class label column (target).
features = [
    'left-weight',
    'left-distance',
    'right-weight',
    'right-distance',
]
x = df[features]
y = df['class name']

In [None]:
# Standardise the features before PCA so that no column dominates the
# components merely because of its scale.
# Note: `x` is rebound here from the feature DataFrame to the scaler's output
# (a NumPy array by default).
scaler = StandardScaler()
x = scaler.fit_transform(x)

# Project the standardised features onto the first two principal components.
pca = PCA(n_components=2)
pct = pca.fit_transform(x)

In [None]:
# Wrap the 2-D PCA projection in a DataFrame and attach the class label.
principal_df = pd.DataFrame(pct, columns=['pc1', 'pc2'])
# Assign labels positionally: `principal_df` has a fresh 0..n-1 index, so an
# index-aligned assignment would silently produce NaN labels if `df` ever
# carried a different index (e.g. after filtering or re-indexing).
principal_df['class name'] = df['class name'].to_numpy()
# A bare `principal_df.head()` here would render nothing, because only the
# last expression of a cell is displayed; preview the frame in its own cell.

# Scatter plot of the two principal components, one colour per class.
fig, ax = plt.subplots(figsize=(8, 8))

targets = ['L', 'B', 'R']
colors = ['r', 'g', 'b']
for target, color in zip(targets, colors):
    selected = principal_df[principal_df['class name'] == target]
    # `label=` binds the legend entry to this specific artist, instead of
    # relying on the order in which the scatter calls happened to be made.
    ax.scatter(selected['pc1'], selected['pc2'], c=color, s=50, label=target)

ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('2 component PCA')
ax.legend()
ax.grid()