In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import accuracy_score
import matplotlib
%matplotlib inline

In [None]:
# Load the red and white wine tables and tag every row with its source file:
# wine == 0 for rows read from the red file, wine == 1 for the white file.
# NOTE(review): the UCI copies of these files are ';'-separated; confirm the
# local copies are comma-separated, otherwise pass sep=";" to read_csv.
r_df = pd.read_csv("winequality-red.csv")
w_df = pd.read_csv("winequality-white.csv")
r_df["wine"] = 0
w_df["wine"] = 1

# reset_index() returns a new frame instead of modifying `df`, so the index is
# rebuilt at concat time: ignore_index=True yields unique 0..n-1 row labels
# rather than two overlapping ranges inherited from the source frames.
df = pd.concat([r_df, w_df], ignore_index=True)
df.head()

In [None]:
# Split the combined frame: the "wine" label is the target, and every other
# column is kept as a feature.
y = df["wine"]
X = df.drop(columns="wine")

In [None]:
# Standardise the features and wrap the result back into a DataFrame that
# carries the original column names.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X)
X_scaled = pd.DataFrame(scaled_values, columns=X.columns)

In [None]:
# Project the scaled features onto two principal components and colour each
# point by its wine label (0 -> red, 1 -> gray).
pca = PCA(n_components=2)
pca_repr = pca.fit_transform(X_scaled)
pca_point_colors = df["wine"].map({0: "red", 1: "gray"})
plt.scatter(
    pca_repr[:, 0],
    pca_repr[:, 1],
    c=pca_point_colors,
    alpha=0.5,
);

In [None]:
# Embed the scaled features with t-SNE (seeded, so the layout is repeatable)
# and colour each point by its wine label (0 -> red, 1 -> gray).
tsne = TSNE(random_state=228)
tsne_repr = tsne.fit_transform(X_scaled)
tsne_point_colors = df["wine"].map({0: "red", 1: "gray"})
plt.scatter(
    tsne_repr[:, 0],
    tsne_repr[:, 1],
    c=tsne_point_colors,
    alpha=0.5,
);

In [None]:
# Partition the scaled features into two clusters.
# random_state pins the centroid initialisation so that the cluster
# assignment (and everything derived from it) is the same on every
# Restart & Run All; 228 matches the seed already used for t-SNE.
# n_init is set explicitly because its default differs between scikit-learn
# releases.
# The cluster ids (0/1) are arbitrary and need not line up with the wine labels.
kmeans_clr = KMeans(n_clusters=2, n_init=10, random_state=228)
kmeans_clr_c = kmeans_clr.fit_predict(X_scaled)

In [None]:
# Cluster ids are arbitrary, so score the assignment both as-is and with the
# two ids swapped (1 - id).
acc_as_is = accuracy_score(y, kmeans_clr_c)
acc_swapped = accuracy_score(y, 1 - kmeans_clr_c)
print(acc_as_is, acc_swapped)

In [None]:
# Show the KMeans assignment on the t-SNE layout: cluster 0 is orange,
# cluster 1 is green, any other id falls back to white.
kmeans_palette = {0: "orange", 1: "green"}
kmeans_point_colors = [kmeans_palette.get(label, "white") for label in kmeans_clr_c]
plt.scatter(tsne_repr[:, 0], tsne_repr[:, 1], c=kmeans_point_colors, alpha=0.5);

In [None]:
# Density-based clustering of the scaled features; the settings are collected
# in one place so they are easy to tune.
dbscan_params = {
    "eps": 1.55,
    "min_samples": 20,
    "leaf_size": 100,
    "n_jobs": 8,
}
dbscan_clr = DBSCAN(**dbscan_params)
dbscan_clr_c = dbscan_clr.fit_predict(X_scaled)

In [None]:
# Distinct labels DBSCAN produced.
{label for label in dbscan_clr_c}

In [None]:
# Show the DBSCAN clusters on the t-SNE layout, leaving out every point whose
# label is -1.
clustered = dbscan_clr_c != -1
plt.scatter(
    tsne_repr[clustered, 0],
    tsne_repr[clustered, 1],
    c=dbscan_clr_c[clustered],
    alpha=0.5,
);