## Visualisasi Data - Iris Dataset

In [None]:
# data manipulation libraries
import pandas as pd
import numpy as np

# data visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt

# preprocessing (scalers) and train/test splitting
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

In [None]:
# Read the iris CSV (path is relative to the notebook's working directory)
dataset = pd.read_csv(filepath_or_buffer="../dataset/dataset_iris.csv")

In [None]:
def boxplot(dataset, columns=None):
  """Draw side-by-side box plots, one box per selected column of ``dataset``.

  Args:
    dataset: pandas DataFrame that contains the columns to plot.
    columns: column names to draw, in order. Defaults to the four iris
      measurements (sepal_length, sepal_width, petal_length, petal_width).

  Returns:
    None. The figure is displayed with ``plt.show()``.

  Raises:
    KeyError: if a requested column is not present in ``dataset``.
  """
  if columns is None:
    columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
  columns = list(columns)

  # define boxplot
  fig, ax = plt.subplots(figsize=(8,4))
  ax.boxplot(
    # hand Matplotlib a plain 2-D array (one box per array column) instead of
    # relying on it to unpack a DataFrame
    x=dataset[columns].to_numpy(),
    # a scalar width applies to every box, so any number of columns works
    patch_artist=True, widths=0.75
  )
  # Name the ticks here rather than through boxplot's `labels=` keyword, which
  # newer Matplotlib releases deprecate in favour of `tick_labels=`.
  ax.set_xticklabels(columns)

  # set labels (title and axis labels are intentionally left blank)
  ax.set_title("", fontsize=14)
  ax.set_xlabel("", fontsize=12)
  ax.set_ylabel("", fontsize=12)
  ax.grid(True)

  # show boxplot
  plt.tight_layout()
  plt.show()

In [None]:
def heatmap(dataset):
  """Display the correlation matrix of ``dataset`` as an annotated heatmap.

  Only numeric columns take part in the correlation (``numeric_only=True``).
  The colour scale is pinned to [-1, 1] and every cell is annotated with
  three decimals. Nothing is returned; the figure is shown with ``plt.show()``.
  """
  figure, axes = plt.subplots(figsize=(8, 4))

  # draw on the axes created above (the same axes pyplot treats as current)
  sns.heatmap(
    dataset.corr(numeric_only=True),
    vmin=-1,
    vmax=1,
    cmap="viridis",
    annot=True,
    fmt=".3f",
    linewidths=1,
    ax=axes,
  )

  # blank title / axis labels, no grid lines over the cells
  axes.set_title("", fontsize=14)
  axes.set_xlabel("", fontsize=12)
  axes.set_ylabel("", fontsize=12)
  axes.grid(False)

  # render
  plt.tight_layout()
  plt.show()

In [None]:
# function scatter plot
def scatter(data, x, y, hue="species"):
  """Show a scatter plot of column ``x`` against column ``y`` of ``data``.

  Args:
    data: pandas DataFrame holding the plotted columns.
    x: name of the column for the horizontal axis.
    y: name of the column for the vertical axis.
    hue: column used to colour the points. Defaults to "species";
      pass None to draw all points in one colour (no legend is drawn then).

  Returns:
    None. The figure is displayed with ``plt.show()``.
  """

  # create scatter plots on the axes created here (not the implicit current axes)
  fig, ax = plt.subplots(figsize = (8,4))
  sns.scatterplot(data=data, x=x, y=y, hue=hue, ax=ax)

  # set labels
  ax.set_title("", fontsize=14)
  # Label the axes with the plotted column names; blank labels made the
  # different feature pairs impossible to tell apart.
  ax.set_xlabel(x if isinstance(x, str) else "", fontsize=12)
  ax.set_ylabel(y if isinstance(y, str) else "", fontsize=12)
  if hue is not None:
    # without a hue there are no labelled artists to build a legend from
    ax.legend(loc='upper left')
  ax.grid(True)

  # show scatterplot
  plt.tight_layout()
  plt.show()

In [None]:
# Features: the four measurement columns, taken as an array
x = dataset.loc[:, ["sepal_length","sepal_width","petal_length","petal_width"]].values
# Labels: the species column, taken as an array
y = dataset.loc[:, "species"].values

### MinMax Scaler

In [None]:
# Fit a MinMaxScaler on the features, then rescale them into the range (0, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(x)
scaled = scaler.transform(x)

In [None]:
# Rebuild a labelled table: MinMax-scaled features plus the species column
MinMax = pd.DataFrame(
  scaled,
  columns=["sepal_length","sepal_width","petal_length","petal_width"],
)
MinMax["species"] = y

In [None]:
# Print the MinMax-scaled table
print(MinMax)

In [None]:
# Box plots of the MinMax-scaled features
boxplot(MinMax)

In [None]:
# Correlation heatmap of the MinMax-scaled features
heatmap(MinMax)

In [None]:
# Scatter plots of the MinMax-scaled data: sepal pair, then petal pair
scatter(MinMax, "sepal_length", "sepal_width")
scatter(MinMax, "petal_length", "petal_width")

### Standard Scaler

In [None]:
# Fit a StandardScaler on the features and transform them with it
scaled = StandardScaler().fit(x).transform(x)

In [None]:
# Rebuild a labelled table: standard-scaled features plus the species column
standard = pd.DataFrame(
  scaled,
  columns=["sepal_length","sepal_width","petal_length","petal_width"],
)
standard["species"] = y

In [None]:
# Print the standard-scaled table
print(standard)

In [None]:
# Box plots of the standard-scaled features
boxplot(standard)

In [None]:
# Correlation heatmap of the standard-scaled features
heatmap(standard)

In [None]:
# Scatter plots of the standard-scaled data: sepal pair, then petal pair
scatter(standard, "sepal_length", "sepal_width")
scatter(standard, "petal_length", "petal_width")

### Robust Scaler

In [None]:
# Fit a RobustScaler on the features and transform them with it
scaled = RobustScaler().fit(x).transform(x)

In [None]:
# Rebuild a labelled table: robust-scaled features plus the species column
robust = pd.DataFrame(
  scaled,
  columns=["sepal_length","sepal_width","petal_length","petal_width"],
)
robust["species"] = y

In [None]:
# Print the robust-scaled table
print(robust)

In [None]:
# Box plots of the robust-scaled features
boxplot(robust)

In [None]:
# Correlation heatmap of the robust-scaled features
heatmap(robust)

In [None]:
# Scatter plots of the robust-scaled data: sepal pair, then petal pair
scatter(robust, "sepal_length", "sepal_width")
scatter(robust, "petal_length", "petal_width")

### Splitting Data

In [None]:
# Hold-out split of the MinMax-scaled table: 75% train / 25% test,
# shuffled with a fixed seed so the split is reproducible.
# NOTE(review): MinMax comes from a scaler that was fit on every row before
# this split, so the test rows influenced the scaling. Consider splitting
# first and fitting the scaler on the training part only.
trainX, testX, trainY, testY = train_test_split(
  MinMax.iloc[:, :4],   # first four columns: the scaled features
  MinMax.iloc[:, -1:],  # last column (species), kept as a one-column DataFrame
  train_size=0.75,
  test_size=0.25,
  random_state=7,
  shuffle=True,
)