In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import default_rng

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler


In [6]:
rng = default_rng(1)
np.random.seed(1)


df = pd.DataFrame({
    'Col_1': rng.random(10)*6-3,
    'Col_2': rng.random(10)*2500 - 500,
    'Col_3': rng.random(10)*2,
})

df.head()

Unnamed: 0,Col_1,Col_2,Col_3
0,0.07093,1383.782772,1.500729
1,2.702782,845.358283,0.560818
2,-2.135042,324.329291,0.970382
3,2.691897,1471.071759,1.961474
4,-1.129011,257.987073,1.923314


# Standard Scaler

In [7]:
col_names = df.columns
features = df

scaler = StandardScaler().fit(features.values)
features = scaler.transform(features.values)
scaled_features = pd.DataFrame(features, columns=col_names)
scaled_features.head()


Unnamed: 0,Col_1,Col_2,Col_3
0,0.004605,1.608996,0.466361
1,1.456729,0.579405,-1.131311
2,-1.21254,-0.416921,-0.435129
3,1.450723,1.775912,1.24954
4,-0.657463,-0.543782,1.184675


# Min Max Scaler

In [8]:
col_names = df.columns
features = df

scaler = MinMaxScaler().fit(features.values)
features = scaler.transform(features.values)
scaled_features = pd.DataFrame(features, columns=col_names)
scaled_features.head()


Unnamed: 0,Col_1,Col_2,Col_3
0,0.524716,0.946644,0.719087
1,1.0,0.617527,0.14603
2,0.126341,0.299043,0.395738
3,0.998034,1.0,1.0
4,0.308019,0.258491,0.976734


# Robust Scaler

In [9]:
col_names = df.columns
features = df

scaler = RobustScaler().fit(features.values)
features = scaler.transform(features.values)
scaled_features = pd.DataFrame(features, columns=col_names)
scaled_features.head()


Unnamed: 0,Col_1,Col_2,Col_3
0,0.104852,1.583528,0.203309
1,1.144283,0.702484,-0.610848
2,-0.766381,-0.150096,-0.256081
3,1.139984,1.726363,0.60241
4,-0.369057,-0.258654,0.569355


In [10]:
columns = ["age", "year", "nodes", "class"]
df = pd.read_csv("./haberman+s+survival/haberman.data", names=columns, header=None, sep=",")
X = df[["age", "year", "nodes"]]
y = df["class"]

In [11]:
col_names = df.columns[:-1]
features = df[col_names]

scaler = MinMaxScaler().fit(features)
features = scaler.transform(features)

X = pd.DataFrame(features, columns=col_names)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6)

In [13]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=13)

In [14]:
knn.fit(X_train, y_train)

In [15]:
y_pred = knn.predict(X_test)

In [16]:
print("Test set predictions: %.2f" % np.mean(y_pred == y_test))

Test set predictions: 0.75


# Perceptron

In [20]:
from sklearn.linear_model import Perceptron
p = Perceptron(random_state=42)
p.fit(X_train, y_train)

In [22]:
print("Test set predictions: %.2f" % np.mean(p.predict(X_test) == y_test))

Test set predictions: 0.71
