In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
#%matplotlib inline

In [None]:
# Random seed handed to train_test_split (as random_state) so the split is repeatable.
rs = 2  # presumably 10 is an alternative seed that was tried — TODO confirm

# Load a dataset

In [None]:
# Read the training data from its CSV file into a DataFrame.
training_csv = "./task/training.csv"
df = pd.read_csv(training_csv)

# Exploratory Data Analysis (EDA)

In [None]:
# Show the type of the object returned by the loader.
type(df)

In [None]:
# Peek at five random rows. The draw is seeded with the notebook-wide seed `rs`
# so the same rows are shown after every Restart & Run All.
df.sample(5, random_state=rs)

In [None]:
# (number of rows, number of columns) of the loaded data.
df.shape

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# Number of rows for each distinct value of the 'target' column.
df['target'].value_counts()

In [None]:
# Summary statistics for the columns whose dtype is not int64.
df.describe(exclude=['int64'])

In [None]:
# Descriptive statistics computed separately for each 'target' group.
stats_by_class = df.groupby(by=['target']).describe()
# stack() moves the innermost column level into the rows; unstack(0) then moves
# the outermost row level (the target value) into the columns.
stats_by_class.stack().unstack(0)

In [None]:
# Pairwise relationships between the columns of df, colored by 'target':
# lower-triangle panels only, histograms on the diagonal.
sns.set_theme(style="ticks")
sns.pairplot(
    data=df,
    hue="target",
    diag_kind="hist",
    corner=True,
)

# Training

### Training dataset

In [None]:
# List the column labels before choosing the feature/target slices below.
print(df.columns)

In [None]:
# Target vector: the last column of df.
y = df.iloc[:, -1]
# Feature matrix: the first two columns of df.
# Other slices noted here previously:
#   df.iloc[:, :-1]   every column except the last
#   df.iloc[:, 2:4]   the third and fourth columns
X = df.iloc[:, :2]

### Normalization

#### Min-Max Normalization

In [None]:
# Fit a Min-Max scaler targeting the (0, 1) range on the selected features.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)
# Per-feature minimum, then maximum, learned from X.
for learned_bound in (scaler.data_min_, scaler.data_max_):
    print(learned_bound)
# Apply the fitted scaling to the same features.
X_scaled = scaler.transform(X)
#print(X_scaled)

In [None]:
# Sanity check on the scaled features: per-column maximum and minimum.
scaled_max = X_scaled.max(axis=0)
scaled_min = X_scaled.min(axis=0)
print('(scaled) max: ', scaled_max)
print('(scaled) min: ', scaled_min)

#### Z-score Normalization

# NOTE(review): in this export the block is not preceded by a cell marker, so it
# may be a non-executed cell — TODO confirm. If it does run, it rebinds `scaler`
# and `X_scaled`, replacing the Min-Max results computed earlier.
# Fit a standard (z-score) scaler on X and print the learned mean and standard deviation.
scaler = StandardScaler().fit(X)
print(scaler.mean_)
print(np.sqrt(scaler.var_))
X_scaled = scaler.transform(X)

# Check the scaled features: per-column mean (rounded to 5 decimals) and standard deviation.
print('(scaled) mean: ', np.round(X_scaled.mean(axis=0),5))
print('(scaled) sd: ', X_scaled.std(axis=0))

In [None]:
# Split the unscaled features and the target into training and test parts,
# holding out 40% of the rows for testing; the shuffle is seeded with rs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=rs,
)

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(X_scaled,y, test_size=0.4, random_state=rs)

In [None]:
# Dimensions of the training features.
X_train.shape

In [None]:
# Count of each distinct label in the training target.
y_train.value_counts()

In [None]:
# Dimensions of the test features.
X_test.shape

In [None]:
# Count of each distinct label in the test target.
y_test.value_counts()

### Perceptron learning

In [None]:
# Initialization: a Perceptron classifier created with no arguments (library defaults).
model_per = Perceptron()
# Optimization: learn the weights from the training split. As the last expression
# of the cell, the value returned by fit() is displayed.
model_per.fit(X_train, y_train)

In [None]:
# Training performance: accuracy of the fitted perceptron on the data it was trained on.
y_pred_train = model_per.predict(X_train)
# accuracy_score takes (y_true, y_pred). Accuracy itself is symmetric, so the
# printed value is unchanged, but the documented order keeps this call correct
# if it is ever swapped for an asymmetric metric (precision, recall, ...).
print(np.round(metrics.accuracy_score(y_train, y_pred_train), 5))

In [None]:
# Testing performance: accuracy of the fitted perceptron on the held-out split.
y_pred_test = model_per.predict(X_test)
# accuracy_score takes (y_true, y_pred). Accuracy itself is symmetric, so the
# printed value is unchanged, but the documented order keeps this call correct
# if it is ever swapped for an asymmetric metric (precision, recall, ...).
print(np.round(metrics.accuracy_score(y_test, y_pred_test), 5))

### Decision boundary

In [None]:
# Learned weight vector: the first row of the fitted model's coef_.
w = model_per.coef_[0]
#print(w)

In [None]:
# Learned bias term, kept exactly as the estimator exposes it (not indexed the way coef_ is).
w0 = model_per.intercept_
#print(w0)

In [None]:
# Plot the samples and the learned decision boundary in the original feature space.
# The boundary is only drawn as a line when the model has exactly two weights.
if len(w) == 2:
    x0 = X.iloc[:, 0]
    x1 = X.iloc[:, 1]
    # Series.min()/max() skip NaN, unlike the builtin min()/max().
    x_ticks = np.linspace(x0.min(), x0.max(), 10)

    ax = plt.subplot(1, 1, 1)
    ax.set_xticks(x_ticks)
    ax.set_xlim(x0.min(), x0.max())
    ax.set_ylim(x1.min(), x1.max())
    ax.set_xlabel(X.columns[0])
    ax.set_ylabel(X.columns[1])

    # One scatter series per class, selected by the actual labels in y instead of
    # assuming the first 50 rows belong to class 0 and all remaining rows to class 1.
    for cls in np.unique(y):
        in_class = (y == cls).to_numpy()
        ax.scatter(x0[in_class], x1[in_class], label=str(cls))

    # Boundary: w[0]*x + w[1]*y + w0 = 0  =>  y = (w[0]*x + w0) / (-w[1]).
    ax.plot(x_ticks, (w[0]*x_ticks + w0)/(-w[1]))
    ax.legend(loc='best')

### Decision boundary (scaled features)

In [None]:
# Plot the scaled samples together with the decision boundary.
# The boundary is only drawn as a line when the model has exactly two weights.
if len(w) == 2:
    # NOTE(review): w and w0 come from model_per, which is fitted on the split of the
    # unscaled X unless the commented-out X_scaled split is enabled instead. The line
    # below is expressed in the model's training feature space, so it only matches
    # these scaled points when the model was trained on X_scaled.
    xs0 = X_scaled[:, 0]
    xs1 = X_scaled[:, 1]
    x_ticks = np.linspace(xs0.min(), xs0.max(), 10)

    # Create a new figure and axes for this cell. Reusing `ax` from the previous
    # cell would configure that earlier figure, while the points would be drawn
    # on whatever figure pyplot considers current.
    ax = plt.figure().add_subplot(1, 1, 1)
    ax.set_xticks(x_ticks)
    ax.set_xlim(xs0.min(), xs0.max())
    ax.set_ylim(xs1.min(), xs1.max())
    ax.set_xlabel(X.columns[0])
    ax.set_ylabel(X.columns[1])

    # One scatter series per class, selected by the actual labels in y instead of
    # assuming the first 50 rows belong to class 0 and all remaining rows to class 1.
    for cls in np.unique(y):
        in_class = np.asarray(y == cls)
        ax.scatter(xs0[in_class], xs1[in_class], label=str(cls))

    # Boundary: w[0]*x + w[1]*y + w0 = 0  =>  y = (w[0]*x + w0) / (-w[1]).
    ax.plot(x_ticks, (w[0]*x_ticks + w0)/(-w[1]))
    ax.legend(loc='best')