### **D2APR: Aprendizado de Máquina e Reconhecimento de Padrões** (IFSP, Campinas) <br/>
**Prof**: Samuel Martins (Samuka) <br/>


<a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>. <br/><br/>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting defaults: seaborn's white-grid theme plus larger matplotlib
# figures and fonts. The theme is set first so the rcParams below override it.
sns.set_theme(style="whitegrid")

params = {'legend.fontsize': 'x-large', 'figure.figsize': (15, 5)}
# Axis labels, axis titles and tick labels all use the same font size.
params.update({
    rc_name: 'x-large'
    for rc_name in ('axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize')
})
plt.rcParams.update(params)

In [None]:
# Load the dataset used throughout the notebook (path is relative to the
# notebook's working directory).
DATA_PATH = 'circular_data.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

In [None]:
# Scatter the two features against each other on a square 10x10 canvas,
# colouring each point by its label column `y`.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 10))
sns.scatterplot(x='x1', y='x2', hue='y', data=df, ax=scatter_ax)

In [None]:
# Feature matrix (columns x1, x2) and label vector as plain NumPy arrays.
feature_cols = ['x1', 'x2']
X_train = df[feature_cols].to_numpy()
y_train = df['y'].to_numpy()

# Logistic Regression

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Linear baseline: standardise the two features, then fit a logistic regression.
log_reg_steps = [
    ("std_scaler", StandardScaler()),
    ("log_reg", LogisticRegression(random_state=42)),
]
log_reg_clf = Pipeline(steps=log_reg_steps)

log_reg_clf.fit(X_train, y_train)

<span style='font-size: 20pt'>
$\theta_0 + \theta_1 * x_1 + \theta_2 * x_2 = 0$

$\theta_1 * x_1 + \theta_2 * x_2 = -\theta_0$
    
$x_2 = - (\theta_0 + \theta_1 * x_1) / \theta_2$
</span>

In [None]:
# Grab the fitted estimator directly from the pipeline. `named_steps` is the
# direct accessor; `get_params()` would build the full (deep) parameter dict
# just to look up one step.
fitted_log_reg = log_reg_clf.named_steps['log_reg']

# NOTE: the pipeline standardises the inputs before the logistic regression,
# so these coefficients are expressed in the *standardised* feature space,
# not in the raw (x1, x2) units.
theta_0 = fitted_log_reg.intercept_[0]
theta_1, theta_2 = fitted_log_reg.coef_[0]

In [None]:
# Decision boundary of the linear model, expressed in the ORIGINAL feature units.
#
# The thetas were learned on standardised inputs (the pipeline applies
# StandardScaler before the logistic regression), so the boundary equation
#     theta_0 + theta_1 * z1 + theta_2 * z2 = 0
# holds for z = (x - mean) / scale. We therefore map x1 into the scaled space,
# solve for z2, and map z2 back to raw x2 so the line matches the raw scatter.
fitted_scaler = log_reg_clf.named_steps['std_scaler']
x1_mean, x2_mean = fitted_scaler.mean_
x1_scale, x2_scale = fitted_scaler.scale_

# Evenly spaced, sorted abscissas: deterministic across runs (no unseeded
# random draw) and already ordered for a line trace.
x1_db = np.linspace(X_train[:, 0].min(), X_train[:, 0].max(), 100)

z1_db = (x1_db - x1_mean) / x1_scale
z2_db = -(theta_0 + (theta_1 * z1_db)) / theta_2
x2_db = z2_db * x2_scale + x2_mean

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Training points coloured by class (labels cast to str so plotly treats them
# as discrete categories), with the linear decision boundary drawn on top.
class_labels = y_train.astype('str')
fig = px.scatter(
    x=X_train[:, 0],
    y=X_train[:, 1],
    color=class_labels,
    color_discrete_sequence=px.colors.qualitative.T10,
)

boundary_trace = go.Scatter(
    x=x1_db,
    y=x2_db,
    mode='lines',
    name='Decision Boundary',
    marker=dict(color='#FF5657'),
)
fig.add_trace(boundary_trace)

# Same padded range on both axes, taken from the global min/max of X_train.
padded_range = [X_train.min() - 0.5, X_train.max() + 0.5]
fig.update_layout(
    title='Logistic Regression',
    xaxis_title='x1',
    yaxis_title='x2',
    width=700,
    height=600,
    template='plotly_white',
)
fig.update_xaxes(range=padded_range)
fig.update_yaxes(range=padded_range)

fig.show()

# Polynomial Logistic Regression

## Degree = 2

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Degree-2 model: expand (x1, x2) into polynomial terms (no bias column),
# standardise the expanded features, then fit a logistic regression.
deg2_steps = [
    ('pol_feats', PolynomialFeatures(degree=2, include_bias=False)),
    ("std_scaler", StandardScaler()),
    ("log_reg", LogisticRegression(random_state=42)),
]
pol_log_reg_clf = Pipeline(steps=deg2_steps)

pol_log_reg_clf.fit(X_train, y_train)

In [None]:
# Grab the fitted estimator directly from the pipeline. `named_steps` is the
# direct accessor; `get_params()` would build the full (deep) parameter dict
# just to look up one step.
fitted_pol_log_reg = pol_log_reg_clf.named_steps['log_reg']

# One intercept plus one weight per degree-2 polynomial feature (five weights
# for two inputs without the bias column). They apply to the standardised
# polynomial features, not to the raw (x1, x2) values.
theta_0 = fitted_pol_log_reg.intercept_[0]
theta_1, theta_2, theta_3, theta_4, theta_5 = fitted_pol_log_reg.coef_[0]

In [None]:
# Evaluate the model's class-1 probability on a dense 1000 x 1000 grid that
# spans the training data's bounding box.
feat_min = X_train.min(axis=0)
feat_max = X_train.max(axis=0)
x1_model = np.linspace(feat_min[0], feat_max[0], 1000)
x2_model = np.linspace(feat_min[1], feat_max[1], 1000)
x1v, x2v = np.meshgrid(x1_model, x2_model)

# One row per grid node: (x1, x2).
grid_points = np.column_stack((x1v.ravel(), x2v.ravel()))
class1_proba = pol_log_reg_clf.predict_proba(grid_points)[:, 1]
y_model = class1_proba.reshape(x1v.shape)

In [None]:
# https://stackoverflow.com/a/53116010

import plotly.graph_objects as go

# 3D view: training points placed at z = their label (0/1) together with the
# predicted class-1 probability surface over the (x1, x2) grid.
fig = go.Figure(data=[
                    go.Scatter3d(x=X_train[:,0], y=X_train[:,1], z=y_train, mode='markers',
                                marker=dict(size=6, color=y_train, colorscale=[[0, '#4C78A8'], [1, '#F58518']], opacity=0.8)),
                    go.Surface(x=x1_model, y=x2_model, z=y_model),
            ])

# Axes of 3D traces live under `layout.scene`; `fig.update_xaxes` /
# `fig.update_yaxes` only configure 2D cartesian axes and would leave the
# scene untouched, so the padded ranges are set on the scene axes instead.
axis_range = [X_train.min() - 0.5, X_train.max() + 0.5]

fig.update_layout(title='Polynomial Logistic Regression',
                  scene=dict(xaxis_title='x1', yaxis_title='x2', zaxis_title='y',
                             xaxis_range=axis_range, yaxis_range=axis_range),
                  width=700, height=600, template='plotly_white')

fig.show()

In [None]:
# Approximate the decision boundary by keeping the grid nodes whose predicted
# probability lies in the narrow band [0.495, 0.505] around 0.5.
mask = np.logical_and(y_model >= 0.495, y_model <= 0.505)
# Boolean indexing already returns flat 1-D arrays.
x1_db = x1v[mask]
x2_db = x2v[mask]

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Training points coloured by class, with the near-0.5-probability grid nodes
# drawn as markers to trace the decision boundary of the polynomial model.
fig = px.scatter(x=X_train[:,0], y=X_train[:,1], color=y_train.astype('str'), color_discrete_sequence=px.colors.qualitative.T10)
fig.add_trace(go.Scatter(x=x1_db, y=x2_db, mode='markers', name='Decision Boundary', marker_color='#FF5657'))

# Title names the polynomial model this figure belongs to (it previously
# carried the title of the linear model's figure).
fig.update_layout(title='Polynomial Logistic Regression',
                  xaxis_title='x1', yaxis_title='x2', width=700, height=600)
fig.update_xaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])
fig.update_yaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])

fig.show()

## Degree = 50

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Degree-50 model: same pipeline shape as the degree-2 one, but with a much
# larger polynomial expansion of (x1, x2). Rebinds `pol_log_reg_clf`.
deg50_expansion = PolynomialFeatures(degree=50, include_bias=False, interaction_only=False)
deg50_steps = [
    ('pol_feats', deg50_expansion),
    ("std_scaler", StandardScaler()),
    ("log_reg", LogisticRegression(random_state=42)),
]
pol_log_reg_clf = Pipeline(steps=deg50_steps)

pol_log_reg_clf.fit(X_train, y_train)

In [None]:
# Evaluate the model's class-1 probability on a 500 x 500 grid that spans the
# training data's bounding box.
feat_min = X_train.min(axis=0)
feat_max = X_train.max(axis=0)
x1_model = np.linspace(feat_min[0], feat_max[0], 500)
x2_model = np.linspace(feat_min[1], feat_max[1], 500)
x1v, x2v = np.meshgrid(x1_model, x2_model)

# One row per grid node: (x1, x2).
grid_points = np.column_stack((x1v.ravel(), x2v.ravel()))
class1_proba = pol_log_reg_clf.predict_proba(grid_points)[:, 1]
y_model = class1_proba.reshape(x1v.shape)

In [None]:
# https://stackoverflow.com/a/53116010

import plotly.graph_objects as go

# 3D view: training points placed at z = their label (0/1) together with the
# predicted class-1 probability surface over the (x1, x2) grid.
fig = go.Figure(data=[
                    go.Scatter3d(x=X_train[:,0], y=X_train[:,1], z=y_train, mode='markers',
                                marker=dict(size=6, color=y_train, colorscale=[[0, '#4C78A8'], [1, '#F58518']], opacity=0.8)),
                    go.Surface(x=x1_model, y=x2_model, z=y_model),
            ])

# Axes of 3D traces live under `layout.scene`; `fig.update_xaxes` /
# `fig.update_yaxes` only configure 2D cartesian axes and would leave the
# scene untouched, so the padded ranges are set on the scene axes instead.
axis_range = [X_train.min() - 0.5, X_train.max() + 0.5]

fig.update_layout(title='Polynomial Logistic Regression',
                  scene=dict(xaxis_title='x1', yaxis_title='x2', zaxis_title='y',
                             xaxis_range=axis_range, yaxis_range=axis_range),
                  width=700, height=600)

fig.show()

In [None]:
# Approximate the decision boundary by keeping the grid nodes whose predicted
# probability lies in the narrow band [0.495, 0.505] around 0.5.
mask = np.logical_and(y_model >= 0.495, y_model <= 0.505)
# Boolean indexing already returns flat 1-D arrays.
x1_db = x1v[mask]
x2_db = x2v[mask]

In [None]:
# Decision boundary as a proper line: draw the p = 0.5 iso-contour of the
# predicted probability grid. Connecting the thresholded grid nodes with
# go.Scatter(mode='lines') does not work, because boolean masking returns them
# in row-major grid order rather than in order along the curve, so the
# segments jump back and forth across the plot.
import plotly.express as px
import plotly.graph_objects as go

fig = px.scatter(x=X_train[:,0], y=X_train[:,1], color=y_train.astype('str'), color_discrete_sequence=px.colors.qualitative.T10)
fig.add_trace(go.Contour(
    x=x1_model, y=x2_model, z=y_model,
    # A single contour level at probability 0.5, drawn as a line only (no fill).
    autocontour=False,
    contours=dict(start=0.5, end=0.5, size=0.5, coloring='lines'),
    # With coloring='lines' the line colour comes from the colorscale, so a
    # constant colorscale gives a single-colour boundary.
    colorscale=[[0, '#FF5657'], [1, '#FF5657']],
    line=dict(width=2),
    showscale=False, showlegend=True, hoverinfo='skip',
    name='Decision Boundary'))

fig.update_layout(title='Polynomial Logistic Regression',
                  xaxis_title='x1', yaxis_title='x2', width=700, height=600)
fig.update_xaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])
fig.update_yaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])

fig.show()

In [None]:
# Show the decision boundary as individual markers: the grid nodes whose
# predicted probability is close to 0.5. Markers need no particular point
# ordering, unlike a line trace.
import plotly.express as px
import plotly.graph_objects as go

fig = px.scatter(x=X_train[:,0], y=X_train[:,1], color=y_train.astype('str'), color_discrete_sequence=px.colors.qualitative.T10)
fig.add_trace(go.Scatter(x=x1_db, y=x2_db, mode='markers', name='Decision Boundary'))

# Title names the polynomial model this figure belongs to (it previously
# carried the title of the linear model's figure).
fig.update_layout(title='Polynomial Logistic Regression',
                  xaxis_title='x1', yaxis_title='x2', width=700, height=600)
fig.update_xaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])
fig.update_yaxes(range=[X_train.min() - 0.5, X_train.max() + 0.5])

fig.show()