In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor, LinearRegression
from sklearn.model_selection import train_test_split

# Build a synthetic two-feature regression problem. `coef=True` makes
# make_regression also return the generating coefficients, kept here as `w`.
X, y, w = make_regression(
    n_samples=4000,
    n_features=2,
    noise=1.0,
    coef=True,
    random_state=42,
)

# Add a constant offset of 1.5 to every target value.
y = y + 1.5

# Split the samples 50/50 into a training part and a held-out test part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

In [2]:
# Baseline: a linear regression fitted on the whole training split in one go.
mod_lm = LinearRegression().fit(X_train, y_train)

# Mean squared error of the baseline on the test split, kept for safekeeping
# so the incrementally trained models can be compared against it.
baseline_residuals = mod_lm.predict(X_test) - y_test
normal_mse_test = np.mean(baseline_residuals ** 2)

In [3]:
# Train an SGDRegressor one observation at a time and record the model state
# after every update. (The variable keeps the name `mod_pac`; it is reassigned
# to a PassiveAggressiveRegressor further down.)
mod_pac = SGDRegressor()
data = []

for i, (row, target) in enumerate(zip(X_train, y_train)):
    # Learn from exactly one datapoint.
    mod_pac.partial_fit([row], [target])

    # Measure: intercept, both weights, and the test MSE right after this step.
    weights = mod_pac.coef_.flatten()
    test_residuals = mod_pac.predict(X_test) - y_test
    data.append(dict(
        c0=mod_pac.intercept_[0],
        c1=weights[0],
        c2=weights[1],
        mse_test=np.mean(test_residuals ** 2),
        normal_mse_test=normal_mse_test,  # constant baseline, repeated per row
        i=i,
    ))

# One row per training step.
df_stats = pd.DataFrame(data)


In [4]:
import altair as alt

# Remove Altair's limit on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()

# Reshape to long format: one row per (step, variable) pair.
pltr1 = df_stats[['i', 'c1', 'c2']].melt(id_vars=["i"])
pltr2 = df_stats[['i', 'normal_mse_test', 'mse_test']].melt(id_vars=["i"])

# Both panels use the same encoding: step on x, value on y, one line per variable.
line_encoding = dict(
    x='i',
    y='value',
    color='variable',
    tooltip=['i', 'value', 'variable'],
)

# Left panel: trajectory of the two feature weights.
p1 = (
    alt.Chart(pltr1, title='SGD evolution of weights')
    .mark_line()
    .encode(**line_encoding)
    .properties(width=300, height=150)
    .interactive()
)

# Right panel: test MSE of the online model next to the constant baseline MSE.
p2 = (
    alt.Chart(pltr2, title='SGD evolution of mse')
    .mark_line()
    .encode(**line_encoding)
    .properties(width=350, height=150)
    .interactive()
)

# Display the two panels side by side.
alt.hconcat(p1, p2)


  for col_name, dtype in df.dtypes.iteritems():


In [5]:
from sklearn.linear_model import PassiveAggressiveRegressor

# Values of the regressor's C parameter: `c_cold` is used from the start,
# `c_warm` replaces it part-way through the run (see the end of the loop body).
c_cold, c_warm = 0.1, 0.01

# Train one observation at a time, recording the model state after every step.
mod_pac = PassiveAggressiveRegressor(C=c_cold)
data = []

for i, (row, target) in enumerate(zip(X_train, y_train)):
    mod_pac.partial_fit([row], [target])

    weights = mod_pac.coef_.flatten()
    test_residuals = mod_pac.predict(X_test) - y_test
    data.append(dict(
        c0=mod_pac.intercept_[0],
        c1=weights[0],
        c2=weights[1],
        mse_test=np.mean(test_residuals ** 2),
        normal_mse_test=normal_mse_test,  # constant baseline, repeated per row
        i=i,
    ))

    # After the step with index 500 has been processed and logged, switch
    # C to the smaller value for all remaining updates.
    if i == 500:
        mod_pac.C = c_warm

# One row per training step.
df_stats = pd.DataFrame(data)

In [6]:
# Remove Altair's limit on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()

# Reshape to long format: one row per (step, variable) pair.
pltr1 = df_stats[['i', 'c1', 'c2']].melt(id_vars=["i"])
pltr2 = df_stats[['i', 'normal_mse_test', 'mse_test']].melt(id_vars=["i"])

# Both panels use the same encoding: step on x, value on y, one line per variable.
line_encoding = dict(
    x='i',
    y='value',
    color='variable',
    tooltip=['i', 'value', 'variable'],
)

# Left panel: trajectory of the two feature weights.
q1 = (
    alt.Chart(pltr1, title='PA evolution of weights')
    .mark_line()
    .encode(**line_encoding)
    .properties(width=300, height=150)
    .interactive()
)

# Right panel: test MSE of the online model next to the constant baseline MSE.
q2 = (
    alt.Chart(pltr2, title='PA evolution of mse')
    .mark_line()
    .encode(**line_encoding)
    .properties(width=350, height=150)
    .interactive()
)

# 2x2 grid: the SGD charts (p1, p2, built in an earlier cell) on the top row,
# the PA charts on the bottom row.
alt.vconcat(alt.hconcat(p1, p2), alt.hconcat(q1, q2))


  for col_name, dtype in df.dtypes.iteritems():
