In [1]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Regression

In [6]:
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn import pipeline
from sklearn import preprocessing


# Baseline: scikit-learn SGD linear regression on the Boston housing data,
# evaluated with 5-fold cross-validated mean squared error.
# NOTE(review): load_boston was removed in scikit-learn 1.2; this cell needs an
# older release -- confirm the version this notebook is pinned to.
scorer = metrics.make_scorer(metrics.mean_squared_error)
X, y = datasets.load_boston(return_X_y=True)
# Pipeline comes from sklearn.pipeline (imported in this cell); no `compose`
# name is imported here, so `compose.Pipeline` raised a NameError.
model = pipeline.Pipeline([
    ('scale', preprocessing.StandardScaler()),
    ('learn', linear_model.SGDRegressor(
        fit_intercept=True,
        max_iter=1,  # a single pass over each training fold
        tol=1e-3
    ))
])
# Shuffled, seeded folds so the score is reproducible.
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()



71.82570096332665

In [5]:
import creme.linear_model
import creme.pipeline
import creme.preprocessing


# creme counterpart of the scikit-learn regression cell: same dataset, same
# folds, same MSE scorer. `metrics`, `datasets` and `model_selection` are the
# scikit-learn modules imported by an earlier cell.
scorer = metrics.make_scorer(metrics.mean_squared_error)
X, y = datasets.load_boston(return_X_y=True)
# NOTE(review): Pipeline is taken from creme.pipeline to match the import
# above; confirm that this is where the installed creme version defines it.
model = creme.pipeline.Pipeline([
    ('scale', creme.preprocessing.StandardScaler()),
    ('learn', creme.linear_model.LinearRegression(
        creme.linear_model.optimize.VanillaSGD(
            lr=creme.linear_model.optimize.ConstantLR(0.01),
            l2=0.01
        ),
    ))
])
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()

1101.9066103061728

In [8]:
# Read the `_estimator_type` tag through the pipeline's private
# `_final_estimator` attribute (presumably the last step -- verify).
model._final_estimator._estimator_type

'regressor'

In [110]:
import creme.model_selection
import creme.stream

# Wrap the Boston dataset in a creme stream and hand it, `model` and the MSE
# metric to online_score.
# NOTE(review): `model` is whatever an earlier cell left bound, and this cell's
# execution count suggests an out-of-order run -- confirm which model produced
# the recorded score.
s = creme.stream.iter_sklearn_dataset(datasets.load_boston())
creme.model_selection.online_score(s, model, metrics.mean_squared_error)

38507.74943484116

## Classification

In [26]:
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn import pipeline
from sklearn import preprocessing


# Baseline: scikit-learn SGD classifier on the breast cancer data, evaluated
# with 5-fold cross-validated ROC AUC.
# NOTE(review): make_scorer with default arguments scores the output of
# predict(), so the AUC is computed from hard labels, not decision scores.
scorer = metrics.make_scorer(metrics.roc_auc_score)
X, y = datasets.load_breast_cancer(return_X_y=True)
# Pipeline comes from sklearn.pipeline (imported in this cell); no `compose`
# name is imported here, so `compose.Pipeline` raised a NameError.
model = pipeline.Pipeline([
    ('scale', preprocessing.StandardScaler()),
    ('learn', linear_model.SGDClassifier(max_iter=1, tol=1e-3, random_state=42))
])
# Shuffled, seeded folds so the score is reproducible.
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()



0.9455808252682341

In [30]:
import creme.linear_model
import creme.pipeline
import creme.preprocessing


# creme counterpart of the scikit-learn classification cell: same dataset,
# same folds, same ROC AUC scorer. `metrics`, `datasets` and `model_selection`
# are the scikit-learn modules imported by an earlier cell.
scorer = metrics.make_scorer(metrics.roc_auc_score)
X, y = datasets.load_breast_cancer(return_X_y=True)
# NOTE(review): Pipeline is taken from creme.pipeline to match the import
# above; confirm that this is where the installed creme version defines it.
model = creme.pipeline.Pipeline([
    ('scale', creme.preprocessing.StandardScaler()),
    ('learn', creme.linear_model.LogisticRegression(batch_size=1))
])
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()

0.9713154495816967

## Multiclass classification

In [31]:
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
from sklearn import multiclass
from sklearn import pipeline
from sklearn import preprocessing


# Baseline: one-vs-rest scikit-learn SGD classifier on the iris data,
# evaluated with 5-fold cross-validated accuracy.
scorer = metrics.make_scorer(metrics.accuracy_score)
X, y = datasets.load_iris(return_X_y=True)
# Pipeline comes from sklearn.pipeline (imported in this cell); no `compose`
# name is imported here, so `compose.Pipeline` raised a NameError.
model = pipeline.Pipeline([
    ('scale', preprocessing.StandardScaler()),
    ('learn', multiclass.OneVsRestClassifier(
        linear_model.SGDClassifier(max_iter=1, tol=1e-3, random_state=42)
    ))
])
# Shuffled, seeded folds so the score is reproducible.
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()



0.7733333333333333

In [34]:
import creme.linear_model
import creme.multiclass
import creme.pipeline
import creme.preprocessing


# creme counterpart of the scikit-learn multiclass cell: same dataset, same
# folds, same accuracy scorer. `metrics`, `datasets` and `model_selection` are
# the scikit-learn modules imported by an earlier cell.
scorer = metrics.make_scorer(metrics.accuracy_score)
X, y = datasets.load_iris(return_X_y=True)
optimizer = creme.linear_model.optimize.AdaGrad(lr=0.4)
# NOTE(review): Pipeline is taken from creme.pipeline to match the import
# above; confirm that this is where the installed creme version defines it.
model = creme.pipeline.Pipeline([
    ('scale', creme.preprocessing.StandardScaler()),
    ('learn', creme.multiclass.OneVsRestClassifier(
        base_estimator=creme.linear_model.LogisticRegression(optimizer)
    ))
])
cv = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
scores = model_selection.cross_val_score(model, X, y, scoring=scorer, cv=cv)
scores.mean()

0.76

In [153]:
# Build a shuffled, seeded stream over the diabetes dataset.
X_y = creme.stream.iter_sklearn_dataset(
    dataset=datasets.load_diabetes(),
    shuffle=True,
    random_state=42
)

# Show only the first item; there is no need to materialize the whole stream
# into a list just to look at one element.
next(iter(X_y))

({'age': 0.0453409833354632,
  'sex': -0.044641636506989,
  'bmi': -0.00620595413580824,
  'bp': -0.015999222636143,
  's1': 0.125018703134293,
  's2': 0.125198101136752,
  's3': 0.0191869970174533,
  's4': 0.0343088588777263,
  's5': 0.0324332257796019,
  's6': -0.0052198044153011},
 219.0)

In [2]:
from sklearn import datasets


# Keyword arguments shared by the train and test fetches: four newsgroup
# categories, metadata stripped, seeded shuffle.
params = dict(
    categories=[
        'alt.atheism',
        'talk.religion.misc',
        'comp.graphics',
        'sci.space',
    ],
    remove=('headers', 'footers', 'quotes'),
    shuffle=True,
    random_state=42,
)


# Fetch the two subsets with identical settings, train first and then test.
train, test = (
    datasets.fetch_20newsgroups(subset=subset, **params)
    for subset in ('train', 'test')
)

In [18]:
import creme.feature_extraction
import creme.naive_bayes
import creme.pipeline
from sklearn import metrics


# Online TF-IDF + multinomial naive Bayes on the newsgroups text.
# NOTE(review): Pipeline is taken from creme.pipeline to match this cell's
# import; confirm that this is where the installed creme version defines it.
model = creme.pipeline.Pipeline([
    ('vectorize', creme.feature_extraction.TFIDFVectorizer(on='text')),
    ('naive_bayes', creme.naive_bayes.MultinomialNB())
])

# Train one document at a time. Descriptive loop names keep the notebook-level
# `y` from being overwritten by a single integer label.
for text, label in zip(train['data'], train['target']):
    model.fit_one({'text': text}, train['target_names'][label])

# Ground-truth class names, and one prediction per test document in order.
y_true = [test['target_names'][label] for label in test['target']]
y_pred = [model.predict_one({'text': text}) for text in test['data']]

print(metrics.classification_report(y_true, y_pred))

                    precision    recall  f1-score   support

       alt.atheism       0.56      0.71      0.63       319
     comp.graphics       0.90      0.88      0.89       389
         sci.space       0.68      0.93      0.78       394
talk.religion.misc       0.78      0.07      0.13       251

         micro avg       0.71      0.71      0.71      1353
         macro avg       0.73      0.65      0.61      1353
      weighted avg       0.73      0.71      0.66      1353



In [17]:
from sklearn import feature_extraction
from sklearn import naive_bayes
from sklearn import pipeline


# Batch scikit-learn counterpart of the creme naive Bayes cell: TF-IDF features
# fed to a multinomial naive Bayes classifier.
# Pipeline comes from sklearn.pipeline (imported in this cell); no `compose`
# name is imported here, so `compose.Pipeline` raised a NameError.
skl_model = pipeline.Pipeline([
    ('vectorize', feature_extraction.text.TfidfVectorizer()),
    ('naive_bayes', naive_bayes.MultinomialNB())
])

# Map the integer targets to class names so the report uses readable labels.
X_train = train['data']
y_train = [train['target_names'][idx] for idx in train['target']]

skl_model.fit(X_train, y_train);

X_test = test['data']
y_test = [test['target_names'][idx] for idx in test['target']]
y_pred = skl_model.predict(X_test)

print(metrics.classification_report(y_test, y_pred))

                    precision    recall  f1-score   support

       alt.atheism       0.55      0.75      0.63       319
     comp.graphics       0.90      0.88      0.89       389
         sci.space       0.70      0.92      0.80       394
talk.religion.misc       0.78      0.07      0.13       251

         micro avg       0.71      0.71      0.71      1353
         macro avg       0.73      0.66      0.61      1353
      weighted avg       0.74      0.71      0.66      1353

