In [1]:
# Load config
from config import UNetTraining
from config import colors
config = UNetTraining.Configuration()
colors = colors.bcolors()

In [2]:
import tensorflow as tf

import numpy as np
import rasterio
# from rasterio.windows import Window

import os
import time
import rasterio.warp # Reproject raster samples
from functools import reduce
from tensorflow.keras.models import load_model

from core.UNet import UNet
from core.losses import tversky, accuracy, dice_coef, dice_loss, specificity, sensitivity
from core.optimizers import adaDelta #, adagrad, adam, nadam

import json
from sklearn.model_selection import train_test_split

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Polygon

import warnings # ignore annoying warnings
warnings.filterwarnings("ignore")
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# %run "data_preparation.ipynb"
# some_func()

# Pipeline

In [4]:
def data_preparation():
    print("1: Data_preparation") 
    %run "1_data_preparation.ipynb"
def define_model():
    print("2: Model")
    %run "2_model.ipynb"
def training():
    print("3: Training")
    %run "3_training.ipynb"
def evaluation():
    print("4: Evaluation")
    %run "4_evaluation.ipynb"
def prediction():
    print("5: Prediction")
    %run "5_prediction.ipynb"
def data_cleanup():
    print("6: Data Cleanup")
    %run "6_data_cleanup.ipynb"

In [8]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ("data_preparation", data_preparation()),
    ("model", define_model()),
    ("training", training()),
    ("evaluation", evaluation()),
    ("prediction", prediction()),
    ("data_cleanup", data_cleanup())
])

4: Evaluation


AttributeError: 'list' object has no attribute 'history'

AttributeError: 'list' object has no attribute 'history'

In [None]:
# pipeline.fit(X_train, y_train)

In [None]:
pipeline

NameError: name 'pipeline' is not defined

In [9]:
set_config(display="diagram")
pipeline

NameError: name 'set_config' is not defined

In [None]:
pipeline.score(X_test_reshaped, y_test)

In [None]:
if 0:
    model = LinearRegression()
    # predict the target on test data 
    predict_test  = model.predict(X_test_reshaped)
    # Root Mean Squared Error on train and test date
    print('RMSE on test data: ',  mean_squared_error(y_test, predict_test)**(0.5))

- Putting Everything Together (cf. p. 553 & davor)
    - Chaining Transformations
    - shuffle data etc.
- Präsi: Loading Preprocessing Data with TensorFlow

##### cool methods
- concatenate() 
- zip()
- window()
- reduce()
- shard()
- flat_map()
- padded_batch()
- from_generator()
- from_tensors()

## Inspiration

In [None]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline

numeric_preprocessor = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

categorical_preprocessor = Pipeline(
    steps=[
        (
            "imputation_constant",
            SimpleImputer(fill_value="missing", strategy="constant"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocessor = ColumnTransformer(
    [
        ("categorical", categorical_preprocessor, ["state", "gender"]),
        ("numerical", numeric_preprocessor, ["age", "weight"]),
    ]
)

pipeline = make_pipeline(preprocessor, LogisticRegression(max_iter=500))

In [None]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline

numeric_preprocessor = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

categorical_preprocessor = Pipeline(
    steps=[
        (
            "imputation_constant",
            SimpleImputer(fill_value="missing", strategy="constant"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocessor = ColumnTransformer(
    [
        ("categorical", categorical_preprocessor, ["state", "gender"]),
        ("numerical", numeric_preprocessor, ["age", "weight"]),
    ]
)

pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=500))
# The only difference is that make_pipeline generates names for steps automatically.
# Step names are needed e.g. if you want to use a pipeline with model selection utilities (e.g. GridSearchCV). With grid search you need to specify parameters for various steps of a pipeline:

In [None]:
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler

In [None]:
# example p. 109 ff. in hands-on ML ...
from sklearn.pipeline import Pipeline

num_pipeline = Pipeline([ 
    ('imputer', SimpleImputer (strategy=" median")), 
    ('attribs_adder', CombinedAttributesAdder()), 
    ('std_scaler', StandardScaler()), 
])
housing_num_tr = num_pipeline.fit_transform(housing_num)

In [None]:
from sklearn.compose import ColumnTransformer

num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]
full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
])
housing_prepared = full_pipeline.fit_transform(housing)

In [None]:
# example p. 296
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

clf = Pipeline ([
    ("kpca", KernelPCA(n_components=2)),
    ("log_reg", LogisticRegression())
])
param_grid = [{
    "kpca_gamma": np.linspace (0.03, 0.05, 10),
    "kpca_kernel": ["rbf", "sigmoid"]
}]

grid_search = GridSearchCV(clf, param_grid, cv=3) 
grid_search.fit(X, y)

In [None]:
# example p. 327

from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ("means", KMeans(n_clusters=50)),
    ("log_reg", LogisticRegression()),
])
pipeline.fit(X_train, y_train)

pipeline.score(X_test, y_test)

In [None]:
# https://www.analyticsvidhya.com/blog/2020/01/build-your-first-machine-learning-pipeline-using-scikit-learn/
# Define the Pipeline
"""
Step1: get the oultet binary columns
Step2: pre processing
Step3: Train a Random Forest Model
"""
model_pipeline = Pipeline(steps=[('get_outlet_binary_columns', OutletTypeEncoder()), 
                                 ('pre_processing',pre_process),
                                 ('random_forest', RandomForestRegressor(max_depth=10,random_state=2))
                                 ])
# fit the pipeline with the training data
model_pipeline.fit(train_x,train_y)

# predict target values on the training data
model_pipeline.predict(train_x)