# (Optional) Lesson 0

## What is ML? 

## Supervised ML: Classification

### Popular Classification Models and Python Frameworks

### Example Classification Task: Iris Dataset 

In [1]:
# import scikit-learn dataset library
from sklearn import datasets

# import dataset splitting mechanism
from sklearn.model_selection import train_test_split

# Fetch the Iris data and separate the feature matrix from the class labels
iris = datasets.load_iris()
X, y = iris['data'], iris['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Random Forests with Scikit-learn

NOTE: This code has minor changes from the full stack version so that all three examples are consistent with one another.

In [2]:
from sklearn.ensemble import RandomForestClassifier

# Configure a 10-tree random forest with a fixed seed
clf_rf = RandomForestClassifier(
    n_estimators=10,
    min_samples_split=2,
    random_state=0,
)

# Fit on the training split
clf_rf.fit(X_train, y_train)

# Evaluate on the held-out split; the bare name shows the score as cell output
rf_accuracy = clf_rf.score(X_test, y_test)
rf_accuracy

0.98

#### Boosted Trees with XGBoost

In [3]:
from xgboost import XGBClassifier

# Configure a 10-round boosted-tree classifier with a fixed seed.
# NOTE(review): use_label_encoder is version-sensitive in XGBoost -- confirm it
# is still accepted/needed for the installed release.
clf_xgb = XGBClassifier(
    n_estimators=10,
    random_state=0,
    eval_metric='mlogloss',
    use_label_encoder=False,
)

# Fit on the training split
clf_xgb.fit(X_train, y_train)

# Evaluate on the held-out split; the bare name shows the score as cell output
xgb_accuracy = clf_xgb.score(X_test, y_test)
xgb_accuracy

0.98

#### Neural Networks with Keras

In [4]:
from tensorflow import keras
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only -- the test split
# must never influence the fit -- then apply that same scaling to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
def build_model(hidden_layer_dim, meta):
    """Assemble an uncompiled feed-forward softmax classifier for scikeras.

    Args:
        hidden_layer_dim: Number of units in the second Dense layer.
        meta: Dict handed in by scikeras describing the training input; the
            keys "n_features_in_", "X_shape_" and "n_classes_" are read here.

    Returns:
        A keras Sequential model laid out as
        Dense(n_features_in_) -> relu -> Dense(hidden_layer_dim) -> relu ->
        Dense(n_classes_) -> softmax.
    """
    feature_count = meta["n_features_in_"]
    # drop the first axis (presumably the sample axis) to get the per-row shape
    per_sample_shape = meta["X_shape_"][1:]
    class_count = meta["n_classes_"]

    # declare the whole stack up front instead of adding layers one at a time
    layer_stack = [
        keras.layers.Dense(feature_count, input_shape=per_sample_shape),
        keras.layers.Activation("relu"),
        keras.layers.Dense(hidden_layer_dim),
        keras.layers.Activation("relu"),
        keras.layers.Dense(class_count),
        keras.layers.Activation("softmax"),
    ]
    return keras.models.Sequential(layer_stack)

In [6]:
# instantiate model
# random_state pins the seed used while training so the reported accuracy is
# repeatable on a fresh kernel, matching the fixed seeds used for the random
# forest and XGBoost classifiers.
clf_keras = KerasClassifier(build_model, 
                            loss='categorical_crossentropy',
                            hidden_layer_dim=100,
                            epochs=200,
                            random_state=0,
                            verbose=0)

# train model (on the standardized features)
clf_keras.fit(X_train_scaled, y_train)

# score model on the standardized held-out split
keras_accuracy = clf_keras.score(X_test_scaled, y_test)
keras_accuracy

2022-07-21 08:29:51.592997: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


0.86