### Bootstrap

#### Packages

In [1]:
import pandas as pkg_pandas
import math as pkg_math
from matplotlib import pyplot as pkg_plot
from sklearn import linear_model as pkg_linear_model
from sklearn import model_selection as pkg_model_selection
from sklearn import preprocessing as pkg_preprocessing
from sklearn import tree as pkg_tree
from sklearn import metrics as pkg_metrics
from sklearn import datasets as pkg_datasets
from sklearn import ensemble as pkg_ensemble
from sklearn import svm as pkg_svm
import seaborn as pkg_seaborn

#### Load Data

In [2]:
loaded_data = pkg_datasets.load_iris()
dir(loaded_data)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [3]:
print("\nFeature Names = {}\nTarget Names = {}\nColumn".format(\
    loaded_data.feature_names, loaded_data.target_names))


Feature Names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target Names = ['setosa' 'versicolor' 'virginica']
Column


In [4]:
column_names = []
for feature_name in loaded_data.feature_names:
    column_name = feature_name.replace(' ', '_').replace('(','').replace(')','')
    column_names.append(column_name)

In [5]:
loaded_df = pkg_pandas.DataFrame(loaded_data.data, columns=column_names)
loaded_df['flower_number'] = loaded_data.target
loaded_df['flower_name'] = loaded_df['flower_number'].apply(lambda fnum: loaded_data.target_names[fnum])
loaded_df.head()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,flower_number,flower_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


### Analysis

#### Pre-Work

In [6]:
def get_score(model, train_X, train_Y, test_X, test_Y):
    model.fit(X=train_X, y=train_Y)
    return model.score(X=test_X, y=test_Y)

#### Model

In [7]:
# Initialize
baseline_df = loaded_df

In [8]:
output_column_name = 'flower_number'
baseline_outputs = baseline_df[output_column_name]
baseline_inputs = baseline_df.drop(columns=[output_column_name, 'flower_name']).to_numpy()

In [9]:
baseline_inputs[2:3]

array([[4.7, 3.2, 1.3, 0.2]])

In [10]:
baseline_outputs[5:10]

5    0
6    0
7    0
8    0
9    0
Name: flower_number, dtype: int64

In [11]:
scores_lnr = []
scores_lgr = []
scores_svm = []
scores_dst = []
scores_rf = []

scores_lnr = pkg_model_selection.cross_val_score(pkg_linear_model.LinearRegression(), baseline_inputs, baseline_outputs)
scores_lgr = pkg_model_selection.cross_val_score(pkg_linear_model.LogisticRegression(max_iter=5000), baseline_inputs, baseline_outputs)
scores_svm = pkg_model_selection.cross_val_score(pkg_svm.SVC(), baseline_inputs, baseline_outputs)
scores_dst = pkg_model_selection.cross_val_score(pkg_tree.DecisionTreeClassifier(), baseline_inputs, baseline_outputs)
scores_rf  = pkg_model_selection.cross_val_score(pkg_ensemble.RandomForestClassifier(n_estimators=20), baseline_inputs, baseline_outputs)

In [12]:
print("= = = ::: Scores ::: = = =")
print("Linear Regression = {}".format(scores_lnr))
print("Logistic Regression = {}".format(scores_lgr))
print("Support Vector Machine = {}".format(scores_svm))
print("Decision Trees = {}".format(scores_dst))
print("Random Forest = {}".format(scores_rf))

= = = ::: Scores ::: = = =
Linear Regression = [0.         0.85124923 0.         0.76155439 0.        ]
Logistic Regression = [0.96666667 1.         0.93333333 0.96666667 1.        ]
Support Vector Machine = [0.96666667 0.96666667 0.96666667 0.93333333 1.        ]
Decision Trees = [0.96666667 0.96666667 0.9        1.         1.        ]
Random Forest = [0.96666667 0.96666667 0.93333333 0.9        1.        ]
