## Environment Setup

In [1]:
# !pip install -U -q segmentation-models
# !pip install -q tensorflow
# !pip install -q keras
# !pip install -q tensorflow-estimator
# !pip install plotly
# !pip install xgboost

In [2]:
import os
# Restrict CUDA-based libraries loaded after this point to GPU device 0.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Backend selection for segmentation_models; it has to be in place before that
# package is imported in the next cell.
os.environ["SM_FRAMEWORK"] = "tf.keras"

In [3]:
# import keras
from tensorflow import keras
import segmentation_models as sm
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

Segmentation Models: using `tf.keras` framework.


## Data Loading

In [4]:
# Pull the raw arrays and their human-readable names out of the iris bundle.
data = load_iris()
features, targets = data['data'], data['target']
class_names, feature_names = data['target_names'], data['feature_names']

# One column per measurement (first four, in dataset order), then the species
# name that each integer target code stands for.
df = pd.DataFrame({
  **{name: features[:, col] for col, name in enumerate(feature_names[:4])},
  'label': [class_names[code] for code in targets]
})

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
# (row count, column count) of the assembled frame.
df.shape

(150, 5)

In [6]:
# Count of missing values in each column.
df.isna().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
label                0
dtype: int64

## Exploratory Data Analysis

In [7]:
# Number of rows per species.
class_dis = df['label'].value_counts()
class_dis

versicolor    50
setosa        50
virginica     50
Name: label, dtype: int64

#### Visualize the class distribution

In [8]:
# Donut chart: one slice per species, sized by its row count.
class_dis_pie = px.pie(names=class_dis.index, values=class_dis.values, hole=0.4,
                       title='Class Distribution Donut Plot')
class_dis_pie.show()

In [9]:
# The same counts as bars, one colour per species.
class_dis_bar = px.bar(x=class_dis.index, y=class_dis.values, color=class_dis.index,
                       title='Class Distribution Bar Plot')
class_dis_bar.show()

#### Visualize Sepal length

In [10]:
# Sepal-length counts per species, bars side by side with the count printed on each.
sepal_length_hist = px.histogram(df, x='sepal length (cm)', color='label', barmode='group',
                                 text_auto=True, title='Sepal Length Histogram')
sepal_length_hist.show()

In [11]:
# Sepal-length spread per species as box plots.
sepal_length_box_plot = px.box(df, x='sepal length (cm)', color='label',
                               title='Sepal Length Box Plot')
sepal_length_box_plot.show()

In [12]:
# Sepal-length distribution per species as violins.
sepal_length_violin_plot = px.violin(df, x='sepal length (cm)', color='label',
                                     title='Sepal Length Violin Plot')
sepal_length_violin_plot.show()

#### Visualize Sepal Width

In [13]:
# Sepal-width counts per species, bars side by side with the count printed on each.
sepal_width_hist = px.histogram(
    df,
    x="sepal width (cm)",
    color="label",
    barmode="group",
    text_auto=True,
    title="Sepal width Histogram",
)
sepal_width_hist.show()

In [14]:
# Sepal-width spread per species as box plots.
sepal_width_box_plot = px.box(
    df,
    x="sepal width (cm)",
    color="label",
    title="Sepal width Box Plot",
)
sepal_width_box_plot.show()

In [15]:
# Sepal-width distribution per species as violins.
sepal_width_violin_plot = px.violin(
    df,
    x="sepal width (cm)",
    color="label",
    title="Sepal width Violin Plot",
)
sepal_width_violin_plot.show()

#### Visualize Petal Length

In [16]:
# Petal-length counts per species, bars side by side with the count printed on each.
petal_length_hist = px.histogram(
    df,
    x="petal length (cm)",
    color="label",
    barmode="group",
    text_auto=True,
    title="Petal length Histogram",
)
petal_length_hist.show()

In [17]:
# Petal-length spread per species as box plots.
petal_length_box_plot = px.box(
    df,
    x="petal length (cm)",
    color="label",
    title="Petal length Box Plot",
)
petal_length_box_plot.show()

In [18]:
# Petal-length distribution per species as violins.
petal_length_violin_plot = px.violin(
    df,
    x="petal length (cm)",
    color="label",
    title="Petal length Violin Plot",
)
petal_length_violin_plot.show()

#### Visualize Petal Width

In [19]:
# Petal-width counts per species, bars side by side with the count printed on each.
petal_width_hist = px.histogram(
    df,
    x="petal width (cm)",
    color="label",
    barmode="group",
    text_auto=True,
    title="Petal width Histogram",
)
petal_width_hist.show()

In [20]:
# Petal-width spread per species as box plots.
petal_width_box_plot = px.box(
    df,
    x="petal width (cm)",
    color="label",
    title="Petal width Box Plot",
)
petal_width_box_plot.show()

In [21]:
# Petal-width distribution per species as violins.
petal_width_violin_plot = px.violin(
    df,
    x="petal width (cm)",
    color="label",
    title="Petal width Violin Plot",
)
petal_width_violin_plot.show()


### Visual Understanding of the Statistical Inferences

#### Sepal length vs Sepal width

In [22]:
# Sepal length against sepal width; species told apart by colour and marker,
# with a box plot of each axis in the margins.
scatter_plot = px.scatter(
    df,
    x="sepal length (cm)",
    y="sepal width (cm)",
    color="label",
    symbol="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Sepal length vs Sepal width",
)
scatter_plot.show()

In [23]:
# Density contours of sepal length against sepal width, one contour set per
# species, with a box plot of each axis in the margins.
density_plot = px.density_contour(
    df,
    x="sepal length (cm)",
    y="sepal width (cm)",
    color="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Sepal length vs Sepal width",
)
density_plot.show()

#### Petal length vs Petal width

In [24]:
# Petal length against petal width; species told apart by colour and marker,
# with a box plot of each axis in the margins.
scatter_plot = px.scatter(
    df,
    x="petal length (cm)",
    y="petal width (cm)",
    color="label",
    symbol="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal length vs Petal width",
)
scatter_plot.show()

In [25]:
# Density contours of petal length against petal width, one contour set per
# species, with a box plot of each axis in the margins.
density_contour_plot = px.density_contour(
    df,
    x="petal length (cm)",
    y="petal width (cm)",
    color="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal length vs Petal width",
)
density_contour_plot.show()

#### Petal length vs Sepal length

In [26]:
# Petal length against sepal length; species told apart by colour and marker,
# with a box plot of each axis in the margins.
scatter_plot = px.scatter(
    df,
    x="petal length (cm)",
    y="sepal length (cm)",
    color="label",
    symbol="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal length vs Sepal length",
)
scatter_plot.show()

In [27]:
# Density contours of petal length against sepal length, one contour set per
# species, with a box plot of each axis in the margins.
density_contour_plot = px.density_contour(
    df,
    x="petal length (cm)",
    y="sepal length (cm)",
    color="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal length vs Sepal length",
)
density_contour_plot.show()

#### Petal width vs Sepal width

In [28]:
# Petal width against sepal width; species told apart by colour and marker,
# with a box plot of each axis in the margins.
scatter_plot = px.scatter(
    df,
    x="petal width (cm)",
    y="sepal width (cm)",
    color="label",
    symbol="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal width vs Sepal width",
)
scatter_plot.show()

In [29]:
# Density contours of petal width against sepal width, one contour set per
# species, with a box plot of each axis in the margins.
density_plot = px.density_contour(
    df,
    x="petal width (cm)",
    y="sepal width (cm)",
    color="label",
    marginal_x="box",
    marginal_y="box",
    width=1400,
    height=800,
    title="Petal width vs Sepal width",
)
density_plot.show()

#### Petal width vs Sepal width vs Petal length

In [30]:
# 3D scatter of petal width, sepal width and petal length; species told apart
# by colour and marker.
scatter_plot = px.scatter_3d(
    df,
    x="petal width (cm)",
    y="sepal width (cm)",
    z="petal length (cm)",
    color="label",
    symbol="label",
    title="Petal width vs Sepal width vs Petal length",
)
scatter_plot.show()

#### Petal width vs Sepal width vs Sepal length

In [31]:
# 3D scatter of petal width, sepal width and sepal length; species told apart
# by colour and marker.
scatter_plot = px.scatter_3d(
    df,
    x="petal width (cm)",
    y="sepal width (cm)",
    z="sepal length (cm)",
    color="label",
    symbol="label",
    title="Petal width vs Sepal width vs Sepal length",
)
scatter_plot.show()

### Spearman Correlation

<p>Spearman’s correlation is a statistical measure of the strength and direction of the monotonic relationship between two continuous or ordinal variables. It is computed on ranks: the values of each attribute are first put in order and replaced by their ranks, so it captures any consistently increasing or decreasing relationship, not only a linear one. It is denoted by the symbol “rho” (ρ) and takes values between -1 and +1. A positive value of rho indicates a positive relationship between the two variables (as one increases, so does the other), while a negative value of rho indicates a negative relationship. A rho value of 0 indicates no monotonic association between the two variables.</p>

In [32]:
# Correlate on a copy whose `label` column holds the integer class codes.
# `df` itself keeps its string labels, so this cell can be re-run and every
# cell that colours or groups by `label` still sees the species names.
corr = df.assign(label=targets).corr(method='spearman')

# Heatmap of the pairwise Spearman coefficients, with each value printed in its cell.
corr_heatmap = px.imshow(corr, text_auto=True, title="Spearman Correlation")
corr_heatmap.show()

## ML Models

### Data Preprocessing

In [33]:
# Standardise every feature column using statistics learned from `features`.
scaler = StandardScaler()
x_scaled = scaler.fit(features).transform(features)

In [34]:
# Split the raw features first, then fit the scaler on the training rows only,
# so the test rows' mean/variance never influence preprocessing (no train/test
# leakage). Row assignment depends on the sample count and random_state, not on
# the feature values, so splitting unscaled data selects the same rows.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(features, targets, shuffle=True, random_state=42, train_size=0.9, test_size=0.1)
scaler = StandardScaler().fit(x_train_raw)
x_train, x_test = scaler.transform(x_train_raw), scaler.transform(x_test_raw)

### Metrics

In [35]:
def metrics(true, pred):
    """Score predictions against ground-truth labels.

    Args:
        true: ground-truth class labels.
        pred: predicted class labels, aligned with ``true``.

    Returns:
        A 4-tuple ``(precision, recall, f1, accuracy)``; the first three are
        macro-averaged over the classes.
    """
    macro_scorers = (precision_score, recall_score, f1_score)
    p, r, f1 = (scorer(true, pred, average='macro') for scorer in macro_scorers)
    return p, r, f1, accuracy_score(true, pred)

In [36]:
# Accumulators for the per-model scores: position i in every list belongs to
# the model named at position i of `model_names`.
model_names = []

# One (train, test) pair of lists per metric.
train_precisions, test_precisions = [], []
train_recalls, test_recalls = [], []
train_f1s, test_f1s = [], []
train_accuracy_score, test_accuracy_score = [], []

### Initialize and Train Models

In [37]:
# Candidate classifiers, keyed by the name used in the score tables and plots.
# The tree-based scikit-learn estimators draw random numbers while fitting, so
# they are seeded (same seed as the train/test split) to make a fresh
# "Restart & Run All" reproduce the same scores.
models = {
    "SVM": SVC(),
    "XGBoost": XGBClassifier(),
    "GaussianNB": GaussianNB(),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "RandomForest": RandomForestClassifier(random_state=42),
    "LogisticRegression": LogisticRegression()
}

In [None]:
# Start from empty score lists so that re-running this cell replaces the
# recorded results instead of appending a second copy of every model's scores.
for score_list in (
    model_names,
    train_precisions, test_precisions,
    train_recalls, test_recalls,
    train_f1s, test_f1s,
    train_accuracy_score, test_accuracy_score,
):
  score_list.clear()

# Fit each candidate on the training split, then score it on both splits.
for model_name, model in models.items():
  model.fit(x_train, y_train)
  train_pred = model.predict(x_train)
  test_pred = model.predict(x_test)

  train_p, train_r, train_f1, train_acc = metrics(y_train, train_pred)
  test_p, test_r, test_f1, test_acc = metrics(y_test, test_pred)

  # Append in lockstep so position i of every list describes the same model.
  model_names.append(model_name)
  train_precisions.append(train_p)
  test_precisions.append(test_p)
  train_recalls.append(train_r)
  test_recalls.append(test_r)
  train_f1s.append(train_f1)
  test_f1s.append(test_f1)
  train_accuracy_score.append(train_acc)
  test_accuracy_score.append(test_acc)


In [40]:
# Print every recorded score, one model at a time, with a gap after each model.
for idx, name in enumerate(model_names):
  print("Model Name :", name)
  for caption, scores in (
      ("Train Precision :", train_precisions),
      ("Test Precision  :", test_precisions),
      ("Train Recall  :", train_recalls),
      ("Test Recall  :", test_recalls),
      ("Train F1-score  :", train_f1s),
      ("Test F1-score  :", test_f1s),
      ("Train Accuracy  :", train_accuracy_score),
      ("Test Accuracy  :", test_accuracy_score),
  ):
    print(caption, scores[idx])
  print("\n")

Model Name : SVM
Train Precision : 0.9716553287981861
Test Precision  : 1.0
Train Recall  : 0.9701805286911669
Test Recall  : 1.0
Train F1-score  : 0.9706072351421188
Test F1-score  : 1.0
Train Accuracy  : 0.9703703703703703
Test Accuracy  : 1.0


Model Name : XGBoost
Train Precision : 1.0
Test Precision  : 1.0
Train Recall  : 1.0
Test Recall  : 1.0
Train F1-score  : 1.0
Test F1-score  : 1.0
Train Accuracy  : 1.0
Test Accuracy  : 1.0


Model Name : GaussianNB
Train Precision : 0.9559961315280464
Test Precision  : 1.0
Train Recall  : 0.9559961315280464
Test Recall  : 1.0
Train F1-score  : 0.9559961315280464
Test F1-score  : 1.0
Train Accuracy  : 0.9555555555555556
Test Accuracy  : 1.0


Model Name : DecisionTree
Train Precision : 1.0
Test Precision  : 1.0
Train Recall  : 1.0
Test Recall  : 1.0
Train F1-score  : 1.0
Test F1-score  : 1.0
Train Accuracy  : 1.0
Test Accuracy  : 1.0


Model Name : RandomForest
Train Precision : 0.9925925925925926
Test Precision  : 1.0
Train Recall  : 0.99290

In [41]:
# One row per model, one column per recorded score, built from the score lists.
model_evals = pd.DataFrame(data=dict([
    ("Name", model_names),
    ("Train Precision", train_precisions),
    ("Test Precision", test_precisions),
    ("Train Recall", train_recalls),
    ("Test Recall", test_recalls),
    ("Train F1-score", train_f1s),
    ("Test F1-score", test_f1s),
    ("Train Accuracy", train_accuracy_score),
    ("Test Accuracy", test_accuracy_score),
]))

In [42]:
# Display the per-model score table.
model_evals

Unnamed: 0,Name,Train Precision,Test Precision,Train Recall,Test Recall,Train F1-score,Test F1-score,Train Accuracy,Test Accuracy
0,SVM,0.971655,1.0,0.970181,1.0,0.970607,1.0,0.97037,1.0
1,XGBoost,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,GaussianNB,0.955996,1.0,0.955996,1.0,0.955996,1.0,0.955556,1.0
3,DecisionTree,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,RandomForest,0.992593,1.0,0.992908,1.0,0.99267,1.0,0.992593,1.0
5,LogisticRegression,0.902019,1.0,0.895874,1.0,0.896423,1.0,0.896296,1.0


In [43]:
# Training-set precision per model, one coloured bar each, legend hidden.
train_precisions_bar = px.bar(
    model_evals,
    x="Name",
    y="Train Precision",
    color="Name",
    title="Train Precision Bar Graph",
)
train_precisions_bar.update_layout(showlegend=False)
train_precisions_bar.show()

In [44]:
# Test-set precision per model, one coloured bar each, legend hidden.
test_precision_bar = px.bar(
    model_evals,
    x="Name",
    y="Test Precision",
    color="Name",
    title="Test Precision Bar Graph",
)
test_precision_bar.update_layout(showlegend=False)
test_precision_bar.show()

In [45]:
# Training-set recall per model, one coloured bar each, legend hidden.
train_Recall_bar = px.bar(
    model_evals,
    x="Name",
    y="Train Recall",
    color="Name",
    title="Train Recall Bar Graph",
)
train_Recall_bar.update_layout(showlegend=False)
train_Recall_bar.show()

In [46]:
# Test-set recall per model, one coloured bar each, legend hidden.
test_Recall_bar = px.bar(
    model_evals,
    x="Name",
    y="Test Recall",
    color="Name",
    title="Test Recall Bar Graph",
)
test_Recall_bar.update_layout(showlegend=False)
test_Recall_bar.show()

In [47]:
# Training-set F1-score per model, one coloured bar each, legend hidden.
train_f1_score_bar = px.bar(
    model_evals,
    x="Name",
    y="Train F1-score",
    color="Name",
    title="Train F1-score Bar Graph",
)
train_f1_score_bar.update_layout(showlegend=False)
train_f1_score_bar.show()

In [48]:
# Test-set F1-score per model, one coloured bar each, legend hidden.
test_f1_score_bar = px.bar(
    model_evals,
    x="Name",
    y="Test F1-score",
    color="Name",
    title="Test F1-score Bar Graph",
)
test_f1_score_bar.update_layout(showlegend=False)
test_f1_score_bar.show()

In [49]:
# Training-set accuracy per model, one coloured bar each, legend hidden.
train_accuracy_bar = px.bar(
    model_evals,
    x="Name",
    y="Train Accuracy",
    color="Name",
    title="Train Accuracy Bar Graph",
)
train_accuracy_bar.update_layout(showlegend=False)
train_accuracy_bar.show()

In [50]:
# Test-set accuracy per model, one coloured bar each, legend hidden.
test_accuracy_bar = px.bar(
    model_evals,
    x="Name",
    y="Test Accuracy",
    color="Name",
    title="Test Accuracy Bar Graph",
)
test_accuracy_bar.update_layout(showlegend=False)
test_accuracy_bar.show()