# Iris Visualization

## Setup

In [1]:
# This notebook needs Python 3.5 or newer; stop immediately on anything older.
import sys
import pandas as pd
assert sys.version_info[:2] >= (3, 5)

# Add the parent directory (relative to the working directory) to the
# module search path.
sys.path.append("..")

# Scikit-Learn ≥0.20 is required.
# Compare the numeric (major, minor) pair rather than the raw version string:
# string comparison is lexicographic, so "0.9" >= "0.20" would wrongly pass.
import re
import sklearn
_sklearn_major_minor = tuple(
    int(number) for number in re.findall(r"\d+", sklearn.__version__)[:2]
)
assert _sklearn_major_minor >= (0, 20)

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Figures are written to <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>; the directory
# is created up front so later saves cannot fail on a missing folder.
CHAPTER_ID = "end_to_end_project"
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(
    PROJECT_ROOT_DIR,
    "images",
    CHAPTER_ID,
)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into ``IMAGES_PATH``.

    Args:
        fig_id: Base name of the output file, without extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension; also passed to ``savefig`` as ``format``.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    destination = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)

# Silence warnings whose message starts with "internal gelsd"
# (see SciPy issue #5998).
import warnings
warnings.filterwarnings("ignore", message="^internal gelsd")

Select the Input File

In [2]:
# Relative path of the Iris data file that is loaded into the DataFrame below.
input_file = "../data/iris.data"

Create a DataFrame using an inferred schema

In [None]:
# Load the data file into a DataFrame.
# Column names are supplied explicitly through `names`; pandas infers the
# dtype of each column.
column_names = [
    "sepal length",
    "sepal width",
    "petal length",
    "petal width",
    "label",
]
df = pd.read_csv(input_file, names=column_names)
display(df)

### Box Plots

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
df.boxplot(sym='r+', figsize=(20,15))
save_fig("attribute_box_plots")
plt.show()

### Histograms

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
df.hist(bins=50, figsize=(20,15))
save_fig("attribute_histogram_plots")
plt.show()

### Density Plot

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
df.plot.kde(figsize=(20,15))
save_fig("attribute_density_plots")
plt.show()

### Scatter Plot

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
ax = df[df['label'] == 'Iris-setosa'].plot.scatter(x='sepal length', y='petal width', color='DarkBlue', label='Iris-setosa')
df[df['label'] == 'Iris-versicolor'].plot.scatter(x='sepal length', y='petal width', color='DarkGreen', label='Iris-versicolour', ax=ax)
df[df['label'] == 'Iris-virginica'].plot.scatter(x='sepal length', y='petal width', color='DarkRed', label='Iris-virginica', ax=ax)
save_fig("scatter_plots")
plt.show()

### Scatter matrix

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
scatter_matrix(df, figsize=(30, 30), diagonal='kde')
save_fig("scatter_matrix")
plt.show()

### Bubble Charts

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
dfPandas= df[:]
dfPandas.plot.scatter(x='sepal length', y='petal width', s=dfPandas['sepal width'] * 10, color='DarkBlue')
save_fig("bubble_plots")
plt.show()

### Parallel coordinates

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from pandas.plotting import parallel_coordinates
parallel_coordinates(df, 'label')
save_fig("paralle_coordinates_plots")
plt.show()