In [None]:
# matplotlib's pyplot interface; used below to set notebook-wide plot defaults
import matplotlib.pyplot as plt
# Default size (width, height in inches) applied to every figure created afterwards
plt.rcParams['figure.figsize'] = [10, 5]

# pandas: tabular data container and Excel reader
from pandas import DataFrame, read_excel

# Distinguish Iris species based on flower morphological features

<img align="right" src="../data/iris_petal_sepal.png">
The Iris flower data set, or Fisher's Iris data set, is a multivariate data set introduced by the British statistician and biologist Ronald Fisher.

The data set consists of 50 samples from each of three species of Iris (*Iris setosa*, *Iris virginica* and *Iris versicolor*). Four features were measured from each sample: the length and the width of the sepals and petals, in centimeters.

In [None]:
# Load the Iris data set from the Excel workbook into a pandas DataFrame.
# The path is relative, so it is resolved against the notebook's working directory.
# NOTE(review): legacy .xls files presumably require the optional `xlrd` engine
# to be installed — confirm for the target environment.
df = read_excel('../data/iris-dataset.xls')

In [None]:
# Display (part of) the data set contained in the Pandas DataFrame.
# A bare expression on the last line of a cell is rendered with Jupyter's rich
# (HTML table) display instead of the plain-text dump produced by print();
# long frames are truncated automatically according to pandas' display options.
df

### A very high-level introduction to working with DataFrames

In [None]:
# --- Columns -----------------------------------------------------------------
print(df.columns)                        # labels of all columns
print(df['petal width'])                 # one label -> a single column
print(df[['petal width', 'species']])    # list of labels -> a sub-table

# --- Rows --------------------------------------------------------------------
print(df.loc[10])       # the row whose index label is 10
print(df.loc[5:10])     # label-based slice: both end points are included
print(df.head(10))      # the first 10 rows

# --- Queries -----------------------------------------------------------------
# Build the boolean condition once, then reuse it for both selections.
long_sepal = df['sepal length'] > 7
print(df[long_sepal])                      # every column of the matching rows
print(df.loc[long_sepal, 'petal width'])   # only 'petal width' of those rows

### Compute descriptive statistics

In [None]:
# Single statistical operations.
# numeric_only=True restricts each reduction to the numeric measurement columns.
# Without it, pandas >= 2.0 also tries to reduce the 'species' column (which
# presumably holds text labels): mean() and skew() then raise a TypeError and
# sum() concatenates the labels into one long string.
print(df.mean(numeric_only=True))
print(df.sum(numeric_only=True))
print(df.skew(numeric_only=True))

# Statistical overview of the numeric columns
print(df.describe())

# Combining operations: standardise 'sepal width' to z-scores
# (subtract the mean, then divide by the standard deviation)
sp = df['sepal width']
sp_stand = (sp - sp.mean()) / sp.std()
print(sp_stand.describe())

### Grouping data (comparable to a pivot table in Excel)

In [None]:
# Summary statistics per species; describe() on the grouped frame produces
# two-level column labels of the form (feature, statistic).
pv = df.groupby('species').describe()

# Boolean mask over the columns: True where the statistic (second label level)
# is 'mean' or 'std'. Selecting with it keeps just those two per feature.
wanted_stats = pv.columns.get_level_values(1).isin(['mean', 'std'])
pv.loc[:, wanted_stats]

### Plotting data

In [None]:
# Line plot: each feature plotted against the row index (one line per feature)
f1 = df.plot(title='Iris data set features')
f1.set(xlabel="sample", ylabel="width/length (cm)")

# Box plot: one box per feature; titled and labelled so the figure stands alone
f2 = df.plot.box(title='Iris data set features: box plot')
f2.set(xlabel="feature", ylabel="width/length (cm)")

# Histograms: df.hist() returns an array of Axes (one subplot per feature)
# that all live on the same figure, so the title is set on that shared figure.
# The trailing ';' suppresses the text repr of the returned title object.
f3 = df.hist()
f3.flat[0].figure.suptitle('Iris data set features: histograms (cm)');