# Iris

### Introduction:

This exercise may seem a little bit strange, but keep doing it.

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

In [3]:
# Load the iris CSV that ships with the pandas test data (this copy has a header row).
# The previous URL targeted the old `pydata/pandas` location on the `master` branch.
# The project now lives at `pandas-dev/pandas` with `main` as its default branch, and
# GitHub does not redirect raw file URLs after a branch rename, so point at the
# current location explicitly.
# NOTE(review): consider pinning a release tag instead of `main` for reproducibility.
url = 'https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/iris.csv'
data = pd.read_csv(url, sep=',')


### Step 3. Assign it to a variable called iris

In [5]:
# Work on an independent copy of the loaded frame. A plain `iris = data` only binds a
# second name to the same DataFrame, so every later in-place edit (column renaming,
# NaN assignment, drop/dropna/reset_index with inplace=True) would also alter `data`,
# and re-running this cell would not give back the freshly loaded rows.
iris = data.copy()
print(iris.head())

   SepalLength  SepalWidth  PetalLength  PetalWidth         Name
0          5.1         3.5          1.4         0.2  Iris-setosa
1          4.9         3.0          1.4         0.2  Iris-setosa
2          4.7         3.2          1.3         0.2  Iris-setosa
3          4.6         3.1          1.5         0.2  Iris-setosa
4          5.0         3.6          1.4         0.2  Iris-setosa


### Step 4. Create columns for the dataset

In [6]:
# Replace the header names with snake_case labels, in positional order.
column_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]
iris.columns = column_names

# Show the first five rows under the new labels.
renamed_preview = iris.head()
print(renamed_preview)

   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [57]:
# 1. sepal_length (in cm)
# 2. sepal_width (in cm)
# 3. petal_length (in cm)
# 4. petal_width (in cm)
# 5. class

### Step 5. Are there any missing values in the dataframe?

In [7]:
# Count the missing entries in each column (isna is the same check as isnull).
missing_per_column = iris.isna().sum()
print(missing_per_column)

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64


### Step 6. Let's set the values of rows 10 to 29 of the column 'petal_length' to NaN

In [8]:
import numpy as np

# .loc slices by label and includes both endpoints, so this covers labels 10 through 29.
rows_to_blank = slice(10, 29)
iris.loc[rows_to_blank, 'petal_length'] = np.nan

# Print the first 30 rows so the blanked range is visible.
first_thirty = iris.head(30)
print(first_thirty)

    sepal_length  sepal_width  petal_length  petal_width        class
0            5.1          3.5           1.4          0.2  Iris-setosa
1            4.9          3.0           1.4          0.2  Iris-setosa
2            4.7          3.2           1.3          0.2  Iris-setosa
3            4.6          3.1           1.5          0.2  Iris-setosa
4            5.0          3.6           1.4          0.2  Iris-setosa
5            5.4          3.9           1.7          0.4  Iris-setosa
6            4.6          3.4           1.4          0.3  Iris-setosa
7            5.0          3.4           1.5          0.2  Iris-setosa
8            4.4          2.9           1.4          0.2  Iris-setosa
9            4.9          3.1           1.5          0.1  Iris-setosa
10           5.4          3.7           NaN          0.2  Iris-setosa
11           4.8          3.4           NaN          0.2  Iris-setosa
12           4.8          3.0           NaN          0.1  Iris-setosa
13           4.3    

### Step 7. Good, now let's replace the NaN values with 1.0

In [9]:
# Fill the missing petal lengths and assign the result back to the column.
# Calling fillna(..., inplace=True) on `iris['petal_length']` acts on an intermediate
# Series selected from the frame (chained assignment). Recent pandas versions warn
# about this, and under Copy-on-Write the original DataFrame is left unchanged, so
# the explicit assignment is the form that reliably updates `iris`.
iris['petal_length'] = iris['petal_length'].fillna(1.0)
print(iris.head(30))

    sepal_length  sepal_width  petal_length  petal_width        class
0            5.1          3.5           1.4          0.2  Iris-setosa
1            4.9          3.0           1.4          0.2  Iris-setosa
2            4.7          3.2           1.3          0.2  Iris-setosa
3            4.6          3.1           1.5          0.2  Iris-setosa
4            5.0          3.6           1.4          0.2  Iris-setosa
5            5.4          3.9           1.7          0.4  Iris-setosa
6            4.6          3.4           1.4          0.3  Iris-setosa
7            5.0          3.4           1.5          0.2  Iris-setosa
8            4.4          2.9           1.4          0.2  Iris-setosa
9            4.9          3.1           1.5          0.1  Iris-setosa
10           5.4          3.7           1.0          0.2  Iris-setosa
11           4.8          3.4           1.0          0.2  Iris-setosa
12           4.8          3.0           1.0          0.1  Iris-setosa
13           4.3    

### Step 8. Now let's delete the column class

In [10]:
# Remove the 'class' column from the frame itself (axis=1 selects columns).
iris.drop('class', axis=1, inplace=True)

# Show the first five rows without the dropped column.
without_class_preview = iris.head()
print(without_class_preview)

   sepal_length  sepal_width  petal_length  petal_width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2


### Step 9. Set the first 3 rows to NaN

In [11]:
# Overwrite every column of the first three rows (positions 0, 1, 2) with NaN.
iris.iloc[0:3, :] = np.nan

# Show the first five rows so the blanked rows are visible.
blanked_preview = iris.head()
print(blanked_preview)

   sepal_length  sepal_width  petal_length  petal_width
0           NaN          NaN           NaN          NaN
1           NaN          NaN           NaN          NaN
2           NaN          NaN           NaN          NaN
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2


### Step 10.  Delete the rows that have NaN

In [12]:
# Drop, in place, every row that contains at least one NaN
# (axis=0 and how='any' are the defaults, spelled out here).
iris.dropna(axis=0, how='any', inplace=True)

# Show the first five remaining rows; the original index labels are kept.
remaining_preview = iris.head()
print(remaining_preview)

   sepal_length  sepal_width  petal_length  petal_width
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2
5           5.4          3.9           1.7          0.4
6           4.6          3.4           1.4          0.3
7           5.0          3.4           1.5          0.2


### Step 11. Reset the index so it begins with 0 again

In [13]:
# Renumber the rows from 0 in place; drop=True discards the old labels
# instead of keeping them as a new column.
iris.reset_index(inplace=True, drop=True)

# Show the first five rows with the fresh index.
reindexed_preview = iris.head()
print(reindexed_preview)

   sepal_length  sepal_width  petal_length  petal_width
0           4.6          3.1           1.5          0.2
1           5.0          3.6           1.4          0.2
2           5.4          3.9           1.7          0.4
3           4.6          3.4           1.4          0.3
4           5.0          3.4           1.5          0.2


### BONUS: Create your own question and answer it.

In [14]:
# BONUS: What is the average sepal length for the dataset?

# Take the mean of the 'sepal_length' column of the frame as it stands at this point.
sepal_lengths = iris['sepal_length']
average_sepal_length = sepal_lengths.mean()
print(f"The average sepal length is: {average_sepal_length}")

The average sepal length is: 5.862585034013605
