# Iris

### Introduction:

This exercise may seem a little bit strange, but keep doing it.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Names applied to the five columns, in file order.
column_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]

# header=None: the first line of the file is treated as data, not as a header,
# so the column labels come from `column_names` instead.
iris_data = pd.read_csv(url, names=column_names, header=None)

print(iris_data.head())


   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


### Step 3. Assign it to a variable called iris

In [3]:
# Reuse the frame that was already downloaded into `iris_data` rather than
# fetching the same CSV from the network a second time. `.copy()` gives `iris`
# its own data, so the in-place edits made to `iris` later in the notebook
# leave `iris_data` untouched.
iris = iris_data.copy()

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


### Step 4. Create columns for the dataset

In [4]:
# Column labels, in order (the four measurements are in cm):
#   1. sepal_length
#   2. sepal_width
#   3. petal_length
#   4. petal_width
#   5. class
iris.columns = [
    'sepal_length', 'sepal_width',
    'petal_length', 'petal_width',
    'class',
]

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


### Step 5. Are there any missing values in the dataframe?

In [5]:
# Count the missing entries in each column (isna is the same check as isnull).
missing_values = iris.isna().sum()

print(missing_values)


sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64


### Step 6. Let's set the values of rows 10 to 29 of the column 'petal_length' to NaN

In [6]:
# .loc slices by label and includes both endpoints, so this selects rows 10..29.
target_rows = slice(10, 29)

iris.loc[target_rows, 'petal_length'] = np.nan

print(iris.loc[target_rows, ['petal_length']])


    petal_length
10           NaN
11           NaN
12           NaN
13           NaN
14           NaN
15           NaN
16           NaN
17           NaN
18           NaN
19           NaN
20           NaN
21           NaN
22           NaN
23           NaN
24           NaN
25           NaN
26           NaN
27           NaN
28           NaN
29           NaN


### Step 7. Good, now let's replace the NaN values with 1.0

In [7]:
# Overwrite only the missing petal_length entries with 1.0; every other value
# in the column is left as it is.
is_missing = iris['petal_length'].isna()
iris.loc[is_missing, 'petal_length'] = 1.0

print(iris.loc[10:29, ['petal_length']])


    petal_length
10           1.0
11           1.0
12           1.0
13           1.0
14           1.0
15           1.0
16           1.0
17           1.0
18           1.0
19           1.0
20           1.0
21           1.0
22           1.0
23           1.0
24           1.0
25           1.0
26           1.0
27           1.0
28           1.0
29           1.0


### Step 8. Now let's delete the column class

In [8]:
# Remove the 'class' column; axis=1 tells drop() that the label names a column.
iris = iris.drop('class', axis=1)

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2


### Step 9. Set the first 3 rows to NaN

In [9]:
# Positional slice: blank out every column of the first three rows.
iris.iloc[:3] = np.nan

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width
0           NaN          NaN           NaN          NaN
1           NaN          NaN           NaN          NaN
2           NaN          NaN           NaN          NaN
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2


### Step 10.  Delete the rows that have NaN

In [10]:
# Keep only the rows with no missing value in any column
# (axis=0 / how='any' are dropna's defaults, spelled out for the reader).
iris = iris.dropna(axis=0, how='any')

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2
5           5.4          3.9           1.7          0.4
6           4.6          3.4           1.4          0.3
7           5.0          3.4           1.5          0.2


### Step 11. Reset the index so it begins with 0 again

In [11]:
# Replace the gappy index left by the row removal with a fresh 0..n-1 index;
# the old index values are discarded rather than kept as a column.
iris.index = pd.RangeIndex(len(iris))

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width
0           4.6          3.1           1.5          0.2
1           5.0          3.6           1.4          0.2
2           5.4          3.9           1.7          0.4
3           4.6          3.4           1.4          0.3
4           5.0          3.4           1.5          0.2


### BONUS: Create your own question and answer it.

In [20]:
# Rebuild the species labels from the original 'class' column of `iris_data`
# instead of hard-coding a 49/49/49 split. A fixed equal split only lines up if
# one row was removed from each class, but Steps 9-10 removed the first three
# rows, all of them Iris-setosa, so the class boundaries would be shifted and
# rows near them would get the wrong label.
# NOTE: this assumes the removed rows are exactly the leading rows of
# `iris_data`, which is what Steps 9-10 do.
n_dropped = len(iris_data) - len(iris)

species = (
    iris_data['class']
    .iloc[n_dropped:]                       # skip the rows that were removed
    .str.replace('Iris-', '', regex=False)  # 'Iris-setosa' -> 'setosa'
    .tolist()                               # plain list: assigned by position, not by index label
)

iris['species'] = species

print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width species
0           4.6          3.1           1.5          0.2  setosa
1           5.0          3.6           1.4          0.2  setosa
2           5.4          3.9           1.7          0.4  setosa
3           4.6          3.4           1.4          0.3  setosa
4           5.0          3.4           1.5          0.2  setosa
