# Data analysis using Numpy

In [1]:
import numpy as np
import sklearn.datasets as datasets

In [2]:
# Keep only the measurement matrix of the bundled iris dataset.
data = datasets.load_iris()["data"]

# About the data set

The variable `data` is a numpy array holding measurements of 150 flower samples, one sample per row.  Each sample has 4 measurements, in column order: sepal length, sepal width, petal length and petal width.  All measurements are in cm.

In [3]:
# Here is a sample of the measurements (the first 10 rows):

data[:10]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

# ✎ Shape of data

Implement a function that returns the shape of the numpy array as a tuple.

In [4]:
def get_data_shape():
    """Report the dimensions of the module-level ``data`` array.

    Returns:
        tuple: the ``shape`` attribute of ``data``.
    """
    shape = data.shape
    return shape
    

# ✎ Last few samples

Implement a function that returns the last _n_ samples.

Parameter:

- `n`: the number of samples to return from the _end_ of data

Returns:

- A numpy array of shape `(n, 4)`

In [5]:
def get_last_samples(n):
    """Return the last ``n`` samples (rows) of the module-level ``data`` array.

    Parameters:
        n: number of samples to take from the end of ``data``; must be >= 0.
           If ``n`` exceeds the number of rows, every row is returned.

    Returns:
        A numpy array with ``min(n, len(data))`` rows and the same columns
        as ``data`` (a basic slice of ``data``, not a copy).

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    # Use an explicit start index instead of ``data[-n:]``: ``-0`` is just
    # ``0``, so ``data[-0:]`` would return every row rather than none.
    start = max(len(data) - n, 0)
    return data[start:]

# ✎ Sepal width

Implement a function to return the sepal width for all the
flowers.  This should be the second column of the data array.
Remember the array indices start with 0.

Parameter: none

Returns:
- A numpy array of shape `(n,)`, where `n` is the number of samples.

In [6]:
def get_sepal_width():
    """Return the sepal width of every sample in the module-level ``data``.

    Returns:
        A 1-D numpy array with one value per row of ``data``.
    """
    # Sepal width is the second column, i.e. zero-based column index 1.
    return data[:, 1]

# ✎ Petal area

Implement a function to compute the area of the petals.  We approximate the shape of a petal as a rectangle, so the area is computed as its width times length.

Parameter: none

Returns:

- A numpy array of shape `(n,)`, where `n` is the number of samples.

In [7]:
def get_petal_area():
    """Approximate every petal's area as a rectangle: length times width.

    Returns:
        A 1-D numpy array with one area per row of the module-level ``data``.
    """
    petal_length = data[:, 2]  # third column
    petal_width = data[:, 3]   # fourth column
    return petal_length * petal_width


# ✎ Samples in top percentile

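Implement a function that returns the __samples__ in the top `p` percentile as measured by their petal area, i.e. every sample whose petal area is greater than or equal to the `p`-th percentile of all petal areas.  For example, `p = 100` returns only the sample(s) with the largest petal area.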

Parameters:

- `p`: the percentile between 0 and 100

Returns:
- A numpy array of shape `(k, 4)` where `k` is some integer corresponding to the number of samples

In [8]:
def get_samples_top_percentile_by_petal_area(p):
    """Return the samples whose petal area is at or above the p-th percentile.

    Parameters:
        p: the percentile, between 0 and 100 (passed to ``np.percentile``,
           which raises ``ValueError`` for values outside that range).

    Returns:
        A 2-D numpy array containing the selected rows of the module-level
        ``data``, in their original order.
    """
    petal_areas = get_petal_area()
    area_cutoff = np.percentile(petal_areas, p)
    # Select rows with a boolean mask instead of a Python-level loop: it
    # reuses the areas already computed and keeps the result 2-D even when
    # no row satisfies the condition.
    return data[petal_areas >= area_cutoff]


# ✎ Samples in __bottom__ percentile

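Implement a function that returns the __samples__ in the __bottom__ `p` percentile as measured by their petal area, i.e. every sample whose petal area is less than or equal to the `p`-th percentile of all petal areas.  For example, `p = 0` returns only the sample(s) with the smallest petal area.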

Parameters:

- `p`: the percentile between 0 and 100

Returns:
- A numpy array of shape `(k, 4)` where `k` is some integer corresponding to the number of samples

In [9]:
def get_samples_bottom_percentile_by_petal_area(p):
    """Return the samples whose petal area is at or below the p-th percentile.

    Parameters:
        p: the percentile, between 0 and 100 (passed to ``np.percentile``,
           which raises ``ValueError`` for values outside that range).

    Returns:
        A 2-D numpy array containing the selected rows of the module-level
        ``data``, in their original order.
    """
    petal_areas = get_petal_area()
    area_cutoff = np.percentile(petal_areas, p)
    # Select rows with a boolean mask instead of a Python-level loop: it
    # reuses the areas already computed and keeps the result 2-D even when
    # no row satisfies the condition.
    return data[petal_areas <= area_cutoff]


# Test your code before submitting

In [10]:
# The petal areas should form a 1-D array with one entry per sample.
get_petal_area().shape

(150,)

In [11]:
# Show the samples whose petal area is at or below the 99th-percentile cutoff.
get_samples_bottom_percentile_by_petal_area(99)

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [12]:
# At p = 100 the cutoff is the largest petal area, so a single sample is expected.
len(get_samples_top_percentile_by_petal_area(100)) == 1

True

In [13]:
# Smoke test: the bottom-percentile helper must return a value.
get_samples_bottom_percentile_by_petal_area(1) is not None

True

In [14]:
# Sanity check: the shape helper must return a value.
assert get_data_shape() is not None

In [15]:
# The top-percentile helper must return a value for p = 99.
assert get_samples_top_percentile_by_petal_area(99) is not None

In [16]:
# At p = 100 exactly one sample (the largest petal area) should be selected.
assert len(get_samples_top_percentile_by_petal_area(100)) == 1

In [17]:
# The bottom-percentile helper must return a value for p = 1.
assert get_samples_bottom_percentile_by_petal_area(1) is not None

In [18]:
# At p = 0 exactly one sample (the smallest petal area) should be selected.
assert len(get_samples_bottom_percentile_by_petal_area(0)) == 1