## Let's get the quartiles! I'll try a few different approaches.


### `1` - This approach is quite bulky, but I tried to make the code as self-explanatory as possible.

In [1]:
# helper functions
def convert_to_element_number(index):
    """Translate a 0-based list index into the matching 1-based element number."""
    element_number = index + 1
    return element_number

def convert_to_index(element_number):
    """Translate a 1-based element number into the matching 0-based list index."""
    index = element_number - 1
    return index

def is_even(number):
    """Return True when ``number`` leaves no remainder on division by 2."""
    remainder = number % 2
    return remainder == 0

In [2]:
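# function to get the middle value of a sorted dataset (strictly speaking the median, although it is called the "mean" here)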
def get_mean(numerical_dataset):
    """Return the middle value (the median) of an already sorted dataset.

    Despite its name, this function does not average all the values: it picks
    the central element of ``numerical_dataset``, or the average of the two
    central elements when the dataset has an even number of items.

    Args:
        numerical_dataset: A non-empty, indexable sequence of numbers. The
            values are read by position, so the result is only the median
            when the sequence is sorted.

    Returns:
        The central element for an odd-length dataset, otherwise half the sum
        of the two central elements.

    Raises:
        ValueError: If ``numerical_dataset`` is empty.
    """
    length_of_the_dataset = len(numerical_dataset)
    # len() is never negative, so emptiness has to be checked explicitly;
    # otherwise the even branch below would index position -1.
    if length_of_the_dataset == 0:
        raise ValueError("cannot get the middle value of an empty dataset")

    if is_even(length_of_the_dataset):
        # Even length: average the two elements around the centre
        # (element numbers n/2 and n/2 + 1, counting from 1).
        nr_of_first_number = length_of_the_dataset // 2
        nr_of_second_number = length_of_the_dataset // 2 + 1

        first_number = numerical_dataset[convert_to_index(nr_of_first_number)]
        second_number = numerical_dataset[convert_to_index(nr_of_second_number)]

        return (first_number + second_number) / 2

    # Odd length: there is a single central element.
    nr_of_central_value = length_of_the_dataset // 2 + 1
    return numerical_dataset[convert_to_index(nr_of_central_value)]

In [3]:
# function that divides the data set by the mean, but it has to be sorted...
def divide_by_mean(sorted_dataset):
    """Split a sorted dataset around the value returned by ``get_mean``.

    Returns a pair of lists: the values less than or equal to that value,
    followed by the remaining values, each in their original order.
    """
    pivot = get_mean(sorted_dataset)
    not_above = []
    above = []
    for value in sorted_dataset:
        # Values equal to the pivot belong to the first list.
        target = not_above if value <= pivot else above
        target.append(value)
    return not_above, above

In [4]:
# we can also do something like this
def divide_sorted_set_by_number(sorted_dataset, number):
    """Split ``sorted_dataset`` into two lists around ``number``.

    The first list holds every value less than or equal to ``number``; the
    second holds all the other values. Relative order is preserved.
    """
    at_or_below = [value for value in sorted_dataset if value <= number]
    the_rest = [value for value in sorted_dataset if not (value <= number)]
    return at_or_below, the_rest

Here we use everything that we've coded to actually implement the algorithm.

In [5]:
def get_quartiles(numerical_dataset):
    """Return the three quartiles of ``numerical_dataset`` as a tuple.

    The data is sorted and its middle value becomes the second quartile. The
    sorted data is then split at that value (values equal to it go to the
    lower part), and the middle values of the two parts become the first and
    third quartiles.
    """
    ordered = sorted(numerical_dataset)
    second = get_mean(ordered)
    lower_part, upper_part = divide_sorted_set_by_number(ordered, second)
    return get_mean(lower_part), second, get_mean(upper_part)

In [6]:
# Try get_quartiles on a small unsorted sample with repeated values; the result is shown below.
get_quartiles([1,7,8,2,3,6,9,4,5,10,1,2,1,4,5])

(2.0, 4, 7)

#### Profit!

### `2` - Now let's do all of this in a more fashionable way. Fewer lines, more profit. :]

In [7]:
def get_mean_and_divide(dataset):
    """Find the middle value (median) of ``dataset`` and split the data by it.

    Args:
        dataset: A non-empty iterable of numbers, in any order. It is not
            modified; a sorted copy is used internally.

    Returns:
        A 3-tuple ``(lower, upper, middle_value)``. ``lower`` holds the sorted
        values less than or equal to the middle value, ``upper`` the sorted
        values greater than it, and ``middle_value`` is the central element
        of the sorted data (or the average of the two central elements when
        the number of values is even).

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    # Work on a sorted copy so the caller's list is left untouched.
    ordered = sorted(dataset)
    if not ordered:
        raise ValueError("cannot get the middle value of an empty dataset")

    middle = len(ordered) // 2
    middle_value = ordered[middle]
    if len(ordered) % 2 == 0:
        # Even length: average the two elements around the centre.
        middle_value = (middle_value + ordered[middle - 1]) / 2

    lower = [number for number in ordered if number <= middle_value]
    upper = [number for number in ordered if number > middle_value]
    return lower, upper, middle_value
    

In [8]:
def quartiles(dataset):
    """Return the first, second and third quartile of ``dataset`` as a tuple.

    ``get_mean_and_divide`` is applied to the full data to obtain the second
    quartile and the two parts around it; applying it again to each part
    yields the first and third quartiles.
    """
    lower_part, upper_part, second = get_mean_and_divide(dataset)
    first = get_mean_and_divide(lower_part)[2]
    third = get_mean_and_divide(upper_part)[2]
    return first, second, third

In [9]:
# Same sample as in the first approach, so the two results can be compared.
quartiles([1,7,8,2,3,6,9,4,5,10,1,2,1,4,5])

(2.0, 4, 7)

#### Profit!

### `2)` We can also speed this up a bit by just adding a function that only gets the mean

In [12]:
def mean(dataset):
    """Return the middle value (median) of ``dataset``, sorting it in place.

    Args:
        dataset: A non-empty list of numbers. As a deliberate side effect the
            list is sorted in place, so the caller's list is ordered
            afterwards.

    Returns:
        The central element of the sorted list for an odd number of values,
        otherwise the average of the two central elements.

    Raises:
        ValueError: If ``dataset`` is empty.
    """
    ds_length = len(dataset)
    # len() is never negative, so emptiness must be checked explicitly.
    if ds_length == 0:
        raise ValueError("cannot get the middle value of an empty dataset")

    dataset.sort()  # in place on purpose: the caller's list ends up sorted
    middle = ds_length // 2
    median = dataset[middle]
    if ds_length % 2:
        return median
    # Even length: average the two elements around the centre.
    return (median + dataset[middle - 1]) / 2

We don't have to worry about sorting the set to divide it by the given value:

In [13]:
def divide(dataset, division_value):
    """Split ``dataset`` into values at-or-below and above ``division_value``.

    Args:
        dataset: Any iterable of values comparable with ``division_value``.
            It does not need to be sorted.
        division_value: The value the data is split around.

    Returns:
        A pair of lists ``(not_above, above)``: the values less than or equal
        to ``division_value`` and the values greater than it, each in the
        order they appeared in ``dataset``.
    """
    not_above = []
    above = []
    # A single pass means one-shot iterators are split correctly too.
    for number in dataset:
        if number <= division_value:
            not_above.append(number)
        elif number > division_value:
            above.append(number)
    return not_above, above

And since our mean() function sorts the set to find the mean everything works fine here:

In [14]:
def quartiles_(dataset):
    """Return the first, second and third quartile of ``dataset`` as a tuple.

    The second quartile is the value returned by ``mean(dataset)``. The data
    is then split around that value with ``divide``, and ``mean`` of each part
    gives the first and third quartiles.
    """
    quartile_2 = mean(dataset)
    # Split around the computed value, not the ``mean`` function object.
    subset_1, subset_2 = divide(dataset, quartile_2)
    return mean(subset_1), quartile_2, mean(subset_2)

In [15]:
# NOTE(review): this calls quartiles() from the earlier cell, not the quartiles_()
# defined just above — confirm which function this cell was meant to demonstrate.
quartiles([1,7,8,2,3,6,9,4,5,10,1,2,1,4,5])

(2.0, 4, 7)

#### Profit!

## So there are many ways of achieving the same task. Pick your favourite! :D