In [14]:
''' 
Author: Navado (Romaine) Wray
Date: 20200129
CS301-006, Professor Watson
HW02 Solution
Calculate the quantiles, min, and max of a small dataset using different interpolation methods such as R8.
The program prints the outliers and a dictionary containing the min/max computed without the outliers and
the three quantiles (.25, .5, .75) computed with the outliers included.
# Link to the github repo: https://github.com/nrw24/Data-Science
# Link to the relevant git commit: https://github.com/nrw24/Data-Science/blob/master/r8_interpolation.ipynb
# name of the branch: master

'''
import numpy as np
import math

def q_summary_V1(data):    #grades is a list of numbers
    '''
    Print the outliers and a quantile summary of data using numpy's quantiles

    input
    _____
    data (list) - the list of numbers to summarize

    output
    ______
    None - the outliers and the summary dictionary are printed, not returned
    '''
    stats_dict = {}
    #detect outliers
    noOutliers, outliers = removeOutliers_v1(data)
    #quantiles are calculated on the full data set (outliers included)
    stats_dict["q1"] = np.quantile(data, .25)
    stats_dict["q2"] = np.quantile(data, .5)
    stats_dict["q3"] = np.quantile(data, .75)
    #max and min are calculated on the data with the outliers removed
    stats_dict["max"] = np.amax(noOutliers)
    stats_dict["min"] = np.amin(noOutliers)
    #convert list to a comma separated string
    str1 = ",".join(str(value) for value in outliers)

    #print results
    print("The following are outliers: [" + str1 + "]" )
    print(stats_dict)

def removeOutliers_v1(data):
    '''
    Split data into regular values and outliers using numpy's quantiles

    A value is an outlier when it lies more than 1.5 times the interquartile
    range below the first quartile or above the third quartile.

    input
    _____
    data (list) - the list of numbers

    output
    ______
    (result, outliers) (tuple of lists) - the values inside the limits and the
                                          values outside them, each in the
                                          order they appear in data
    '''
    values = np.array(data)
    first_quartile = np.quantile(values, .25)
    third_quartile = np.quantile(values, .75)
    spread = third_quartile - first_quartile
    upper_limit = third_quartile + 1.5*spread
    lower_limit = first_quartile - 1.5*spread
    kept, rejected = [], []

    for value in values:
        inside = value >= lower_limit and value <= upper_limit
        # each value goes to exactly one of the two lists
        (kept if inside else rejected).append(value)
    return (kept, rejected)

def get_quantile_indexes(n, p):
    '''
    Calculate index values as input for interpolation

    The zero-based position of the quantile is p*(n+1) - 1. For a small n, or
    a p close to 0 or 1, that position falls before the first index or after
    the last one, so it is clamped to the range [0, n-1].

    input
    _____
    n (int) - how many numbers are in the list
    p (float) - the percentage corresponding to the quantile (ex:0.25)

    output
    ______
    l, i, u (float) - the corresponding x1, x, x2 values as calculated via class:
                      the floor of the position, the position itself and the
                      ceiling of the position
    '''
    i = p*(n+1) - 1
    # keep the position on a valid index so the neighbours exist in the list
    last = max(n - 1, 0)
    if i < 0:
        i = 0.0
    elif i > last:
        i = float(last)
    l = np.floor(i)
    u = np.ceil(i)
    return l,i,u


def get_quantile(nums, q):
    '''
    Use the in-class method of interpolation to calculate the quantile

    nums is not sorted here, so it must already be in ascending order.
    When the quantile position falls outside the list (short lists, or q
    close to 0 or 1) the first or last element is used instead of letting a
    negative index wrap around or an index run past the end.

    input
    _____
    nums (list) - the list of numbers, sorted in ascending order
    q (float) - the quantile desired

    output
    ______
    y (int/float) - this is the value in nums at the given quantile q; the
                    element itself when the position is a whole number,
                    otherwise the linear interpolation of its two neighbours

    raises
    ______
    IndexError - if nums is empty
    '''
    if len(nums) == 0:
        raise IndexError("cannot calculate a quantile of an empty list")

    last = len(nums) - 1
    x1, x, x2 = get_quantile_indexes(len(nums), q)
    # clamp the neighbour indexes so they always point inside nums
    low = min(max(int(x1), 0), last)
    high = min(max(int(x2), 0), last)

    # floor and ceiling agree only when the position is a whole number
    if x1 == x2:
        return nums[low]

    y1, y2 = nums[low], nums[high]
    # straight line through (x1, y1) and (x2, y2) evaluated at x
    y = (x - x1) * (y1-y2) / (x1 - x2) + y1
    return y

def removeOutliers_v2(data):
    '''
    Split data into regular values and outliers using the in-class quantiles

    A value is an outlier when it lies more than 1.5 times the interquartile
    range below the first quartile or above the third quartile.

    input
    _____
    data (list) - the list of numbers, in any order

    output
    ______
    (result, outliers) (tuple of lists) - the values inside the limits and the
                                          values outside them, each in
                                          ascending order
    '''
    ordered = sorted(data)
    first_quartile = get_quantile(ordered, .25)
    third_quartile = get_quantile(ordered, .75)
    spread = third_quartile - first_quartile
    upper_limit = third_quartile + 1.5*spread
    lower_limit = first_quartile - 1.5*spread

    kept = [value for value in ordered
            if value >= lower_limit and value <= upper_limit]
    rejected = [value for value in ordered
                if not (value >= lower_limit and value <= upper_limit)]

    return (kept, rejected)
    
def q_summary_V2(data):
    '''
    Print the outliers and a quantile summary of data using the in-class quantiles

    input
    _____
    data (list) - the list of numbers to summarize, in any order

    output
    ______
    None - the outliers and the summary dictionary are printed, not returned
    '''
    stats_dict = {}
    #get_quantile reads the list by position, so it has to be sorted first
    data = sorted(data)
    #get Outliers
    noOutliers, outliers = removeOutliers_v2(data)

    #quantiles are calculated on the full data set (outliers included)
    stats_dict["q1"] = get_quantile(data, .25)
    stats_dict["q2"] = get_quantile(data, .5)
    stats_dict["q3"] = get_quantile(data, .75)
    #max and min are calculated on the data with the outliers removed
    stats_dict["max"] = np.amax(noOutliers)
    stats_dict["min"] = np.amin(noOutliers)

    #convert list to a comma separated string
    str1 = ",".join(str(value) for value in outliers)

    #print results
    print("The following are outliers: [" + str1 + "]" )
    print(stats_dict)
    


# sample data sets used to compare the two versions of the summary
grades = [0, 0, 14, 35, 63, 66, 75, 77, 78, 80, 81, 81, 87, 89, 90, 91, 100]
times = [5, 10, 10, 15, 15, 15, 15, 20, 20, 20, 25, 30, 30, 40, 40, 45, 60, 60, 65, 89]

# run V1 on both data sets, then V2 on both data sets
for summarize in (q_summary_V1, q_summary_V2):
    for dataset in (grades, times):
        summarize(dataset)

The following are outliers: [0,0,14]
{'q1': 63.0, 'q2': 78.0, 'q3': 87.0, 'max': 100, 'min': 35}
The following are outliers: [89]
{'q1': 15.0, 'q2': 22.5, 'q3': 41.25, 'max': 65, 'min': 5}
The following are outliers: []
{'q1': 49.0, 'q2': 78, 'q3': 88.0, 'max': 100, 'min': 0}
The following are outliers: [89]
{'q1': 15.0, 'q2': 22.5, 'q3': 43.75, 'max': 65, 'min': 5}
