In [185]:
import pandas as pd

In [186]:
def get_mean(series):
    """Return the arithmetic mean of ``series``.

    The computation is delegated to the object's own ``mean()`` method,
    so any object exposing that method (e.g. a pandas Series) is accepted.
    """
    average = series.mean()
    return average

In [187]:
def get_midrange(series):
    """Return the midrange of ``series``: the average of its minimum and maximum."""
    lowest = series.min()
    highest = series.max()
    return (lowest + highest) / 2

In [188]:
def get_mode(series):
    """Return the mode(s) of ``series`` as an array.

    ``series.mode()`` yields every value tied for most frequent; the
    underlying ``.values`` array of that result is what gets returned.
    """
    most_frequent = series.mode()
    return most_frequent.values

In [189]:
def get_modality(mode_arr):
    """Name the modality implied by an array of modes.

    Parameters
    ----------
    mode_arr : array-like exposing a ``size`` attribute
        The modes of a distribution (e.g. the result of ``get_mode``).

    Returns
    -------
    str
        "No mode" for an empty array, "Unimodal", "Bimodal" or "Trimodal"
        for one, two or three modes, and "Multimodal" for four or more.
    """
    # An empty array previously fell through to "Multimodal"; zero modes
    # is reported with the same "No mode" label used elsewhere in this file.
    labels = {
        0: "No mode",
        1: "Unimodal",
        2: "Bimodal",
        3: "Trimodal",
    }
    return labels.get(mode_arr.size, "Multimodal")

In [190]:
def five_num_summ(series):
    """Return the five-number summary of ``series`` as a float Series.

    The result is indexed by 'min', 'lowq', 'med', 'upq' and 'max', holding
    the minimum, the 0.25 quantile, the median, the 0.75 quantile and the
    maximum respectively.
    """
    labels = ['min', 'lowq', 'med', 'upq', 'max']
    stats = [
        series.min(),
        series.quantile(0.25),
        series.median(),
        series.quantile(0.75),
        series.max(),
    ]
    # dtype=float keeps the result float64 even when min/max are integers.
    return pd.Series(stats, index=labels, dtype=float)

In [191]:
def eq_depth_bin(series, depth, by='none'):
    """Assign each value of ``series`` to one of ``depth`` quantile-based bins.

    Binning is done with ``pd.qcut``; bin edges that coincide are dropped,
    so fewer than ``depth`` bins may come back.

    Note: ``by`` is accepted but is not used by this function.
    """
    return pd.qcut(series, q=depth, duplicates='drop')

In [192]:
def q1_data_statistics(data):
    """Print descriptive statistics for columns C, D, E and F of ``data``.

    For each of those columns the mean, midrange, mode(s), modality and
    five-number summary are written to standard output, each formatted to
    four decimal places.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain columns named 'C', 'D', 'E' and 'F'.

    Returns
    -------
    None
    """
    print("Q1. Data Statistics")
    for col in data.loc[:, ['C', 'D', 'E', 'F']]:
        series = data[col]
        mean = get_mean(series)
        midrange = get_midrange(series)
        mode = get_mode(series)
        if mode.size == series.size:
            # As many modes as observations: every value is tied for most
            # frequent, so the column is reported as having no mode.
            modality = "No mode"
            mode = []
        else:
            modality = get_modality(mode)

        summary = five_num_summ(series)
        print("Column: {}".format(col))
        print("    Mean: {:.4f}".format(mean))
        print("    Midrange: {:.4f}".format(midrange))
        print("    Mode: [ ", end='')
        for val in mode:
            print("{:.4f} ".format(val), end='')
        print("]")
        print("    Modality: {}".format(modality))
        print("    Five Number Summary:")
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0; it yields the same (label, value) pairs.
        for (ind, item) in summary.items():
            print("        {}: {:.4f}".format(ind, item))
        print("\n")

In [193]:
def q2_smoothing(data):
    """Partition column F of ``data`` into equal-depth bins and return them.

    The values of column 'F' are flattened to a one-dimensional array and
    passed to ``eq_depth_bin`` with a requested depth of 100.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a column named 'F'.

    Returns
    -------
    The bin assignment produced by ``eq_depth_bin`` for each value of
    column F. (Previously the bins were computed and then discarded, so
    the function had no observable result.)
    """
    values = data.loc[:, ['F']].values.ravel()
    return eq_depth_bin(values, 100)

In [194]:
# Name of the CSV file to analyse. The interactive prompt is commented out
# and a fixed file name is used instead.
#file_name = input("Enter name of file containing data: ")
file_name = "hwk01.csv"
# Load the CSV into the DataFrame used by the cells below.
df = pd.read_csv(file_name)

In [195]:
# Print the Q1 statistics for the loaded data, then run the Q2 binning step.
q1_data_statistics(df)
q2_smoothing(df)

Q1. Data Statistics
Column: C
    Mean: 5184.6630
    Midrange: 5037.0000
    Mode: [ 589.0000 6930.0000 ]
    Modality: Bimodal
    Five Number Summary:
        min: 78.0000
        lowq: 2795.5000
        med: 5180.0000
        upq: 7557.2500
        max: 9996.0000


Column: D
    Mean: -0.0348
    Midrange: -0.0138
    Mode: [ ]
    Modality: No mode
    Five Number Summary:
        min: -3.2984
        lowq: -0.7186
        med: -0.0454
        upq: 0.6769
        max: 3.2708


Column: E
    Mean: 15.4567
    Midrange: 19.1057
    Mode: [ ]
    Modality: No mode
    Five Number Summary:
        min: -16.4951
        lowq: 8.5649
        med: 15.4411
        upq: 22.4839
        max: 54.7065


Column: F
    Mean: 5.9200
    Midrange: 6.0000
    Mode: [ 6.0000 ]
    Modality: Unimodal
    Five Number Summary:
        min: 1.0000
        lowq: 3.0000
        med: 6.0000
        upq: 9.0000
        max: 11.0000


(0.999, 2.0]    175
(2.0, 3.0]       98
(3.0, 4.0]      100
(4.0, 5.0]   