## IMPORT LIBRARY

In [1]:
import random  # to generate test data


## STEMPLOT FUNCTIONS

### CREATES SINGLE STEMPLOT FROM INTEGER DATA

In [2]:
def create_int_stemplot(x, l=True, p=True, leaf_order=False):
    """ Return a dictionary where each key is a 'stem' and its associated value is a string of ordered 'leaves'. 
    
    Arguments: x, the data, is a 1D list of integers.
    l is a Boolean where l=True means to print a legend.
    p is a Boolean where p=True means to print the stemplot.
    leaf_order is a Boolean used only when create_int_stemplot() is called by compare_stemplots().
    """
    
    # print legend for plot
    if l:
        print 'LEGEND'
        print '\t----------------'
        print '\t00|1 shows that x contains the number 1' 
        print '\t12|25 shows that x contains 122, 125'
        print '\t----------------'
        
        
    # prepend zeros so len(x[i]) == len(x[j]) for all i,j
    offset = max([len(str(i)) for i in x])
    for i in range(len(x)):
        x[i] = str(x[i])
        while len(x[i]) < offset:
            x[i] = '0' + x[i]
 

    # create dictionary from data: stemplot = {'stem': ['leaf', 'leaf']}
    # store 1s digit of each x[i] according to its 10s and 100s digit
    # e.g. if x[i]=001, x[j]=002, x[k]=006, stemplot = {'00': ['1','2','6']}
    # e.g. if x[i]=021 and x[j]=218, stemplot = {'02': ['1'], '21', ['8']}
    stemplot = dict()
    for em in x:
        # if stem is already in dictionary
        if em[:-1] in stemplot:
            # append the new leaf
            stemplot[em[:-1]] = stemplot[em[:-1]] + em[-1]
        else:
            # create a stem with its first leaf
            stemplot[em[:-1]] = em[-1]
 

    # make each dictionary value, currently a list of characters, into a string of characters
    # e.g. stemplot = {'00': ['1','2','6']} becomes stemplot = {'00': '126'}
    # sort dictionary values; sort in descending order if leaf_order = True
    for key,value in stemplot.items():
        stemplot[key] = ''.join(sorted(value, reverse=leaf_order))
            
            
    # prints stemplot as column of stem|leaves
    # e.g. 08|012446
    #      09|002
    if p:
        print 'DATA'
        for i in sorted(stemplot.keys()):
            print '\t' + i + '|' + stemplot[i]
        
        
    return stemplot

### CREATES SINGLE STEMPLOT FROM FLOAT DATA

In [3]:
def create_float_stemplot(x, p=True, l=False, leaf_order=True, precision=2):
    """ Return a dictionary where each key is a 'stem' and its associated value is a string of ordered 'leaves'. 
    
    Arguments: x, the data, is a 1D list of integers.
    l is a Boolean where l=True means to print a legend.
    p is a Boolean where p=True means to print the stemplot.
    leaf_order is a Boolean used when create_int_stemplot() is called by compare_stemplots().
    precision is an integer specifying the number of digits for rounding x.
    """
    
    # need when create_float_stemplot() is called by compare_stemplots()
    x = [float(i) for i in x]
    
    
    # round data to desired precision, default = 2 decimal places    
    # pad with trailing zeros, e.g. 2.1 becomes 2.100 if precision=3
    # prepend zeros, e.g. 2.1 becomes 002.1
    offset = max([str(i).find('.') for i in x])
    for i in range(len(x)):
        x[i] = str(round(x[i], precision))
        a,b = x[i].split('.')
        while len(b) < precision:
            b = b + '0'
        while len(a) < offset:
            a = '0' + a
        x[i] = '.'.join([a, b]) 
    
    
    # print legend for plot
    if l:
        print 'LEGEND'
        print '\t----------------'
        print '\t12|25 shows that x contains 12.2, 12.5'
        print '\t1.2|25 shows that x contains 1.22, 1.25'
        print '\t----------------'
   

    # create dictionary from data: stemplot = {'stem': ['leaf', 'leaf']}
    # store 1s digit of each x[i] according to its 10s and 100s digit
    # e.g. if x[i] = 001, x[j] = 002, x[k] = 006, stemplot = {'00': ['1','2','6']}
    # e.g. if x[i] = 021 and x[j] = 218, stemplot = {'02': ['1'], '21', ['8']}
    stemplot = dict()
    for em in x:
        # if stem is already in dictionary
        if em[:-1] in stemplot:
            # append the new leaf
            stemplot[em[:-1]] = stemplot[em[:-1]] + em[-1]
        else:
            # create a stem with its first leaf
            stemplot[em[:-1]] = em[-1]

            
    # reformat dictionary keys when stem-end coincides with decimal place  
    # e.g. stemplot['124.': '1'] becomes stemplot['124': '1']
    if precision == 1:
        for key in stemplot.keys(): stemplot[key[:-1]] = stemplot.pop(key) 

            
    # make each dictionary value, currently a list of characters, into a string of characters
    # e.g. stemplot = {'00': ['1','2','6']} becomes stemplot = {'00': '126'}
    # sort dictionary values; sort in descending order if leaf_order = True
    for key,value in stemplot.items():
        stemplot[key] = ''.join(sorted(value, reverse=leaf_order))
            
            
    # prints stemplot as column of stem|leaves
    # e.g. 08|012446
    #      09|002
    if p:
        print 'DATA'
        for key in sorted(stemplot.keys()):
            print '\t' + key + '|' + stemplot[key]
    
    
    return stemplot

### CREATES TWO STEMPLOTS FROM INTEGER OR FLOATS DATA

In [4]:
def compare_stemplots(x, y, integer_data=True, precision=2):
    """ No return value; function prints two stemplots that share their stem.
    
    Arguments: x and y, the data, are two 1D lists of integers; can be floats or ints.
    integer_data is a Boolean where integer_data = True means to call create_int_stemplot() on x and y; 
    integer_data = False means to call create_float_stemplot() on x and y.
    precision is an integer specifying the number of digits for rounding x, y when calling create_float_stemplot().
    """
        
    # create stemplots for floats or integer data
    # leaf_order is a Boolean used to mirror x data around its stem shared with y
    if integer_data:
        x_stemplot = create_int_stemplot(x, p=False, l=False, leaf_order=True)
        y_stemplot = create_int_stemplot(y, p=False, l=False)
    else:
        x_stemplot = create_float_stemplot(x, p=False, l=False, leaf_order=True, precision=precision)
        y_stemplot = create_float_stemplot(y, p=False, l=False, precision=precision)
        
    # print legend for floats or integer data
    print 'LEGEND \n \t----------------'
    if integer_data:
        print '\t' + '1'.rjust(3) + '|00|1 shows that x and y both contain the number 1' 
        print '\t850|12|25 shows that x contains numbers 128, 125, 120 and y contains 122, 125'
    else:
        print '\t' + '1'.rjust(3) + '|00|1 shows that x and y both contain the number 0.1' 
        print '\t850|1.2|25 shows that x contains numbers 1.28, 1.25, 1.20 and y contains 1.22, 1.25'
    print '\t----------------'
        
    
    # print floats or integer data as column of x_leaves|stem|y_leaves
    # e.g. 10|13|125
    # if x has no leaves but y does, print **|00|114
    # if y has no leaves but x does, print 61|02|***
    print 'DATA'
    for em in sorted(set(x_stemplot.keys()).union(set(y_stemplot.keys()))):  # sorted keys for both dictionaries
        offset = max([len(val) for val in x_stemplot.values()])  # used to justify text
        spacer = '*'*offset  # used to justify text
        try: print '\t', x_stemplot[em].rjust(offset) + '|' + em + '|' + y_stemplot[em]  # x_leaves|stem|y_values
        except: 
            try: print x_stemplot[em].rjust(offset) + '|' + em + '|' + spacer  # x_leaves|stem|*****
            except: print spacer.rjust(offset) + '|' + em + '|' + y_stemplot[em]  # ****|stem|y_values      

## GENERATE TEST DATA & CALL FUNCTIONS

In [5]:
pop_x = range(0,300,1)
xn = 50
x1 = random.sample(pop_x, xn) + random.sample(pop_x, xn)  # integer test data
x2 = random.sample(pop_x, xn) + random.sample(pop_x, xn)

In [6]:
x1_stemplot = create_int_stemplot(x1, l=True)

LEGEND
	----------------
	00|1 shows that x contains the number 1
	12|25 shows that x contains 122, 125
	----------------
DATA
	00|478
	01|014679
	02|57
	03|33469
	04|3
	05|26
	06|567799
	07|2
	08|46
	09|56
	10|2
	11|589
	12|012469
	13|259
	14|036778
	15|3
	16|489
	17|79
	18|1224677
	19|089
	20|89
	21|289
	22|04579
	23|4457
	24|4
	25|3779
	26|0129
	27|78
	28|7899
	29|013459


In [7]:
x1_x2_stemplot = compare_stemplots(x1, x2)

LEGEND 
 	----------------
	  1|00|1 shows that x and y both contain the number 1
	850|12|25 shows that x contains numbers 128, 125, 120 and y contains 122, 125
	----------------
DATA
	    874|00|3579
	 976410|01|458
	     75|02|*******
	  96433|03|389
	      3|04|578
	     62|05|168
	 997765|06|3457
	      2|07|044
	     64|08|27
	     65|09|234579
	      2|10|35667
	    985|11|035
	 964210|12|25688
	    952|13|3
	 877630|14|36
	      3|15|5
	    984|16|1137
	     97|17|035688
	7764221|18|1456
	    980|19|568
	     98|20|02489
	    982|21|0258
	  97540|22|06
	   7544|23|1359
	      4|24|467
	   9773|25|37
	   9210|26|0278
	     87|27|02679
	   9987|28|017
	 954310|29|468


In [8]:
pop_y = range(0,20,1)
yn=20
y1 = [i + random.uniform(0,10) for i in random.sample(pop_y, yn)]  # floats test data
y1 = [i + random.uniform(0,10) for i in y1]
y2 = [i + random.uniform(0,10) for i in random.sample(pop_y, yn)]

In [9]:
y1_stemplot = create_float_stemplot(y1, l=True, precision=3)

LEGEND
	----------------
	12|25 shows that x contains 12.2, 12.5
	1.2|25 shows that x contains 1.22, 1.25
	----------------
DATA
	05.00|9
	09.23|4
	11.61|2
	12.50|7
	14.47|7
	16.36|3
	17.42|6
	17.58|8
	18.25|6
	18.71|3
	19.19|3
	20.16|6
	20.39|4
	21.07|3
	22.29|8
	22.54|9
	25.20|4
	25.65|0
	32.71|4
	34.62|9


In [10]:
y1_y2_stemplot = compare_stemplots(y1, y2, integer_data=False, precision=1)

LEGEND 
 	----------------
	  1|00|1 shows that x and y both contain the number 0.1
	850|1.2|25 shows that x contains numbers 1.28, 1.25, 1.20 and y contains 1.22, 1.25
	----------------
DATA
	 0|05|6
	**|07|3
	 2|09|80
	 6|11|9
	 5|12|974
	**|13|2
	 5|14|42
	**|15|4
	 4|16|81
	64|17|**
	73|18|631
	 2|19|7
	42|20|**
	 1|21|**
	53|22|**
	72|25|3
	**|26|1
	 7|32|**
	 6|34|**
