Functions to classify the data set of letters.

In [20]:
from numpy import genfromtxt
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy import ndimage
import PIL
from persim import plot_diagrams
from ripser import ripser, lower_star_img
import csv
import math
import warnings
import random
import copy

# Side length of the square pixel grid that one character is drawn on.
_GRID_SIDE = 10

# One past the last pixel index; the 'lr' and 'rl' scans always read up to here.
_SCAN_END = _GRID_SIDE * _GRID_SIDE + 1


def _pixel_xy(k, mirror = False):
    '''Maps the flat index k of a pixel to its [x, y] plotting coordinates.

       x is the column, (k-1) % 10, or 10 minus that when mirror is True.
       y is 10 minus the row, so the first stored row is drawn at the top.
    '''
    x = (k-1) % _GRID_SIDE
    if mirror:
        x = _GRID_SIDE - x
    # int(.../...) rather than // so that the result is unchanged for every k.
    y = _GRID_SIDE - int((k-1)/_GRID_SIDE)
    return [x, y]


def _count_infinite_bars(diagram):
    '''Counts the bars of one persistence diagram that have an infinite endpoint.'''
    return sum(1 for bar in diagram if float('inf') in bar)


def _show_scan(points, diagrams):
    '''Plots the scanned point cloud (figure 1) and its PH diagrams (figure 2).'''
    x_data = [p[0] for p in points]
    y_data = [p[1] for p in points]

    # Plots the shape of the data.
    plt.figure(1)
    plt.plot(x_data, y_data, 'ro', scalex = True, scaley = True)
    axes = plt.gca()
    axes.set_xlim([-1,11])
    axes.set_ylim([-1,11])

    # Plots the PH diagram of the complex created from the Rips filtration.
    plt.figure(2)
    plot_diagrams(diagrams, show=True, xy_range = [-0.25,5,-0.25,5])


# Function to return the betti numbers for dimensions 0 and 1, of the simplicial complex
# created from a Rips filtration of a point cloud.
def betti_nums(data, sType, scanStart, scanStop, eps = 100, showPlot = False, doSlice = False):
    '''Inputs: -data: Your data set. An indexable sequence of 1's and 0's, where index k
                      is read as pixel k of a 10x10 grid stored row by row. The callers
                      in this file start at index 1 (index 0 is presumably a label
                      column -- confirm against the data file).

               -sType: Scanning type (case-insensitive). Should be a string either,
                       'ud'(up-and-down), 'lr'(left-to-right), or 'rl'(right-to-left).
                       *For down-and-up scanning, use 'ud', fix scanStop at one past the
                        last index of your list, and alter scanStart.*
                       **For horizontal slices, choose 'ud'. Then choose scanStart
                         and scanStop to match where you want to start and end your slice.**
                       ***For middle-out scanning, repeat the slicing procedure, but
                          move scanStart and scanStop apart to gradually
                          increment the size of the slice.***

               -scanStart: 'ud': first index of data that is read.
                           'lr': first index that is read, but only when doSlice is
                                 True; otherwise reading starts at index 1.
                           'rl': ignored, reading always starts at index 1.

               -scanStop: 'ud': one past the last index of data that is read.
                          'lr'/'rl': data is always read up to index 100; scanStop
                                 instead limits the columns, only points with
                                 1 <= x < int(scanStop/10) are kept.
                                 NOTE(review): this always drops x = 0, i.e. the
                                 left-most column for 'lr' -- confirm this is intended.

               -eps: The max distance in a Rips filtration (passed to ripser as thresh).
                     This becomes our, "infinity." This is initialized to 100. For more
                     precision, decrease this to fit your data set.

               -showPlot: Whether or not to show the point cloud and the PH diagrams.
                          Initialized to False, change to True to show them.

               -doSlice: Only used by 'lr' scanning. Initialized to False. Set to True
                         to start reading the data at scanStart instead of index 1.

       Outputs: -(b0, b1): A tuple containing betti0 and betti1 for the scanned data,
                           counted as the number of bars with an infinite endpoint in
                           the dimension 0 and dimension 1 diagrams. (0, 0) is returned
                           when no point survives the scan (e.g. the space character).

       Raises: -ValueError: If sType is not 'ud', 'lr' or 'rl'.
    '''

    mode = sType.lower()

    # Up-and-Down scanning: every set pixel in [scanStart, scanStop) becomes a point.
    if mode == 'ud':
        points = [_pixel_xy(k) for k in range(scanStart, scanStop) if data[k] == 1]

    # Left-to-Right and Right-to-Left scanning: the whole character is read and
    # then cut down to the columns selected by scanStop.
    elif mode in ('lr', 'rl'):
        mirror = (mode == 'rl')
        firstIndex = scanStart if (mode == 'lr' and doSlice) else 1
        columnLimit = int(scanStop/_GRID_SIDE)

        points = []
        for k in range(firstIndex, _SCAN_END):
            if data[k] == 1:
                point = _pixel_xy(k, mirror)
                if 1 <= point[0] < columnLimit:
                    points.append(point)

    # Previously an unknown type fell through to an unbound variable.
    else:
        raise ValueError("sType must be 'ud', 'lr' or 'rl', got {!r}".format(sType))

    # Test for the space (or just nothing), otherwise, ripser breaks.
    if not points:
        return (0,0)

    # Filters the point cloud, using a Rips filtration with homology up to dimension 1.
    diagrams = ripser(np.array(points), maxdim = 1, metric = 'euclidean',thresh = eps)['dgms']

    # If you want to see the shape of the data and the persistence diagram,
    # both are displayed.
    if showPlot:
        _show_scan(points, diagrams)

    # Calculates the 0-dim betti number, and the 1-dim one if its diagram exists.
    betti0 = _count_infinite_bars(diagrams[0])
    betti1 = _count_infinite_bars(diagrams[1]) if len(diagrams) == 2 else 0

    # Returns the betti numbers.
    return (betti0, betti1)

    
# Scans that each contribute the two features (100-b0)/100 and (100-b1)/100, in the
# order they appear in the classification vector. Each entry is
# (scan type passed to betti_nums, scanStart, scanStop, eps).
_TWO_FEATURE_SCANS = (
    ('ud', 1, 86, 1.42),      # 'ud': top-to-bottom scan over indices 1..85.
    ('ud', 45, 101, 1.42),    # 'du': bottom part of the character, indices 45..100.
    ('lr', 1, 86, 1.42),      # 'lr': left-to-right scan with scanStop = 86.
    ('ud', 25, 76, 1.42),     # 'mo-hori': horizontal slice through the middle, 25..75.
    ('ud', 1, 101, 1.0),      # 'dist1': whole character with 'infinity' equal to 1.
    ('ud', 1, 101, 0),        # 'ptdim': eps = 0, so b0 is presumably the pixel count.
    ('lr', 1, 40, 1.42),      # 'shortlr': shorter left-to-right scan for 'endpoints'.
)

# The three short scans (top, bottom, left) whose b0 values are summed into the
# single 'ends' feature. Each entry is (scan type, scanStart, scanStop).
_END_SCANS = (('ud', 1, 21), ('ud', 80, 101), ('lr', 1, 31))


def _classification_features(letter_one_line):
    '''Builds the classification vector for the data of one character.

       Layout of the returned float array (18 entries):
         [0:2]   b0 and b1 of the whole character (unscaled).
         [2:16]  (100-b0)/100 and (100-b1)/100 for each scan in _TWO_FEATURE_SCANS.
         [16]    'ends': (100 - sum of b0 over _END_SCANS)/100.
         [17]    'width': scanWidth/100, see below.
    '''
    # Total betti numbers of the character go in positions 0 and 1.
    (b0,b1) = betti_nums(letter_one_line, 'ud', 1, 101, eps = 1.42, showPlot = False, doSlice = False)
    features = [b0, b1]

    # Betti-number features, each rescaled to lie between 0 and 1.
    for (sType, scStart, scStop, eps) in _TWO_FEATURE_SCANS:
        (b0,b1) = betti_nums(letter_one_line, sType, scStart, scStop, eps = eps, showPlot = False, doSlice = False)
        features.append((100-b0)/100)
        features.append((100-b1)/100)

    # Ends: number of components found at the top, bottom and left of the
    # character, i.e. what is left if the middle is excised.
    endComponents = 0
    for (sType, scStart, scStop) in _END_SCANS:
        endComponents += betti_nums(letter_one_line, sType, scStart, scStop, eps = 1.42, showPlot = False, doSlice = False)[0]
    features.append((100-endComponents)/100)

    # Width: the scan interval [50-i, 50+i] is widened (i = 1, 11, 21, 31, 41) until
    # a 1D 'hole' is found (b1 >= 1). The width 2*i of that interval, divided by
    # 100, is the feature; it stays 0 if no hole is ever found.
    scanWidth = 0
    for i in range(1,50,10):
        (b0,b1) = betti_nums(letter_one_line, 'lr', 50-i, 50+i, eps = 1.42, showPlot = False, doSlice = False)
        if b1 >= 1:
            scanWidth = 2*i
            break
    features.append(scanWidth/100)

    return np.array(features, dtype = float)


# Function to create classification vectors for data sets.
def classificationVector(dataSet, flag):
    '''Inputs: -dataSet: The data set to be analyzed. For 'whole' this is a collection
                         of rows, one per character; for 'single' it is the data of
                         one character.
               -flag: Flag to control single vector analysis or whole data set analysis.
                       Either 'single' or 'whole' (case-insensitive).

       Outputs: -For 'single': a numpy array holding the classification vector
                               of the character.
                -For 'whole': a list with one such array per row of dataSet, in
                              row order (an empty list for an empty data set).

       Raises: -ValueError: If flag is neither 'single' nor 'whole'.
    '''
    mode = flag.lower()

    # Creates classification vectors for each character in the data set.
    if mode == 'whole':
        return [_classification_features(letter_one_line) for letter_one_line in dataSet]

    # Creates a classification vector for one character.
    if mode == 'single':
        return _classification_features(dataSet)

    # Previously an unknown flag silently returned None.
    raise ValueError("flag must be 'single' or 'whole', got {!r}".format(flag))
    

# Function to compare the unknown character to the known characters
# and decide which character the unknown is.
def comparison(known, unknown):
    '''Inputs: -known: A list of weight vectors for the known symbols, ordered
                       like the alphabet string below.
               -unknown: A weight vector for the unknown symbol.

       Outputs: -unknownLetter: A string containing the (upper-cased) symbol whose
                                known vector is nearest to the unknown vector.
                                On a tie the earliest symbol wins.
    '''
    # Alphabet of the 26 latin letters, 5 punctuation marks, and a space.
    alphabet = 'abcdefghijklmnopqrstuvwxyz., -:;'

    # Euclidean distance from the unknown vector to every known vector. Only the
    # first len(unknown) entries of each known vector take part.
    distances = [
        math.sqrt(sum((value - reference[i])**2 for i, value in enumerate(unknown)))
        for reference in known
    ]

    # Index of the smallest distance; this is the position of the decided
    # symbol in the alphabet string.
    clsif = distances.index(min(distances))

    return alphabet[clsif].upper()


# Function to add noise to the data set, and attempt to classify the noisy data.
def noiseTests(noiseType, dSet, dSetVect):
    '''Inputs: -noiseType: A string signifying the type of noise to add.
                           Either '0', '1', 'rand', or 'none' (case-insensitive).
                        '0': Randomly changes five data points to zeroes.
                        '1': Randomly changes five data points to ones.
                        'rand': Randomly changes five data points to 0 or 1.
                               This is chosen randomly.
                        'none': No noise is added to the data set. This should
                               result in a classification rate of 1.
                        Any other string also adds no noise.

               -dSet: The data set to be analyzed, one row per symbol in the
                      order of the alphabet string below. It is not modified;
                      noise is applied to a copy of each row.

               -dSetVect: A list of classification vectors calculated from the data set.

       Outputs: -classRate: A floating point number between 0 and 1 signifying
                            the proportion of correctly classified letters to
                            the total number of letters. e.g. if there was 100%
                            classification, this number would be 1.

    '''
    # Alphabet of letters: 26 latin letters, 5 punctuation marks, and a space.
    alph = 'abcdefghijklmnopqrstuvwxyz., -:;'

    kind = noiseType.lower()

    if kind == 'none':
        print('Classification Rate without noise: ')

    correct = 0    # The number of correctly classified letters.

    # Suppresses any warning messages, but only while the tests run, so the
    # caller's warning filters are restored afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # Iterates over each letter in the data set of letters, adds noise,
        # and tries to classify the noisy letter.
        for i, symbol in enumerate(alph):

            # Copy of the letter, so the noise never reaches dSet itself.
            unknownLetter = copy.deepcopy(dSet[i,:])

            # Five distinct coordinates to change. They are drawn for every
            # noise type so the random sequence does not depend on the type.
            # NOTE(review): range(1,100) never selects index 100 -- confirm
            # whether the last data point is meant to be excluded.
            noise = random.sample(range(1,100), 5)

            if kind == '0':
                # Changes the chosen coordinates to 0's.
                for k in noise:
                    unknownLetter[k] = 0

            elif kind == '1':
                # Changes the chosen coordinates to 1's.
                for k in noise:
                    unknownLetter[k] = 1

            elif kind == 'rand':
                # Changes the chosen coordinates to 0 or 1, chosen randomly.
                for k in noise:
                    unknownLetter[k] = random.randint(0,1)

            # Generates a classification vector for the (noisy) letter and
            # decides which symbol it is.
            unknownVect = classificationVector(unknownLetter, 'single')
            prediction = comparison(dSetVect, unknownVect)

            # If the letter is accurately classified, the number of correct
            # classifications is incremented.
            if prediction == symbol.upper():
                correct += 1

    # Returns the classification rate of the noisy letters as the proportion of
    # correctly classified letters to the total number of letters.
    classRate = correct/len(alph)
    return classRate

Testing the classification methods on the data set without added noise.

In [23]:
# Reads the letters data set from disk and builds one classification vector
# per symbol in it.
letterSet = genfromtxt('../AMAT585/letters.csv', delimiter=',')
letterSetVect = classificationVector(letterSet, 'whole')

# Classification rate when no noise is added. Every symbol is compared against
# vectors computed from the very same data, so this should be 100% (1.0).
baselineRate = noiseTests('none', letterSet, letterSetVect)
print(baselineRate)

Classification Rate without noise: 
1.0


Testing the classification methods on the data set with added noise.

In [24]:
# Performing numTrials tests of classifying the data set of letters with noise added.
# Then, the average classification rate is calculated. This is performed three
# times, once for each type of noise.

# Number of noisy passes over the data set per noise type. Used both for the
# loop and for the averages, so the two cannot drift apart.
numTrials = 100

# Loads the letters data set and calculates the list of classification vectors for
# the letters. (Reloaded here so this cell can be run on its own.)
letterSet = genfromtxt('../AMAT585/letters.csv', delimiter = ',')
letterSetVect = classificationVector(letterSet, 'whole')

classifVect0 = []       # Classification rates for added 0's noise.
classifVect1 = []       # Classification rates for added 1's noise.
classifVectRand = []    # Classification rates for randomly added 0's or 1's noise.

# Calls the noiseTests function numTrials times per noise type to collect a
# large sample of classification rates. The three types are interleaved within
# each trial, in the order '0', '1', 'rand'.
for h in range(1, numTrials + 1):
    classifVect0.append(noiseTests('0', letterSet, letterSetVect))
    classifVect1.append(noiseTests('1',letterSet, letterSetVect))
    classifVectRand.append(noiseTests('rand',letterSet, letterSetVect))

# Displays the average classification rates for each noise type.
print('The average classification rate for 0-type noise added to the data set is: ')
print(sum(classifVect0)/numTrials)
print()

print('The average classification rate for 1-type noise added to the data set is: ')
print(sum(classifVect1)/numTrials)
print()

print('The average classification rate for random-type noise added to the data set is: ')
print(sum(classifVectRand)/numTrials)

The average classification rate for 0-type noise added to the data set is: 
0.59125

The average classification rate for 1-type noise added to the data set is: 
0.1775

The average classification rate for random-type noise added to the data set is: 
0.291875
