In [18]:
import numpy as np
import math

#this method counts all of the unique features and classes then stores them in a 
#dictionary with the feature or class as the key and their counts as its value
#the method then returns the dictionary
def unique(data):
    """Count how often each distinct value occurs in ``data``.

    Returns:
        dict mapping every distinct value found by ``np.unique`` (so keys
        are inserted in sorted order) to its number of occurrences.
    """
    values, occurrences = np.unique(data, return_counts=True)
    return {value: count for value, count in zip(values, occurrences)}

#this method uses the given data and target to generate a predictive model for each feature of each class
#the probabilities of each feature given a class are then stored in a dictionary
#the method then returns the dictionary
def trainData(data,target):
    """Build the naive Bayes probability table from a training set.

    Args:
        data: 2-D array-like of categorical feature values, one row per
            sample.
        target: array-like holding one class label per row of ``data``.

    Returns:
        dict with two kinds of entries, inserted in this order:
        * ``'<feature>/<class>'`` -> fraction of the samples of that class
          whose row contains the feature value (conditional probability);
        * ``'<class>'`` -> fraction of all samples with that class (prior).

    Note:
        Feature values are matched anywhere in a row, so the same value
        appearing in two different feature columns is treated as a single
        feature.
    """
    data = np.asarray(data)
    target = np.asarray(target).ravel()
    featureValues = np.unique(data)
    classValues, classCounts = np.unique(target, return_counts=True)

    # One boolean row mask per class, computed once and reused per feature.
    classMasks = [target == classKey for classKey in classValues]

    dictionary = {}
    for featureKey in featureValues:
        # Look for the feature in the feature columns only; the class is
        # matched against ``target`` only.  Testing both against a combined
        # row miscounts whenever a feature value equals a class label.
        rowHasFeature = (data == featureKey).any(axis=1)
        for classKey, classCount, classMask in zip(classValues, classCounts, classMasks):
            matches = np.count_nonzero(rowHasFeature & classMask)
            dictionary[str(featureKey) + '/' + str(classKey)] = float(matches / classCount)

    # Class priors come straight from the counts np.unique already produced.
    for classKey, classCount in zip(classValues, classCounts):
        dictionary[str(classKey)] = float(classCount / len(target))
    return dictionary

#this method combines the data array with the target array to reshape
#them both into one array for clarity
def combineDataAndTarget(data,target):
    """Append the class labels to the feature matrix as a last column.

    Args:
        data: 2-D array-like of shape (samples, features).
        target: array-like with one label per row of ``data``.

    Returns:
        A new 2-D array of shape (samples, features + 1) whose last column
        is ``target``.

    Raises:
        ValueError: if ``data`` and ``target`` have different lengths.
    """
    # A single column_stack replaces the row-by-row np.append loop, which
    # copied the whole result on every iteration and, because it started
    # from an empty float array, silently coerced the result dtype.
    return np.column_stack((data, target))

#this method shuffles the data and target together so that every sample
#stays paired with its class, then separates them into two different arrays
#this method returns the shuffled data and target 
def shuffleArray(data,target):
    """Shuffle samples and labels in unison.

    Args:
        data: 2-D array-like of shape (samples, features).
        target: array-like with one label per row of ``data``.

    Returns:
        Tuple ``(newData, newTarget)``: shuffled copies of the inputs in
        which row ``i`` of ``newData`` still belongs to ``newTarget[i]``.
        ``newTarget`` is flattened to 1-D.  The inputs are not modified.
    """
    data = np.asarray(data)
    target = np.asarray(target)
    # Indexing both arrays with one shared permutation keeps rows and labels
    # paired without merging them into one array, so each keeps its own
    # dtype and nothing is copied row by row.
    order = np.random.permutation(len(data))
    return data[order], target[order].flatten()

#this uses the information given from the predictive model to 
#predict the class of all samples
#it returns all of the samples in the array with their labeled class
def predict(data,testData,probabilityDictionary):
    """Label every sample of ``testData`` with its predicted class.

    Each row is passed to ``predictSample`` together with ``data`` and
    ``probabilityDictionary``.

    Returns:
        2-D array with one row per test sample: the sample's feature values
        followed by the predicted class label in the last column.
    """
    labelledRows = [
        predictSample(data, row, probabilityDictionary) for row in testData
    ]
    # The leading empty float array mirrors the accumulator the rows are
    # appended to, so the dtype of the result stays the same.
    flat = np.concatenate([np.array([])] + labelledRows, axis=0)
    testShape = np.shape(testData)
    return flat.reshape((testShape[0], testShape[1] + 1))

#this uses the information given from the predictive model to 
#predict the class of one sample 
def predictSample(data,sample,probabilityDictionary):
    """Predict the class of a single sample.

    For every class in ``probabilityDictionary`` the score is the product of
    the stored ``'<feature>/<class>'`` probabilities of the sample's
    features, times the class prior, divided by the evidence returned by
    ``probabilityOfAllFeatures(data, sample)``.

    Args:
        data: feature matrix used to estimate the evidence term.
        sample: 1-D sequence of feature values.
        probabilityDictionary: table produced by ``trainData``.

    Returns:
        ``sample`` with the best-scoring class label appended.  Ties go to
        the class that appears first in ``probabilityDictionary``; with no
        classes at all an empty string is appended.
    """
    featuresProbability = probabilityOfAllFeatures(data,sample)
    scores = {}
    for label in getTargets(probabilityDictionary):
        likelihood = 1
        for feature in sample:
            # Feature values missing from the table (never seen in training)
            # leave the product unchanged.
            likelihood *= probabilityDictionary.get(feature+'/'+label, 1)
        scores[label] = likelihood * probabilityDictionary[label] / featuresProbability

    # max() returns the first key that reaches the maximum.
    classLabel = max(scores, key=scores.get) if scores else ''
    return np.append(sample, classLabel)

#gets the target from the probability dictionary and
#returns it as a list
def getTargets(probabilityDictionary):
    """List the class labels stored in the probability table.

    Keys containing ``'/'`` are feature/class entries; every other key is
    treated as a class label.  Labels are returned in dictionary order.
    """
    return [key for key in probabilityDictionary if '/' not in key]

#returns the probability of all features occurring from the sample
def probabilityOfAllFeatures(data,sample):
    """Return the product of the relative frequencies of the sample's features.

    Each feature value of ``sample`` that occurs in ``data`` contributes its
    occurrence count (from ``unique(data)``) divided by ``len(data)``;
    values that do not occur in ``data`` are skipped.
    """
    occurrences = unique(data)
    evidence = 1
    for value in sample:
        if value in occurrences:
            evidence *= (occurrences[value]/len(data))
    return evidence

#splits the data to be trained and tested based on the percentage the
#user entered
def splitData(originalData,originalTarget,testDataPercentage):
    """Shuffle, split, train and evaluate once, printing a report.

    Args:
        originalData: 2-D array of feature values, one row per sample.
        originalTarget: 1-D array with one class label per sample.
        testDataPercentage: fraction of the samples held out for testing.
            The value 1 trains and tests on the whole data set.  Any other
            value must leave both the training and the test part non-empty.

    Returns:
        None.  The probability table, the split, the predictions, the
        accuracy and one hard-coded example prediction are printed.
    """
    sampleCount = len(originalData)
    split = sampleCount-math.ceil(sampleCount*testDataPercentage)
    evaluateOnTrainingSet = testDataPercentage == 1
    # Reject an unusable percentage before any shuffling work is done.
    if not evaluateOnTrainingSet and not (0 < split < sampleCount):
        print('You entered: ', testDataPercentage,'\nYou cannot enter a percentage less than or equal to 0 or more than 1. ')
        return None

    data,target = shuffleArray(originalData,originalTarget)
    if evaluateOnTrainingSet:
        train_data = test_data = data
        target_train = target_test = target
    else:
        train_data, test_data = data[:split,:], data[split:,:]
        target_train, target_test = target[:split], target[split:]

    probabilityDictionary = trainData(train_data,target_train)
    print('Probability Table:\n',probabilityDictionary,'\n')
    newArray = predict(train_data,test_data,probabilityDictionary)
    predictedTarget = getPredictedTarget(newArray,probabilityDictionary)
    acc = accuracy(target_test,predictedTarget)
    print('train_data: \n',train_data)
    print('test_data: \n',test_data)
    print('target_train: \n',target_train)
    print('target_test: \n',target_test)
    print('predictedTarget: \n',predictedTarget)
    print('***With a test data percentage of '+ str((testDataPercentage*100)) + '% the accuracy is: ' + str(acc), '***\n')
    array = np.array([['Sunny','Cool','High','Strong']])
    # Use the training rows for the evidence term, exactly as the evaluation
    # above does, so held-out samples never feed into a prediction.
    newArray = predict(train_data,array,probabilityDictionary)
    print('\nAfter training the algorithm with the train data: ')
    print('***From the input ', array, '\nthe algorithm predicted ', newArray,'***\n')

#returns the predicted target by removing all features from each of the samples except the classes
def getPredictedTarget(data,probabilityDictionary):
    """Extract the class column from an array of labelled samples.

    Args:
        data: 2-D array whose last column holds the class label of each row.
        probabilityDictionary: unused; kept so existing callers keep working.

    Returns:
        1-D array with the last element of every row, as a copy that does
        not alias ``data``.  Empty input yields an empty array.
    """
    labelled = np.asarray(data)
    if labelled.size == 0:
        return np.array([])
    # Slicing the last column replaces deleting every other column row by row.
    return labelled[:, -1].copy()

#compares the original targets with the predicted target
#to find the percentage of targets that were correctly labeled
def accuracy(target_test,predictedTarget):
    """Return the fraction of predictions that match the true labels.

    Both arguments are arrays accessed with ``.item(i)``; positions
    ``0 .. len(target_test) - 1`` are compared pairwise.
    """
    total = len(target_test)
    hits = sum(
        (1.0 for idx in range(total)
         if target_test.item(idx) == predictedTarget.item(idx)),
        0.0,
    )
    return hits/total


# Play-tennis toy data set: one array per feature, 14 samples each.
outlook = np.array([
    'Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
    'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain',
])
temperature = np.array([
    'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
    'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
])
humidity = np.array([
    'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
    'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
])
wind = np.array([
    'Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong',
    'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong',
])
# Class label of every sample.
playTennis = np.array([
    'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
    'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
])
# One row per sample, one column per feature.
data = np.column_stack((outlook, temperature, humidity, wind))

print('Original Data with Features and Class: \n',combineDataAndTarget(data,playTennis), '\n')
# Hold out 30% of the samples for testing.
splitData(data,playTennis,.3)

Original Data with Features and Class: 
 [['Sunny' 'Hot' 'High' 'Weak' 'No']
 ['Sunny' 'Hot' 'High' 'Strong' 'No']
 ['Overcast' 'Hot' 'High' 'Weak' 'Yes']
 ['Rain' 'Mild' 'High' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'No']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'Yes']
 ['Sunny' 'Mild' 'High' 'Weak' 'No']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'Yes']
 ['Sunny' 'Mild' 'Normal' 'Strong' 'Yes']
 ['Overcast' 'Mild' 'High' 'Strong' 'Yes']
 ['Overcast' 'Hot' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Mild' 'High' 'Strong' 'No']] 

Probability Table:
 {'Cool/No': 0.25, 'Cool/Yes': 0.2, 'High/No': 0.75, 'High/Yes': 0.6, 'Hot/No': 0.5, 'Hot/Yes': 0.2, 'Mild/No': 0.25, 'Mild/Yes': 0.6, 'Normal/No': 0.25, 'Normal/Yes': 0.4, 'Overcast/No': 0.0, 'Overcast/Yes': 0.4, 'Rain/No': 0.5, 'Rain/Yes': 0.4, 'Strong/No': 0.75, 'Strong/Yes': 0.4, 'Sunny/No': 0.5, 'Sunny/Yes': 0.2, 'Weak/No': 0.25, 'Weak/Yes': 0.6, 'No': 0.4444444444444444, '