In [2]:
import os
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing 
import math
import warnings
import operator

'''
My Own Algorithms Import
'''
from preprocessing.scaler import MinMaxScaler, StandardScaler
from preprocessing.pca import PCA
from analysis.measure import ConfusionMatrix, ROC

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Load the two raw data sets from the local data-sets/ directory.
golf_data = pd.read_csv('data-sets/golf-weather.csv')
interview_data = pd.read_excel('data-sets/interview.xlsx')

# Shuffle the rows of each frame. Every permutation comes from its own
# RandomState seeded with 42, so the row order is identical on every run.
golf_row_order = np.random.RandomState(seed=42).permutation(golf_data.index)
golf_data = golf_data.reindex(golf_row_order)

interview_row_order = np.random.RandomState(seed=42).permutation(interview_data.index)
interview_data = interview_data.reindex(interview_row_order)

ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 1.0.0 for Excel support Use pip or conda to install xlrd.

# Naive Bayes Classifier with Golf-Weather Data

## Analyze the Data

In [None]:
# Print the column names, dtypes and non-null counts of the golf-weather frame.
golf_data.info()

In [None]:
# Show the first rows of the golf-weather frame (row order is the seeded shuffle).
golf_data.head()

## Manipulate Data

In [None]:
# Recode 'Windy' as booleans: the literal "f" becomes False, anything else True.
golf_data['Windy'] = [w != "f" for w in golf_data['Windy']]

## Set Up Data

In [None]:
# Row position that separates the first 75% of the rows (training) from the rest (test).
seventy_five__percentile = int(len(golf_data)*0.75)

# Separate the feature columns from the 'Play' target once, then slice both by position.
golf_features = golf_data.drop(columns=['Play'])
golf_target = golf_data['Play']

# The feature frames get a fresh 0..n-1 index; the target Series keep their original labels.
x_train = golf_features.iloc[:seventy_five__percentile].reset_index(drop=True)
x_test = golf_features.iloc[seventy_five__percentile:].reset_index(drop=True)
y_train = golf_target.iloc[:seventy_five__percentile]
y_test = golf_target.iloc[seventy_five__percentile:]

## Naive Bayes Classifier

In [None]:
class NaiveBayes():
    """Categorical Naive Bayes classifier backed by per-feature frequency tables.

    ``fit`` stores one ``pandas.DataFrame`` per feature column in
    ``self.feature_tables[feature]``:

    * rows    -- every distinct feature value, in order of first appearance,
                 followed by a final ``'Total'`` row holding the column sums;
    * columns -- one count column per class label, a ``'Total'`` count column,
                 then ``'P(<label>)'`` and ``'P(Total)'`` columns holding each
                 count divided by the sum of its count column.

    ``predict_sample`` scores each class as
    ``prod_f P(value_f | class) * P(class) / prod_f P(value_f)`` and returns
    the class with the highest score.

    Limitations: no smoothing is applied, so a feature value never observed
    together with a class gives that class a score of zero, and a feature
    value never observed during ``fit`` raises ``KeyError``. A feature value
    that is literally ``'Total'`` would collide with the summary row.
    """

    def fit(self, x_train, results):
        """Build the frequency/probability table of every feature column.

        Args:
            x_train: DataFrame of categorical features, one row per sample.
            results: class label of every row of ``x_train``, aligned by
                position (NumPy array, list or Series).

        Raises:
            ValueError: if ``results`` and ``x_train`` differ in length.
        """
        # np.asarray makes the labels purely positional (a Series carrying a
        # shuffled index would otherwise be read by index label) and
        # guarantees the element-wise ``==`` used for the class priors.
        self.results = np.asarray(results)
        if len(self.results) != len(x_train):
            raise ValueError(
                "x_train has {} rows but {} labels were given".format(
                    len(x_train), len(self.results)))

        self.results_unique = np.unique(self.results)
        self.results_probs = ["P({})".format(label)
                              for label in self.results_unique.tolist()]

        labels = self.results.tolist()
        self.feature_tables = {}
        for column in x_train.columns:
            self.feature_tables[column] = self._build_feature_table(
                x_train[column].tolist(), labels)

    def _build_feature_table(self, values, labels):
        """Return the count/probability table of one feature column."""
        categories = list(dict.fromkeys(values))  # unique, first-seen order
        classes = self.results_unique.tolist()
        row_of = {category: i for i, category in enumerate(categories)}
        col_of = {label: j for j, label in enumerate(classes)}

        # Tally in a plain array: avoids the removed ``.ix`` indexer and the
        # chained-indexing ``+=`` that only worked when pandas returned a view.
        counts = np.zeros((len(categories), len(classes)), dtype=np.int64)
        for value, label in zip(values, labels):
            counts[row_of[value], col_of[label]] += 1

        table = pd.DataFrame(counts, index=categories, columns=classes)
        table['Total'] = table.sum(axis=1)

        # One probability column per count column (the classes and 'Total').
        # The column list is snapshotted because the loop adds columns.
        for count_column in list(table.columns):
            table["P({})".format(count_column)] = (
                table[count_column] / table[count_column].sum())

        # Summary row, appended last so it never enters the denominators above.
        table.loc['Total'] = table.sum(axis=0)
        return table

    def predict(self, x_test):
        """Return the predicted class label of every row of ``x_test`` as a list."""
        return [self.predict_sample(sample) for _, sample in x_test.iterrows()]

    def predict_sample(self, sample):
        """Return the most probable class label for a single sample.

        Args:
            sample: Series indexed by feature name, one value per feature.

        Raises:
            KeyError: if a feature or a feature value was not seen by ``fit``.
        """
        probabilities = {}
        for label, column in zip(self.results_unique.tolist(), self.results_probs):
            probs_given_result = 1
            feature_prob = 1
            for feature in sample.index:
                table = self.feature_tables[feature]
                value = sample[feature]
                # ``.at`` is the scalar label accessor; unlike ``.loc`` it also
                # accepts boolean feature values on the mixed-type index.
                probs_given_result *= table.at[value, column]
                feature_prob *= table.at[value, 'P(Total)']
            class_prob = np.count_nonzero(self.results == label)/len(self.results)
            probabilities[label] = (probs_given_result*class_prob)/feature_prob
        # On a tie, max() keeps the first class in sorted label order.
        return max(probabilities, key=probabilities.get)

In [None]:
# Train on the 75% split, predict the held-out rows, and print the predictions
# followed by the true labels for a manual comparison.
naive_bayes = NaiveBayes()
# .values passes the labels as a plain array: y_train still carries the shuffled
# row labels, whereas x_train's index was reset to 0..n-1.
naive_bayes.fit(x_train, y_train.values)
predictions = naive_bayes.predict(x_test)
print(predictions)
print(y_test)

# Naive Bayes Classifier with Interview Attendance Data

## Analyze the Data

In [None]:
# Print the column names, dtypes and non-null counts of the interview frame.
interview_data.info()

In [None]:
# Show the first rows of the interview frame (row order is the seeded shuffle).
interview_data.head()