In [20]:
import pandas as pd
import math

In [25]:
FORWARD_SELECTION, BACKWARD_ELIMINATION = 'forward_selection', 'backward_elimination'
K_FOLD = 10

In [26]:
def read_dataset(dataset_path):

    data = pd.read_csv(dataset_path, delim_whitespace=True, header = None)
    instance_count, column_count = data.shape
    feature_count = column_count - 1
    
    instances = data.values.tolist()
    
    return instances, instance_count, feature_count

In [None]:
def find_euclidean_distance(instance, compare_instance, features):
    
    squares = 0
    for feature_idx in range(len(features)):
        
        diff = instance[feature_idx] - compare_instance[feature_idx]
        squares += diff ** 2
    
    return math.sqrt()

In [None]:
def find_nearest_neighbor(dataset, feature_set, instance_count):
    
    correct_prediction = 0
    for instance_idx in range(instance_count):
        
        instance = dataset[instance_idx]
        target = instance[0]
        features = feature_set 
        
        nearest_neighbor_distance = math.inf
        nearest_neighbor_predict = -1
        
        for compare_idx in range(instance_count):
            
            compare_instance = dataset[compare_idx]
            compare_target = compare_instance[0]
            if compare_idx != instance_idx:
                distance = find_euclidean_distance(instance,
                                                  compare_instance,
                                                  features)
                
                if distance < nearest_neighbor_distance:
                        nearest_neighbor_distance = distance
                        nearest_neighbor_predict = compare_target
         
        if nearest_neighbor_predict == target:
            correct_prediction += 1
        
    return correct_prediction / instance_count
            

In [None]:
def leave_one_out_cross_validation(instances, instance_count, \
                                   current_feature_set, feature):
    
    
    fold_size = instance_count//K_FOLD
    accuracy_list = []
    for k_fold_itr in range(1, K_FOLD):
        
        ## dataset
        start = (k_fold_itr - 1) * fold_size
        end = start + fold_size
        
        dataset = instances[0:start] + instances[end: instance_count]
        
        ## features
        feature_set = current_feature_set + feature
        
        accuracy = find_nearest_neighbor(dataset, feature_set, fold_size)
        accuracy_list.append(accuracy)
    
    return sum(accuracy_list)/K_FOLD
        
        

In [27]:
def start_experiment(instances, instance_count, feature_count, search_type = FORWARD_SELECTION):
    
    print(search_type)
    current_feature_set = []
    
    for level in range(1, feature_count - 1):
        print('On the ', str(level), 'th level of the search tree')
        best_accuracy_feature = None
        best_accuracy = 0
        
        for feature in range(1, feature_count - 1):
            if feature not in current_feature_set:
                
                print('Considering add the ', str(feature), ' feature')
                accuracy = leave_one_out_cross_validation(\
                                instances, instance_count, \
                                current_feature_set, feature)
                
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_accuracy_feature = feature
        
        current_feature_set.append(best_accuracy_feature)
        print('On level ', str(level), 'addded feature ', \
                  best_accuracy_feature, 'to current set')
    

In [24]:
# driver function
if __name__ == "__main__":
    
    small_dataset_path = 'CS205_SP_2022_SMALLtestdata__35.txt'
    large_dataset_path = 'CS205_SP_2022_Largetestdata__62.txt'
    
    while True:

        input_case = input('Press 11 to run Forward Selection with small dataset\n' + \
                          'Press 12 to run Forward Selection with large dataset\n' + \
                          'Press 21 to run Backward Elimination with small dataset\n' + \
                          'Press 22 to run Backward Elimination with large dataset\n' + \
                          'Press any other key to exit\n').strip()
        
        if input_case == '11':
            print("Forward Selection selected with small dataset")
            instances, instance_count, feature_count = read_dataset(small_dataset_path)
            
            start_experiment(instances, instance_count, feature_count, FORWARD_SELECTION)
            
        elif input_case == '12':
            print("Forward Selection selected with large dataset")
            instances, instance_count, feature_count = read_dataset(large_dataset_path)
            
            start_experiment(instances, instance_count, feature_count, FORWARD_SELECTION)
            
        elif input_case == '21':
            print('Backward Elimination selected with small dataset')
            instances, instance_count, feature_count = read_dataset(small_dataset_path)
            
            start_experiment(instances, instance_count, feature_count, BACKWARD_ELIMINATION)
            
        elif input_case == '22':
            print('Backward Elimination selected with large dataset')
            
            instances, instance_count, feature_count = read_dataset(large_dataset_path)
            
            start_experiment(instances, instance_count, feature_count, BACKWARD_ELIMINATION)
        else:
            print('Exit. Thank you')
            break
            

Press 11 to run Forward Selection with small dataset
Press 12 to run Forward Selection with large dataset
Press 21 to run Backward Elimination with small dataset
Press 22 to run Backward Elimination with large dataset
Press any other key to exit
11
Forward Selection selected with small dataset
forward_selection
Press 11 to run Forward Selection with small dataset
Press 12 to run Forward Selection with large dataset
Press 21 to run Backward Elimination with small dataset
Press 22 to run Backward Elimination with large dataset
Press any other key to exit
12
Forward Selection selected with large dataset
forward_selection
Press 11 to run Forward Selection with small dataset
Press 12 to run Forward Selection with large dataset
Press 21 to run Backward Elimination with small dataset
Press 22 to run Backward Elimination with large dataset
Press any other key to exit
21
Backward Elimination selected with small dataset
backward_elimination
Press 11 to run Forward Selection with small dataset
Pr

KeyboardInterrupt: Interrupted by user