In [20]:
# Import all the necessary packages

import numpy as np
import pandas as pd
import scipy
import uci_dataset as data
import random
# Seed every random number generator the experiments rely on.
# `random.seed` only affects Python's built-in `random` module; scikit-learn's
# `train_test_split` draws from NumPy's global generator when it is called
# without `random_state`, so NumPy has to be seeded as well for reproducible splits.
SEED = 10
random.seed(SEED)
np.random.seed(SEED)

# The results will depend on the random train-test split, so we average the accuracies over a certain number of repetitions
n_rep = 40

import wittgenstein2 as lw
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from datetime import datetime

In [None]:
# Define a dictionary with the datasets we want to consider when experimenting
# dfs = ...

In [31]:
import numpy as np
import time
from Helper import LearningCurvePlot, smooth


def average_over_repetitions(smoothing_window, n_repetitions, df, W, class_feat, pos_class, name_test):
    """Train and score a RIPPER classifier on repeated random 80/20 splits of `df`.

    Parameters
    ----------
    smoothing_window : window size forwarded to `smooth`.
    n_repetitions    : number of independent train/test splits to evaluate.
    df               : DataFrame holding the features and the class column.
    W                : value forwarded unchanged to `lw.RIPPER(W=W)`.
    class_feat       : name of the column in `df` that holds the class labels.
    pos_class        : class label that is treated as the positive class.
    name_test        : dataset name, used in the name of the results file.

    Returns
    -------
    (learning_curve, acc) where `acc` is a 1-D array with one score per
    repetition and `learning_curve` is `smooth(mean(acc), smoothing_window)`.

    Side effects: writes `acc` to a text file inside `Results/` and prints the
    elapsed time in minutes.
    """
    import os  # local import so this function does not depend on another cell

    acc = np.empty(n_repetitions)  # one score per repetition
    now = datetime.now()           # wall-clock timestamp, used in the file name
    start = time.time()            # epoch seconds, used to measure the duration

    for i in range(n_repetitions):
        train, test = train_test_split(df, test_size = 0.2)
        ripper_clf = lw.RIPPER(k=2, verbosity = 1, W = W)
        ripper_clf.fit(train, class_feat = class_feat, pos_class = pos_class)
        # The true labels live in the class column (`class_feat`); `pos_class`
        # is a label value, not a column name. The class column is removed from
        # the features handed to `score`.
        X_test = test.drop(columns = [class_feat])
        y_test = test[class_feat]
        # Store the score in its own slot; `acc += [score]` would add the score
        # to every element of the uninitialised array.
        acc[i] = ripper_clf.score(X_test, y_test)

    # Save results to a text file
    os.makedirs('Results', exist_ok = True)
    np.savetxt('Results/' + name_test + '_W' + str(W) + '_time' + now.strftime("%d-%m-%Y_%H.%M.%S"),acc,delimiter = ',')

    print('Running one setting takes {} minutes'.format((time.time()-start)/60))
    learning_curve = np.mean(acc,axis=0) # average over repetitions
    # NOTE(review): `acc` is 1-D, so the mean above is a single number; confirm
    # that `smooth` (from Helper) accepts a scalar with this window size.
    learning_curve = smooth(learning_curve,smoothing_window) # additional smoothing
    return learning_curve, acc

def experiment(n_repetitions, DFs, Ws = np.arange(0,1,0.01), datasets = None):
    """Run `average_over_repetitions` for every dataset and every value of W.

    Parameters
    ----------
    n_repetitions : number of train/test repetitions per (dataset, W) setting.
    DFs           : DataFrame with one row per dataset and the columns
                      data:       key of the dataset in the dictionary of dataframes
                      class_feat: name of the class feature of that dataset
                      pos_class:  name of the positive class of that dataset
    Ws            : iterable of W values to evaluate.
    datasets      : optional dictionary mapping the keys in `DFs['data']` to
                    dataframes. When omitted, the notebook-level `dfs`
                    dictionary is used.

    For each dataset one plot is created, one curve per W is added to it, and
    the plot is saved as '<dataset name>.png'. Nothing is returned.
    """
    ####### Settings
    smoothing_window = 24

    if datasets is None:
        # Backward-compatible fallback: the dictionary defined earlier in the notebook.
        datasets = dfs

    ####### Experiments
    # Iterate over the rows of DFs; iterating the DataFrame itself would yield
    # its column labels instead of the dataset names.
    for name_test, class_feat, pos_class in zip(DFs['data'], DFs['class_feat'], DFs['pos_class']):
        Plot1 = LearningCurvePlot(title = name_test)
        for W in Ws:
            # `average_over_repetitions` returns (learning_curve, acc); only the
            # curve is plotted.
            learning_curve, _ = average_over_repetitions(smoothing_window, n_repetitions, datasets[name_test], W,
                                                         class_feat, pos_class, name_test)
            Plot1.add_curve(learning_curve,label = name_test + str(W))
        # Save inside the dataset loop so every dataset's plot is written, not
        # just the last one.
        Plot1.save(name_test + '.png')