In [1]:
import Orange
from Orange.data import *
from Orange.regression import SVRLearner, TreeLearner, KNNRegressionLearner
from Orange.modelling import Fitter, SklFitter
from Orange.evaluation import CrossValidation

import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("data60.csv", low_memory=False)

# replace ? for NAN
df = df.replace('?', np.nan)

# Delete rows with missing values
df = df.dropna()
df = df.astype(float)

df = df.sample(n = 1000, random_state = 42)

target_columns = df.iloc[:,-33:].columns
y = df.loc[:, target_columns].astype(float).sum(axis=1)
df.drop(columns=target_columns, inplace=True)

In [3]:
data = Table("data_sum")
#data.domain.class_var = data.domain["sum"]

In [4]:
# from attributes/features to class_var/target
domain = Domain(data.domain.attributes[0:484], data.domain.attributes[485])
data = data.transform(domain)

In [5]:
# models
models = [Orange.regression.TreeLearner(max_depth = 5, min_samples_split = 20, min_samples_leaf = 10),
          Orange.regression.knn.KNNRegressionLearner(n_neighbors = 10, weights = "distance"),
          Orange.regression.svm.SVRLearner(kernel = 'rbf', epsilon = 0.01)]
          #Orange.regression.NNRegressionLearner(hidden_layer_sizes=(10,10), alpha = 0.01, max_iter = 100)]

In [None]:
repeats = 10
rez = {}

for i in range(repeats):
    train_data, test_data = Orange.evaluation.testing.sample(data, n=0.7)

    for j in models:
        model = j(train_data)
        results = Orange.evaluation.testing.TestOnTestData(train_data, test_data, [lambda testdata: model])
        score = Orange.evaluation.RMSE(results)
        
        col_name = f"Model_{j}"
        
        if col_name not in rez:
            rez[col_name] = []

        rez[col_name].append(score)
    

In [None]:
rezultati = pd.DataFrame(rez)

column_names = {rezultati.columns[0]: 'DesisionTreeRegressor_rmse',
                rezultati.columns[1]: 'SVR_rmse',
                rezultati.columns[2]: 'KNeighborsRegressor_rmse'}
                #rezultati.columns[3]: 'MLPRegressor_mse'}

rezultati = rezultati.rename(columns=column_names)
rezultati.to_csv('rezultati_orange.csv', index=False)
rezultati


In [15]:
rezultati = pd.read_csv("rezultati_orange.csv", header=0)
rezultati


Unnamed: 0,DesisionTreeRegressor_rmse,SVR_rmse,KNeighborsRegressor_rmse
0,0.651240,0.633560,0.472959
1,0.653405,0.651219,0.485171
2,0.644658,0.634417,0.477224
3,0.649949,0.630879,0.470769
4,0.639771,0.628698,0.474737
...,...,...,...
95,0.651547,0.658797,0.489820
96,0.654151,0.625387,0.472107
97,0.643795,0.629808,0.483937
98,0.649502,0.642989,0.483686


In [17]:
rezultati = rezultati[['DesisionTreeRegressor_rmse',
                                     'SVR_rmse',
                                     'KNeighborsRegressor_rmse']].agg(['mean', 'var']).round(7)

rezultati


Unnamed: 0,DesisionTreeRegressor_rmse,SVR_rmse,KNeighborsRegressor_rmse
mean,0.324437,0.319091,0.239935
var,0.210376,0.203548,0.115092


In [6]:
repeats = 2
rez_cv = {}

for i in range(repeats):

    for j in models:
        results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3, random_state = j) 
        score = Orange.evaluation.RMSE(results)
        
        col_name = f"Model_{j}"
        
        if col_name not in rez_cv:
            rez_cv[col_name] = []

        rez_cv[col_name].append(score)

construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)
construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)
construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)
construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)
construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)
construct an instance and call it
  results = Orange.evaluation.testing.CrossValidation(data, [j], k = 3)


KeyboardInterrupt: 

In [None]:
rezultati_cv = pd.DataFrame(rez_cv)

column_names = {rezultati_cv.columns[0]: 'DesisionTreeRegressor_rmse',
                rezultati_cv.columns[1]: 'SVR_rmse',
                rezultati_cv.columns[2]: 'KNeighborsRegressor_rmse'}
                #rezultati.columns[3]: 'MLPRegressor_mse'}

rezultati_cv = rezultati_cv.rename(columns=column_names)
rezultati_cv.to_csv('rezultati_cv_orange.csv', index=False)
rezultati_cv

In [None]:
rezultati_cv = pd.DataFrame(rezultati_cv)[['DesisionTreeRegressor_rmse',
                                           'SVR_rmse',
                                           'KNeighborsRegressor_rmse']].agg(['mean', 'var']).round(7)

rezultati_cv