In [1]:
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.preprocessing import StandardScaler

from joblib import dump, load
import pandas as pd
import numpy as np

from psyke import Extractor, Clustering, Target
from psyke.extraction.hypercubic.strategy import AdaptiveStrategy
from psyke.extraction.hypercubic import Grid, FeatureRanker
from psyke.utils.logic import pretty_theory

In [2]:
def getTrainTest(data, testB, periods=None):
    """Split ``data`` into the rows outside and inside one period.

    The period is the row of the periods table whose ``n`` column equals
    ``testB``. Rows of ``data`` whose index lies in the half-open interval
    ``[t0, t1)`` of that period form the test set; all remaining rows form
    the training set.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to split; its index is compared against ``t0`` and ``t1``.
    testB
        Value looked up in the ``n`` column of the periods table.
    periods : pandas.DataFrame, optional
        Table with ``n``, ``t0`` and ``t1`` columns. When omitted, the
        notebook-level ``bartels`` frame is used (resolved at call time).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)`` in that order.

    Raises
    ------
    IndexError
        If no row of the periods table has ``n == testB``.
    """
    if periods is None:
        # Fall back to the notebook-level table so existing two-argument calls keep working.
        periods = bartels
    selected = periods[periods.n == testB]
    if selected.empty:
        # Fail with an explicit message instead of an opaque out-of-bounds error on `.values[0]`.
        raise IndexError(f"no period with n == {testB} in the periods table")
    t0, t1 = selected.t0.values[0], selected.t1.values[0]
    inTest = (data.index >= t0) & (data.index < t1)
    return data[~inTest], data[inTest]

def getScaler(train, name):
    """Fit a ``StandardScaler`` on ``train``, save it, and return the scaled data.

    Parameters
    ----------
    train : pandas.DataFrame
        Data the scaler is fitted on; every column is scaled.
    name
        Inserted into the output file name ``scalers/scalerV{name}.joblib``.

    Returns
    -------
    tuple
        ``(scaler, scaled, normalization)`` where ``scaler`` is the fitted
        ``StandardScaler``, ``scaled`` is a new DataFrame holding
        ``scaler.transform(train)`` under the column names of ``train``
        (no index is passed to the constructor), and ``normalization`` maps
        each column name to its ``(mean_, scale_)`` pair.
    """
    import os  # local import: only needed to guarantee the output folder exists

    scaler = StandardScaler().fit(train)
    # The target folder is not created anywhere else in the notebook, so make sure
    # it is there before writing the file.
    os.makedirs("scalers", exist_ok=True)
    dump(scaler, f"scalers/scalerV{name}.joblib")
    normalization = {key: (m, s) for key, m, s in zip(train.columns, scaler.mean_, scaler.scale_)}
    scaled = pd.DataFrame(scaler.transform(train), columns=train.columns)
    return scaler, scaled, normalization

In [3]:
def gridex(model, train, test, normalization, s, m):
    """Build a GridEx extractor around ``model``, fit it on ``train``, apply it to ``test``.

    Parameters
    ----------
    model
        Predictor handed to ``FeatureRanker.fit`` and ``Extractor.gridex``.
    train : pandas.DataFrame
        Frame passed to ``extract``; all columns but the last are used to rank features.
    test : pandas.DataFrame
        Frame passed to ``brute_predict`` and ``predict``.
    normalization
        Forwarded unchanged to ``Extractor.gridex``.
    s, m
        Not used here; accepted so every extractor helper shares one signature.

    Returns
    -------
    tuple
        ``(brute_predict(test), n_rules, number of None entries in predict(test))``.
    """
    # NOTE(review): the ranker is given every column name of `train`, including the
    # last one, but is fitted on the feature columns only — confirm the extra name
    # is ignored by FeatureRanker.
    ranker = FeatureRanker(train.columns).fit(model, train.iloc[:, :-1])
    strategy = AdaptiveStrategy(ranker.rankings(), [(0.6, 3), (0.75, 4)])
    extractor = Extractor.gridex(
        model,
        Grid(1, strategy),
        threshold=5,
        min_examples=1,
        normalization=normalization,
    )
    extractor.extract(train)

    predictions = extractor.brute_predict(test)
    ruleCount = extractor.n_rules
    missedCount = sum(1 for value in extractor.predict(test) if value is None)
    return predictions, ruleCount, missedCount
    
def gridrex(model, train, test, normalization, s, m):
    """Build a GridREx extractor around ``model``, fit it on ``train``, apply it to ``test``.

    Parameters
    ----------
    model
        Predictor handed to ``FeatureRanker.fit`` and ``Extractor.gridrex``.
    train : pandas.DataFrame
        Frame passed to ``extract``; all columns but the last are used to rank features.
    test : pandas.DataFrame
        Frame passed to ``brute_predict`` and ``predict``.
    normalization
        Forwarded unchanged to ``Extractor.gridrex``.
    s, m
        Not used here; accepted so every extractor helper shares one signature.

    Returns
    -------
    tuple
        ``(brute_predict(test), n_rules, number of None entries in predict(test))``.
    """
    # NOTE(review): the ranker is given every column name of `train`, including the
    # last one, but is fitted on the feature columns only — confirm the extra name
    # is ignored by FeatureRanker.
    ranker = FeatureRanker(train.columns).fit(model, train.iloc[:, :-1])
    strategy = AdaptiveStrategy(ranker.rankings(), [(0.5, 3)])
    extractor = Extractor.gridrex(
        model,
        Grid(1, strategy),
        threshold=5,
        min_examples=1,
        normalization=normalization,
    )
    extractor.extract(train)

    predictions = extractor.brute_predict(test)
    ruleCount = extractor.n_rules
    missedCount = sum(1 for value in extractor.predict(test) if value is None)
    return predictions, ruleCount, missedCount

def cart(model, train, test, normalization, s, m):
    """Build a CART extractor around ``model``, fit it on ``train``, apply it to ``test``.

    Parameters
    ----------
    model
        Predictor handed to ``Extractor.cart``.
    train : pandas.DataFrame
        Frame passed to ``extract``.
    test : pandas.DataFrame
        Frame passed to ``predict``.
    normalization
        Forwarded unchanged to ``Extractor.cart``.
    s, m
        Factor and offset applied to the predictions as ``prediction * s + m``.

    Returns
    -------
    tuple
        ``(predict(test) * s + m, n_rules, number of None entries in predict(test))``.
    """
    extractor = Extractor.cart(model, max_depth=5, max_leaves=7, normalization=normalization)
    extractor.extract(train)

    # Predict once and reuse the result for both the rescaled output and the
    # count of missing predictions, instead of running the extractor twice.
    rawPredictions = extractor.predict(test)
    rescaled = rawPredictions * s + m
    ruleCount = extractor.n_rules
    missedCount = sum(1 for value in rawPredictions if value is None)
    return rescaled, ruleCount, missedCount

def cosmik(model, train, test, normalization, s, m):
    """Build a COSMiK extractor around ``model``, fit it on ``train``, apply it to ``test``.

    Parameters
    ----------
    model
        Predictor handed to ``Extractor.cosmik``.
    train : pandas.DataFrame
        Frame passed to ``extract``.
    test : pandas.DataFrame
        Frame passed to ``brute_predict`` and ``predict``.
    normalization
        Forwarded unchanged to ``Extractor.cosmik``.
    s, m
        Not used here; accepted so every extractor helper shares one signature.

    Returns
    -------
    tuple
        ``(brute_predict(test), n_rules, number of None entries in predict(test))``.
    """
    settings = dict(
        max_components=10,
        k=100,
        patience=10,
        close_to_center=True,
        output=Target.CONSTANT,
        normalization=normalization,
    )
    extractor = Extractor.cosmik(model, **settings)
    extractor.extract(train)

    predictions = extractor.brute_predict(test)
    ruleCount = extractor.n_rules
    missedCount = sum(1 for value in extractor.predict(test) if value is None)
    return predictions, ruleCount, missedCount

def creepy(model, train, test, normalization, s, m):
    """Build a CReEPy extractor around ``model``, fit it on ``train``, apply it to ``test``.

    Parameters
    ----------
    model
        Predictor handed to ``Extractor.creepy``.
    train : pandas.DataFrame
        Frame passed to ``extract``.
    test : pandas.DataFrame
        Frame passed to ``brute_predict`` and ``predict``.
    normalization
        Forwarded unchanged to ``Extractor.creepy``.
    s, m
        Not used here; accepted so every extractor helper shares one signature.

    Returns
    -------
    tuple
        ``(brute_predict(test), n_rules, number of None entries in predict(test))``.
    """
    settings = dict(
        clustering=Clustering.cream,
        depth=5,
        error_threshold=5,
        gauss_components=10,
        output=Target.REGRESSION,
        normalization=normalization,
    )
    extractor = Extractor.creepy(model, **settings)
    extractor.extract(train)

    predictions = extractor.brute_predict(test)
    ruleCount = extractor.n_rules
    missedCount = sum(1 for value in extractor.predict(test) if value is None)
    return predictions, ruleCount, missedCount

In [4]:
# Periods table read by getTrainTest (columns `n`, `t0`, `t1`); the columns at
# positions 1 and 2 are parsed as dates.
bartels = pd.read_csv("data/bartels.csv", parse_dates=[1, 2])

# Display names of the rule extractors, in the same order as the helper
# functions they are zipped with inside the loop.
extractors = ['GridEx', 'GridREx', 'CART', 'COSMiK', 'CReEPy']

# Values of `bartels.n` that are held out as test set, one per iteration.
TESTB = list(range(2491, 2509))

# Accumulators filled one held-out period at a time:
#   predicted - per-sample index, target `V`, model output and each extractor's output
#   rules     - held-out period (`BR`) and number of rules per extractor
#   missed    - held-out period (`BR`) and number of None predictions per extractor
predicted = {name: [] for name in ['index', 'V', 'model'] + extractors}
rules = {name: [] for name in ['BR'] + extractors}
missed = {name: [] for name in ['BR'] + extractors}

# The dataset is the same for every held-out period, so it is read once.
data = pd.read_csv('data/halffuzzycoefs2B.csv', parse_dates=[0], index_col=0)

for testB in TESTB:
    print(testB)
    rules['BR'].append(testB)
    missed['BR'].append(testB)

    train, test = getTrainTest(data, testB)

    predicted['index'] += list(test.index.values)
    predicted['V'] += list(test.V.values)

    # The scaler is fitted on the training rows only and then applied to the test rows.
    scaler, scaledTrain, normalization = getScaler(train, f"test{testB}")
    # Drop the last (target) column: the model and the extractors only get test features.
    scaledTest = pd.DataFrame(scaler.transform(test), columns=test.columns).iloc[:, :-1]
    # Mean and scale of the last column, used to map scaled predictions back.
    m, s = normalization[test.columns[-1]]

    model = KNN(200, weights='distance', p=1).fit(scaledTrain.iloc[:, :-1], scaledTrain.iloc[:, -1])
    predicted['model'] += list(model.predict(scaledTest) * s + m)

    for name, fun in zip(extractors, [gridex, gridrex, cart, cosmik, creepy]):
        print(name)
        pred, n, miss = fun(model, scaledTrain, scaledTest, normalization, s, m)
        predicted[name] += list(pred)
        rules[name].append(n)
        missed[name].append(miss)

2491
GridREx


AttributeError: 'NoneType' object has no attribute 'predict'

In [None]:
# Write the collected targets and predictions (one column per key of `predicted`) to disk.
pd.DataFrame(data=predicted).to_csv(path_or_buf="pred.csv")

In [None]:
# Write the per-period rule counts and the per-period counts of None predictions to disk.
pd.DataFrame(data=rules).to_csv(path_or_buf='rules.csv')
pd.DataFrame(data=missed).to_csv(path_or_buf='missed.csv')

In [None]:
# Tabular view of the collected predictions, inspected by the cells that follow.
p = pd.DataFrame(data=predicted)

In [None]:
# Mean absolute difference between the COSMiK predictions and the model predictions.
(p["COSMiK"] - p["model"]).abs().mean()

In [None]:
# Summary statistics of every column of the predictions table `p`.
p.describe()