In [1]:
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.preprocessing import StandardScaler

from joblib import dump, load
import pandas as pd
import numpy as np

from psyke import Extractor, Clustering, Target
from psyke.extraction.hypercubic.strategy import AdaptiveStrategy
from psyke.extraction.hypercubic import Grid, FeatureRanker
from psyke.utils.logic import pretty_theory

In [2]:
def getTrainTest(data, testB):
    b = bartels[bartels.n==testB]
    t0, t1 = b.t0.values[0], b.t1.values[0]
    idx = (data.index >= t0) & (data.index < t1)
    return data[~idx], data[idx]

def getScaler(train, name):
    scaler = StandardScaler().fit(train)
    dump(scaler, f"scalers/scalerV{name}.joblib")
    normalization = {key: (m, s) for key, m, s in zip(train.columns, scaler.mean_, scaler.scale_)}
    return scaler, pd.DataFrame(scaler.transform(train), columns=train.columns), normalization

In [3]:
def gridex(model, train, test, normalization):
    ranked = FeatureRanker(train.columns).fit(model, train.iloc[:, :-1]).rankings()
    gridEx = Extractor.gridex(model, Grid(1, AdaptiveStrategy(ranked, [(0.6, 3), (0.75, 4)])),
                              threshold=5, min_examples=1, normalization=normalization)
    gridEx.extract(train)
    return gridEx.brute_predict(test), gridEx.n_rules, sum([p is None for p in gridEx.predict(test)])
    
def gridrex(model, train, test, normalization):
    ranked = FeatureRanker(train.columns).fit(model, train.iloc[:, :-1]).rankings()
    gridREx = Extractor.gridrex(model, Grid(1, AdaptiveStrategy(ranked, [(0.5, 3)])),
                                threshold=5, min_examples=1, normalization=normalization)
    gridREx.extract(train)
    return gridREx.brute_predict(test), gridREx.n_rules, sum([p is None for p in gridREx.predict(test)])

def cart(model, train, test, normalization):
    CART = Extractor.cart(model, max_depth=5, max_leaves=7, normalization=normalization)
    CART.extract(train)
    return CART.predict(test), CART.n_rules, sum([p is None for p in CART.predict(test)])

def cosmik(model, train, test, normalization):
    COSMiK = Extractor.cosmik(model, max_components=10, k=100, patience=10, close_to_center=True,
                              output=Target.CONSTANT, normalization=normalization)
    COSMiK.extract(train)
    return COSMiK.brute_predict(test), COSMiK.n_rules, sum([p is None for p in COSMiK.predict(test)])

def creepy(model, train, test, normalization):
    CReEPy = Extractor.creepy(model, clustering=Clustering.cream, depth=5, error_threshold=5, gauss_components=10,
                              output=Target.REGRESSION, normalization=normalization)
    CReEPy.extract(train)
    return CReEPy.brute_predict(test), CReEPy.n_rules, sum([p is None for p in CReEPy.predict(test)])

In [4]:
bartels = pd.read_csv("data/bartels.csv", parse_dates = [1, 2])

extractors = ['GridEx', 'GridREx', 'CART', 'COSMiK', 'CReEPy']

TESTB = [i for i in range(2491, 2509)]

predicted = {name: [] for name in ['index', 'V', 'model'] + extractors}

rules = {name: [] for name in ['BR'] + extractors}

missed = {name: [] for name in ['BR'] + extractors}

for testB in TESTB:
    rules['BR'].append(testB)
    missed['BR'].append(testB)
    print(testB)

    data = pd.read_csv(f'data/halffuzzycoefs2B.csv', parse_dates=[0], index_col=0)
    train, test = getTrainTest(data, testB)

    predicted['index'] += list(test.index.values)
    predicted['V'] += list(test.V.values)

    scaler, scaledTrain, normalization = getScaler(train, f"test{testB}")
    scaledTest = pd.DataFrame(scaler.transform(test), columns=test.columns).iloc[:, :-1]
    m, s = normalization[test.columns[-1]]

    model = KNN(200, weights='distance', p=1).fit(scaledTrain.iloc[:, :-1], scaledTrain.iloc[:, -1])
    #dump(model, f"models/RF/{k}_{name}_{testB}.joblib")
    predicted['model'] += list(model.predict(scaledTest) * s + m)
    break
    for name, fun in zip(extractors, [gridex, gridrex, cart, cosmik, creepy]):
        print(name)
        #if name in ['GridREx', 'CART', 'COSMiK']:
        #    continue
        pred, n, miss = fun(model, scaledTrain, scaledTest, normalization)
        predicted[name] += list(pred)
        rules[name].append(n)
        missed[name].append(miss)
    break

2491


In [19]:
ranked = FeatureRanker(train.columns).fit(model, train.iloc[:, :-1]).rankings()
CART = Extractor.gridex(model, Grid(1, AdaptiveStrategy(ranked, [(0.6, 1)])),
                              threshold=5, min_examples=1, normalization=normalization)
CART.extract(scaledTrain)
p = CART.predict(scaledTest)

KeyboardInterrupt: 

In [17]:
CART.normalization

In [15]:
np.array(p).shape

(648,)

In [16]:
model.predict(scaledTest)

array([ 5.94787596e-01,  6.27852580e-01,  6.53351288e-01,  6.86756656e-01,
        6.80554845e-01,  6.56268596e-01,  6.41509728e-01,  6.23215360e-01,
        5.06968206e-01,  4.49547691e-01,  4.50112948e-01,  3.89409986e-01,
        4.17987119e-01,  4.47363530e-01,  4.73589506e-01,  5.27920839e-01,
        5.65069947e-01,  6.30275078e-01,  6.50014692e-01,  6.25667786e-01,
        5.32084121e-01,  5.11315913e-01,  4.66818875e-01,  3.79488214e-01,
        3.49910991e-01,  2.21345403e-01,  1.93290286e-01,  1.29067767e-01,
        1.29278790e-01,  9.70285114e-02,  1.16471507e-01,  1.20058101e-01,
        1.10625154e-01,  9.43989669e-02,  4.62561519e-02,  1.54927422e-02,
       -3.67243793e-02, -9.73570606e-02, -8.82136957e-02, -8.59049356e-02,
       -1.88867848e-01, -1.98258070e-01, -2.14678123e-01, -2.25880942e-01,
       -2.73753227e-01, -2.86873033e-01, -3.36773813e-01, -3.41153580e-01,
       -2.72939657e-01, -2.60564106e-01, -2.22866189e-01, -3.16103250e-01,
       -4.57077669e-01, -

In [8]:
pd.DataFrame(predicted)

Unnamed: 0,index,V,model,GridEx,GridREx,CART,COSMiK,CReEPy
0,2016-03-04 00:00:00,410.0,518.750997,0.0,-0.381030,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
1,2016-03-04 01:00:00,400.0,522.352090,0.0,-0.407892,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
2,2016-03-04 02:00:00,395.0,525.129143,0.0,-0.433331,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
3,2016-03-04 03:00:00,408.0,528.767307,0.0,-0.461158,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
4,2016-03-04 04:00:00,406.0,528.091871,0.0,-0.477063,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
...,...,...,...,...,...,...,...,...
643,2016-03-30 19:00:00,497.0,487.521085,0.0,-0.294504,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
644,2016-03-30 20:00:00,501.0,489.614882,0.0,-0.278289,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
645,2016-03-30 21:00:00,518.0,490.609540,0.0,-0.267642,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."
646,2016-03-30 22:00:00,510.0,491.321955,0.0,-0.251590,"[441.86581920903956, 441.86581920903956, 441.8...","[340.3877551020408, 340.3877551020408, 340.387...","[464.19181456754364, 462.0928005366921, 459.64..."


In [None]:
sdsfgd

In [None]:
pd.DataFrame(predicted).to_csv("pred.csv")

In [None]:
pd.DataFrame(rules).to_csv('rules.csv')
pd.DataFrame(missed).to_csv('missed.csv')

In [None]:
p = pd.DataFrame(predicted)

In [None]:
abs(p.COSMiK - p.model).mean()

In [None]:
p.describe()