In [11]:
# https://www.tensorflow.org/tutorials/keras/regression

from __future__ import absolute_import, division, print_function, unicode_literals

import pathlib

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
import os

In [None]:
# hyperparameters
minvolume = 500 # minimum required volume for a line to not be rejected (pre-filtered)
first_last_thresh = 30 # threshold for deciding if line is a rise-only or fall-only wave
scale = 100.0 # divide rising/falling idx by this number so it fits in fpga precision


In [2]:
column_names = ['fileId', 'row','rising_idx','falling_idx','volume','rising_weight',
                'falling_weight', 'first_val', 'last_val', 'delay']
def filterBad(dataset):
    initial_len = len(dataset)
    dataset = dataset[(dataset['rising_idx'] != 0)]
    dataset = dataset[(dataset['falling_idx'] != 0)]
    dataset = dataset[(dataset['volume'] > minvolume)]
#     dataset = dataset[(dataset['first_val'] < 30)]
#     dataset = dataset[(dataset['last_val'] < 30)]
    final_len = len(dataset)
    print('Rejected %d points (%f%%)' % ((initial_len-final_len), (initial_len-final_len)/initial_len))
    return dataset

raw_dataset =  pd.read_feather('../preprocessing/processed.feather')
raw_dataset = filterBad(raw_dataset)
# visualize_dataset = raw_dataset.sample(frac=0.01)
# sns.pairplot(visualize_dataset[column_names], diag_kind="kde")


dataset = raw_dataset.copy()
dataset.tail()

dataset = dataset.dropna()
dataset = dataset.drop(columns=['fileId', 'row'])

dataset = pd.get_dummies(dataset, prefix='', prefix_sep='')
dataset.tail()
train_dataset = dataset.sample(frac=0.8,random_state=0)

test_dataset = dataset.drop(train_dataset.index)
# sns.pairplot(train_dataset[["row","rising_idx", "falling_idx", "volume", "rising_weight", "falling_weight", "delay"]], diag_kind="kde")
train_stats = train_dataset.describe()
train_stats.pop("delay")
train_stats = train_stats.transpose()
train_stats
train_labels = train_dataset.pop('delay')
test_labels = test_dataset.pop('delay')





Rejected 1810263 points (0.166070%)


In [3]:
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

def train_model(x,y,degree):
  model = np.polyfit(x,y,degree)
  return model


def predict(pt, rising_model, falling_model):
    tyr = np.sum([m * math.pow(pt['rising_idx'] / scale,i) for i,m in enumerate(rising_model[::-1])])
    tyf = np.sum([m * math.pow(pt['falling_idx'] / scale,i) for i,m in enumerate(falling_model[::-1])])
    
    if (pt['first_val'] >= first_last_thresh): return tyf # only use falling
    elif (pt['last_val'] >= first_last_thresh): return tyr # only use rising
    return (tyr + tyf) / 2 # use both rising and falling


rising_dataset = train_dataset[(train_dataset['first_val'] < first_last_thresh)]
rising_labels = train_labels[(train_dataset['first_val'] < first_last_thresh)]
rising_model = train_model(rising_dataset['rising_idx'] / scale, rising_labels, 3)
print('val rising_model = Seq(%s)' % ','.join([str(x) + '.to[T]' for x in rising_model]))
falling_dataset = train_dataset[(train_dataset['last_val'] < first_last_thresh)]
falling_labels = train_labels[(train_dataset['last_val'] < first_last_thresh)]
falling_model = train_model(falling_dataset['falling_idx'] / scale, falling_labels, 3)
print('val falling_model = Seq(%s)' % ','.join([str(x) + '.to[T]' for x in falling_model]))


val rising_model = Seq(-1.3856189477301577.to[T],47.84993584308752.to[T],-977.8034355526304.to[T],1916.1962081617075.to[T])
val falling_model = Seq(-0.5955960893359528.to[T],35.71560251795508.to[T],-924.0859562063667.to[T],3244.657042078325.to[T])


In [None]:
import numpy.polynomial.polynomial as poly
ffit = poly.Polynomial(rising_model[::-1])    # instead of np.poly1d

# fig1 = plt.figure()                                                                                           
# ax1 = fig1.add_subplot(111)                                                                                   
# ax1.scatter(train_dataset['rising_idx'] / scale, train_labels)    
# s = np.array(sorted(train_dataset['rising_idx'] / scale)).astype(float)
# plt.plot(s, ffit(s))
# plt.show()

In [13]:
# loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)

preds = [predict(x[1], rising_model, falling_model) for x,y in zip(test_dataset.iterrows(), test_labels)]
rmse = mean_squared_error(test_labels, preds)
rmae = mean_absolute_error(test_labels, preds)
r2 = r2_score(test_labels, preds)
print("MSE, MAE, r2: %f,%f,%f" % (rmse, rmae, r2))


MSE: 10153.000136
MAE: 27.392674
r2: 0.992851


In [None]:
df2 = pd.DataFrame({"rising_idx":[157],
    "falling_idx":[341],
    "volume":[12039],
    "rising_weight":[35.2498],
    "falling_weight":[-28.1037], 
    "first_val":[12],
    "last_val":[0]})
print(predict(df2, rising_model, falling_model))
print('True label = %f' % 509.093)