In [226]:
import pickle
import pandas as pd
import ast
import tensorflow as tf
from sklearn import linear_model
from tensorflow import keras
from tensorflow.keras import Sequential, layers, callbacks
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
import numpy as np
import matplotlib.pyplot as plt
import polars as pl
from sklearn.model_selection import train_test_split

In [145]:
with open('time_data.pickle', 'rb') as f:
    raw = pickle.load(f)
contract_lookup = dict([(contract['id'], contract) for contract, series in raw])


In [146]:
d = dict([(contract['id'],float(contract['liquidity'])) for contract in contract_lookup.values() if float(contract['liquidity'])>= 100000])
interested_ids = {k: v for k, v in sorted(d.items(), key=lambda item: item[1])}
def in_range(v,l,h):
    if v<h and v>l:
        return True
    return False
y_ids = [key for key in interested_ids.keys() if in_range([float(value) for value in ast.literal_eval(contract_lookup[key]['outcomePrices'])][0], .1,.9)][0:5]
x_ids = [id for id in interested_ids.keys() if id not in y_ids]


In [147]:
data = [(contract['id'], series['t'], series['p']) for contract, series in raw if contract['id'] in interested_ids.keys()]

In [148]:
import polars as pl

# Collect all the data into a list of dictionaries
rows = []
for row in data:
    col_name = row[0]
    indices = row[1]
    values = row[2]
    for idx, val in zip(indices, values):
        rows.append({'index': idx, 'column': col_name, 'value': val})

# Create a DataFrame from the list of dictionaries
df = pl.DataFrame(rows)

# Pivot the DataFrame to get the desired format
df = df.pivot(
    values='value',
    index='index',
    on='column',
    aggregate_function='first'  # or 'sum', 'mean', etc., depending on your needs
)
df = df.fill_null(strategy="forward")

# display(df,df)


In [178]:
def cross_entropy(X, y, w):
  return np.mean(np.log(1 + np.exp(-y * np.dot(X, w))))


def logistic_reg(X, y, w_init, max_its, eta, grad_threshold, lam, reg):
  assert reg in [1,2]
  iteration = 0
  w = w_init
  grads = []
  while iteration < max_its:
    denom = 1 + np.exp(y * np.dot(X, w))
    temp = (y / denom)
    temp = (y / denom)[:, np.newaxis] * X
    grad = -np.sum(temp, axis=0) / len(X)
    v = grad
    iteration += 1
    grads.append(v)
    abs_grad = np.abs(grad)
    if reg == 1:
      w_new = w - eta * v
      w = np.sign(w_new) * np.maximum(0, np.abs(w_new) - eta * lam)

    elif reg == 2:
      w =(1-2 *eta * lam) * w - eta * v
    if max(abs_grad) < grad_threshold:
      break

  return w, iteration,cross_entropy(X,y,w),grads

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def data_splitting(X_in,y_col,data):
    X = data.select(X_in).to_numpy()
    y = data.select([y_col]).to_numpy()
    y = y[~np.isnan(y)]
    X = X[len(X) - len(y):]
    y = y.reshape(len(X))
    return X,y

def logistic_alpha_models(x_cols, y_cols, data):
    dummy = data.drop('index')
    weight_columns = dummy.columns
    weights = []
    test_error = []
    for col in y_cols:
        X,y = data_splitting(x_cols, col , dummy)
        x_train, x_test = train_test_split(X)
        y_train, y_test = train_test_split(y)
        w, iteration,error,grads = logistic_reg(x_train, y_train, np.zeros(len(train_x[0])), 10**4, .1, 10**-4, .05, 1)
        Eout = cross_entropy(x_test, y_test,w)
        weights.append(w)
        test_error.append(Eout)
    return weight_columns, weights, test_error
model1 = logistic_alpha_models(x_ids, y_ids, df)

In [224]:
def toRNNdata(x,y):
    rx_train =[]
    ry_train = []
    steps = 20
    for i in range(steps, x.shape[0]-steps):
        rx_train.append(x[i-steps:i, :])
        ry_train.append(y[i])
    rx_train, ry_train = np.array(rx_train), np.array(ry_train)
    return rx_train,ry_train
def LSTMGRU(x_train,y_train):
    rx_train,ry_train = toRNNdata(x_train,y_train)

    epochs = 30
    model = Sequential()
    model.add(LSTM(units=50, return_sequences = True, input_shape = (rx_train.shape[1],rx_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(GRU(units=50, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(GRU(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(loss="mse", optimizer="adam")
    model.fit(rx_train, y_train[40:], batch_size = 32, epochs = epochs,verbose=False)
    return model

def RNN_alpha_models(x_cols,y_cols,data):
    dummy = data.drop('index')
    weight_columns = dummy.columns
    weights = []
    test_error = []
    
    output = []
    
    rnns = []
    results = []
    for col in y_cols:
        X,y = data_splitting(x_cols, col , dummy)
        x_train, x_test = train_test_split(X)
        y_train, y_test = train_test_split(y)
        rnn = LSTMGRU(x_train,y_train)
        rx_test,ry_test = toRNNdata(x_test,y_test)
        results = rnn.evaluate(rx_test, ry_test, batch_size=32, verbose = False)
        rnns.append((rnn,results))
    return rnns
model1 = RNN_alpha_models(x_ids, y_ids, df)

In [243]:
def Linear(x,y):
    clf = linear_model.LinearRegression()
    clf.fit(x,y)
    return clf

def Linear_alpha_models(x_cols,y_cols,data):
    dummy = data.drop('index')
    weight_columns = dummy.columns
    weights = []
    test_error = []
    for col in y_cols:
        X,y = data_splitting(x_cols, col , dummy)
        x_train, x_test = train_test_split(X)
        y_train, y_test = train_test_split(y)
        lin_model = Linear(x_train,y_train)
        yhat = lin_model.predict(x_test)
        weights.append(lin_model)
        test_error.append((np.square(np.subtract(y_test,yhat)).mean()))
    return weight_columns, weights, test_error
model2 = Linear_alpha_models(x_ids, y_ids, df)

In [248]:
model2[1][3].coef_

array([ 1.09095723,  0.35192552, -0.49715675, -0.39085878,  0.17469247,
        0.72878678, -1.83564209, -0.12488592,  0.40590425, -0.04904839,
       -0.0559849 , -0.2919703 , -1.05754497, -0.62091484,  0.72814755,
        0.33833176, -0.51591335, -0.21025536,  0.49567783,  0.51382667,
       -0.11969304, -0.08539693,  0.12879632, -0.00930146,  0.09403661,
        0.03959986, -1.07982265,  0.74793248, -1.29828806,  0.48013153,
        0.37360764,  0.7491991 , -0.15392427,  0.27660875,  0.02005387,
       -0.12283886,  0.06068352,  0.11808976,  0.03536876,  0.00989839,
       -0.19870934,  0.06288139,  0.06107794,  0.1024381 , -0.10563867,
       -0.11041665, -0.04150815])