In [1]:
import pandas as pd
import numpy as np
import math
from tools import seq_to_num, acc_score

In [2]:
def create_system(sequence, order, start_index_a):
    """
    Build the linear system ``a @ x = b`` for an affine recurrence of the given order.

    Row ``k`` of ``a`` holds the ``order`` consecutive terms that start at
    ``start_index_a + k`` followed by a trailing ``1`` (the column of the
    free term); ``b[k]`` is the term that immediately follows that window.
    The system has ``order + 1`` rows, one per unknown.

    :param sequence: indexable sequence of numbers
    :param order: number of previous terms the recurrence depends on
    :param start_index_a: index of the first term used to build the system
    :return: tuple ``(a, b)`` of numpy arrays, or ``None`` when the sequence
        has fewer than ``start_index_a + 2 * order + 1`` items
    """
    n_rows = order + 1
    # The last index read is start_index_a + 2 * order (final entry of b).
    required_len = start_index_a + 2 * order + 1
    if len(sequence) < required_len:
        return None

    row_starts = range(start_index_a, start_index_a + n_rows)
    windows = np.array(
        [[sequence[pos] for pos in range(first, first + order)] for first in row_starts]
    )
    targets = np.array([sequence[first + order] for first in row_starts])
    # Extra column of ones carries the constant term of the recurrence.
    lhs = np.concatenate((windows, np.ones((n_rows, 1))), axis=1)
    return lhs, targets


In [3]:
# Smoke test: order-1 system built from the terms starting at index 2.
create_system([2, 4, 8, 16, 32], 1, 2)

(array([[ 8.,  1.],
        [16.,  1.]]), array([16, 32]))

In [60]:
def check_solution(sequence, solution, start=3):
    """
    Verify that an affine recurrence reproduces the terms of ``sequence``.

    ``solution`` holds the recurrence coefficients followed by the free term.
    Every window of ``order`` consecutive terms that begins at index ``start``
    or later is used to predict the term right after it, and the prediction
    is compared to the actual term with ``np.isclose``.

    :param sequence: sequence of numbers whose slices support ``@`` with ``solution[:-1]``
    :param solution: coefficients with the constant term last
    :param start: index of the first window to verify
    :return: True if every checked term matches (also when there is nothing
        to check), otherwise False
    """
    # The last entry of the solution is the free term, not a coefficient.
    order = len(solution) - 1
    window_starts = range(start, len(sequence) - order)
    return all(
        np.isclose(
            sequence[pos: pos + order] @ solution[:-1] + solution[-1],
            sequence[pos + order],
        )
        for pos in window_starts
    )

In [61]:
def check_k_order(sequence, order, start_index):
    """
    Try to fit an affine recurrence of the given order to ``sequence``.

    The system is built with ``create_system`` from the terms starting at
    ``start_index``, solved with ``np.linalg.solve``, and the result is then
    verified with ``check_solution`` from ``start_index`` onwards.

    @param sequence: sequence of numbers
    @param order: number of previous terms the recurrence depends on
    @param start_index: index of the first term used to build and verify the system
    @return:
        solution: coefficients followed by the free term if the system was
        solved and reproduces the checked terms, otherwise None
    """
    system = create_system(sequence, order, start_index)
    if system is None:
        # Sequence is too short to build a system of this order.
        return None
    a, b = system
    try:
        solution = np.linalg.solve(a, b)
    except (np.linalg.LinAlgError, IndexError):
        # Singular system: no unique recurrence of this order.
        return None
    # Accept the solution only if it reproduces every checked term.
    if check_solution(sequence, solution, start_index):
        return solution
    return None

In [62]:
def predict_1(sequence, max_order, start_index=3, verbose=False):
    """
    Predict the next term of ``sequence`` from the lowest-order affine
    recurrence (linear in the previous terms plus a constant) that fits.

    Orders ``1 .. max_order`` are tried in turn. For each order a system is
    built with ``create_system``, solved, and verified with
    ``check_solution``; the first order that passes is used for the
    prediction.

    :param sequence: sequence of numbers whose slices support ``@`` with a numpy array
    :param max_order: highest recurrence order to try
    :param start_index: index of the first term used to build and verify each system
    :param verbose: if True, print the order and leading coefficients of the fitted recurrence
    :return: predicted next term, or ``np.nan`` when no order up to ``max_order`` fits
    """
    for order in range(1, max_order + 1):
        system = create_system(sequence, order, start_index)
        if system is None:
            # Too few terms for this order, and higher orders need even more.
            break
        a, b = system
        try:
            solution = np.linalg.solve(a, b)
        except (np.linalg.LinAlgError, IndexError):
            # Singular system for this order; try the next one.
            continue
        if not check_solution(sequence, solution, start_index):
            continue
        if verbose:
            print(f"{order}-th RR found. Coefficients: {solution[:3]}...")
        # Apply the recurrence to the last `order` terms to get the next one.
        coefs, constant = solution[:-1], solution[-1]
        return sequence[-order:] @ coefs + constant
    return np.nan

In [63]:
# Fibonacci prefix with orders up to 2: the expected next term is 34.
predict_1([1, 1, 2, 3, 5, 8, 13, 21], 2, start_index=0)

34.0

In [64]:
# Five terms are the minimum create_system needs for an order-2 system at
# start_index=0; verbose prints the fitted coefficients.
predict_1([1, 1, 2, 3, 5], 5, start_index=0, verbose=True)

2-th RR found. Coefficients: [1. 1. 0.]...


8.0

In [99]:
# slice: keep only the trailing terms of each sequence
def make_prediction(data, max_order=5, minlen=10, maxlen=15, start_index=3, verbose=False):
    """
    Run ``predict_1`` over a collection of sequences and collect the
    predictions that succeeded.

    :param data: collection of sequences (list, numpy array or pandas Series)
    :param max_order: highest recurrence order passed to ``predict_1``
    :param minlen: sequences shorter than this are skipped
    :param maxlen: only the last ``maxlen`` terms of each sequence are used;
        ``-1`` keeps the whole sequence
    :param start_index: forwarded to ``predict_1``
    :param verbose: forwarded to ``predict_1``
    :return: ``(predicted_values, indices)`` - the rounded predictions and,
        for each of them, the index of its sequence in ``data`` (Series
        labels for a Series, positions otherwise)
    """
    # Only a Series carries index labels; numpy arrays and lists have no
    # ``.index`` attribute usable here and are addressed by position.
    if isinstance(data, pd.Series):
        ind_iter = data.index
    else:
        ind_iter = range(len(data))

    predicted_values = []
    indices = []
    for ind, seq in zip(ind_iter, data):
        if len(seq) < minlen:
            continue
        sequence = seq if maxlen == -1 else seq[-maxlen:]
        pred_val = predict_1(sequence, max_order, start_index, verbose=verbose)
        if np.isnan(pred_val):
            # No recurrence found for this sequence.
            continue
        predicted_values.append(np.round(pred_val))
        indices.append(ind)
    return predicted_values, indices

In [127]:
# End-to-end check on a single Fibonacci prefix; minlen is lowered because the
# default (10) would skip this 8-term sequence.
make_prediction([[1, 1, 2, 3, 5, 8, 13, 21]], start_index=0, minlen=5, verbose=True)

2-th RR found. Coefficients: [1. 1. 0.]...


([34.0], [0])

In [101]:
# Load the training sequences; the first CSV column is used as the index.
df_train = pd.read_csv('../data/train.csv', index_col=0)
# seq_to_num comes from the local tools module; presumably it splits each
# sequence into its known terms (train_X) and the term to predict (train_y) - TODO confirm.
train_X, train_y = seq_to_num(df_train.Sequence, pad=False)

In [131]:
# Try recurrences of order <= 3 on the first 50,000 training sequences, using
# each full sequence (maxlen=-1) and skipping those with fewer than 20 terms.
pred, ind = make_prediction(train_X[:50000], start_index=0, minlen=20, maxlen=-1, verbose=False, max_order=3)

In [132]:
# Number of sequences for which a recurrence was found.
len(ind)

2134

In [133]:
# Score only the sequences that received a prediction (ind), not the whole slice.
# acc_score comes from the local tools module - presumably the fraction of exact matches; TODO confirm.
acc_score(pred, train_y[ind])

0.9531396438612934