# Inference on diagonals

The point of this notebook is to see how many series "on a diagonal" (meaning the set of series which all sum to the same value) the model needs to be trained on before it "learns" every series on that diagonal.

In [4]:
import pandas as pd
import numpy as np

import random

## Data generation

In [5]:
def _generate_sample_from_y(n_terms, n_digits, y):
    """Draw a random series of ``n_terms`` non-negative integers summing to ``y``.

    Generates a sample for a chosen sum (used to uniformly distribute the sums).
    Every term has at most ``n_digits`` decimal digits, i.e. lies in
    ``[0, 10 ** n_digits - 1]``. All terms but one are drawn at random from the
    range that keeps ``y`` reachable, the final term is whatever remains, and
    the series is shuffled so the forced term does not always come last.

    Draws use ``np.random`` and the shuffle uses the stdlib ``random`` module,
    so both generators must be seeded for a reproducible result.

    Parameters
    ----------
    n_terms : int
        Number of terms in the series; must be at least 1.
    n_digits : int
        Maximum number of decimal digits per term; must be non-negative.
    y : int
        Target sum; must satisfy ``0 <= y <= n_terms * (10 ** n_digits - 1)``.

    Returns
    -------
    tuple
        ``(x, y)``: ``x`` is a list of ``n_terms`` Python ints that sum to
        ``y``; ``y`` is returned exactly as it was passed in.

    Raises
    ------
    ValueError
        If ``n_terms < 1``, ``n_digits < 0``, ``y`` is not an integer value, or
        ``y`` cannot be written as a sum of ``n_terms`` such terms.
    """
    if n_terms < 1:
        raise ValueError("n_terms must be at least 1, got {}".format(n_terms))
    if n_digits < 0:
        raise ValueError("n_digits must be non-negative, got {}".format(n_digits))

    max_term = 10 ** n_digits - 1

    # Track the amount still to be distributed as a plain Python int. Using
    # np.sum(x) here would yield 0.0 for the empty list and leak floats /
    # numpy scalars into the returned series.
    remaining = int(y)
    if remaining != y:
        raise ValueError("y must be an integer value, got {!r}".format(y))
    if not 0 <= remaining <= n_terms * max_term:
        raise ValueError(
            "y={} cannot be written as a sum of {} terms in [0, {}]".format(
                y, n_terms, max_term
            )
        )

    x = []
    # terms_after is the number of terms still to be chosen after the current one.
    for terms_after in range(n_terms - 1, 0, -1):
        # Don't pick a number that would push the running sum above y, nor one
        # with more than n_digits digits.
        upper_bound = min(remaining, max_term)
        # Don't pick a number so small that the remaining terms could no longer
        # reach y. For example, with y = 150, n_terms = 2 and n_digits = 2 we
        # can't pick 49, or else the other term would have to be 101.
        lower_bound = max(0, remaining - max_term * terms_after)

        if upper_bound > 0:
            term = int(np.random.randint(lower_bound, upper_bound + 1))
        else:
            # Nothing left to distribute; skip the draw (as the original did)
            # so the RNG stream is consumed identically.
            term = 0
        x.append(term)
        remaining -= term

    # The last term is forced; the bounds above guarantee it is in [0, max_term].
    x.append(remaining)
    random.shuffle(x)

    return x, y

In [6]:
# Example draw: three terms of at most two digits each that sum to 100.
# The terms are random, so the displayed output differs between runs unless
# both np.random and random are seeded.
_generate_sample_from_y(3, 2, 100)

([7, 36, 57], 100)

In [None]:
def all_series_on_diagonal(n_terms, n_digits, y):
    
    X = np.zeros(shape=())