## Data Generation for Testing VanillaMLP framework

In [1]:
import pandas as pd
import numpy as np

from IPython.core.interactiveshell import InteractiveShell
# Have IPython display the result of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"


### 1. Correctness Test

In [54]:
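## Returns a DataFrame with n rows and columns y, x1, ..., xp. Each feature x_j is drawn i.i.d. from N(j, 0.5^2); the column of 1s is
## only used to add the bias and is not stored. Each y_i = bias + sum_j weights[j-1] * x_ij + error_i, where all error_i follow
## i.i.d. N(0, error_sd^2).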

def generate_linear_data(n, p, weights, bias, error_sd=0.1):
    """Generate a synthetic linear-regression dataset.

    Each feature x_j (j = 1..p) is drawn i.i.d. from N(j, 0.5^2) and the
    response is y = bias + sum_j weights[j-1] * x_j + error, where error is
    drawn i.i.d. from N(0, error_sd^2). Sampling uses NumPy's global random
    state, so call ``np.random.seed`` first for reproducible output.

    Args:
        n: Number of rows to generate.
        p: Number of feature columns.
        weights: Indexable collection holding at least ``p`` coefficients;
            ``weights[j-1]`` multiplies ``x_j``. Entries beyond the first
            ``p`` are ignored (a warning is emitted).
        bias: Intercept added to every response value.
        error_sd: Standard deviation of the additive Gaussian noise.

    Returns:
        A pandas DataFrame with ``n`` rows and the columns
        ``'y', 'x1', ..., 'xp'`` in that order.

    Raises:
        IndexError: If fewer than ``p`` weights are supplied.
    """
    import warnings

    # Validate up front so a bad call fails before any draws are taken from
    # the global RNG (the failure used to surface midway through the loop).
    n_weights = len(weights)
    if n_weights < p:
        raise IndexError(
            "generate_linear_data needs at least p={} weights, got {}".format(
                p, n_weights
            )
        )
    if n_weights > p:
        # Kept non-fatal for backward compatibility: extras were always ignored.
        warnings.warn(
            "generate_linear_data got {} weights but p={}; "
            "extra weights are ignored".format(n_weights, p),
            stacklevel=2,
        )

    # 'y' is inserted first so it stays the leading column of the DataFrame.
    data = {'y': np.zeros(n)}
    data['y'] += bias
    for j in range(1, p + 1):
        # Feature j is centred at j so every column has a distinct mean.
        feature = np.random.normal(j, 0.5, n)
        data['y'] += feature * weights[j - 1]
        data['x' + str(j)] = feature
    # Noise is drawn after all features, preserving the original draw order.
    data['y'] += np.random.normal(0, error_sd, n)
    return pd.DataFrame(data)

In [79]:
# Seed NumPy's global RNG once so both datasets below are reproducible.
np.random.seed(123)
# Both datasets share the same seven coefficients, a zero bias and
# near-zero noise; only the number of rows differs.
data_linear_small = generate_linear_data(
    n=35, p=7, weights=[1, 3, 34, 23, 15, 17, 8], bias=0, error_sd=0.0001
)
data_linear_large = generate_linear_data(
    n=3500, p=7, weights=[1, 3, 34, 23, 15, 17, 8], bias=0, error_sd=0.0001
)


In [80]:
# Save both datasets as CSV files (paths are relative to the working directory).
data_linear_small.to_csv(path_or_buf="data_linear_small.csv")
data_linear_large.to_csv(path_or_buf="data_linear_large.csv")