In [1]:
from functions import *

import os
import pandas as pd
import numpy as np

# Load the project settings; later cells read the 'project_path_s3' and
# 'bucket_name' keys from it.
# NOTE(review): read_settings is not defined in this notebook — presumably it
# arrives through `from functions import *`; confirm and import it explicitly.
settings = read_settings()

In [2]:
def create_random_dataframe_with_params(n_rows, n_cols, params, seed=None, intercept=0.5):
    """
    Create a DataFrame of uniform random features plus a binary target whose
    probability depends linearly on those features.

    Every feature value is drawn uniformly from [0, 1). For each row the score
    ``intercept + sum(value_i * params[i])`` is computed and the target is set
    to 1 when a fresh uniform draw falls below that score. The score therefore
    acts as P(target == 1); scores below 0 or above 1 behave like 0 and 1.

    Parameters:
    - n_rows: int, number of rows in the DataFrame
    - n_cols: int, number of feature columns in the DataFrame
    - params: list or array-like of length n_cols, weight applied to each feature column
    - seed: int, random seed for reproducibility (default is None). A given seed
      produces the same values as calling np.random.seed(seed) beforehand, but the
      global NumPy random state is left untouched. With None, values are drawn
      from the global NumPy generator.
    - intercept: float, constant added to the weighted sum before it is used as
      the target probability (default is 0.5)

    Returns:
    - DataFrame with shape (n_rows, n_cols+1): the binary 'target' column comes
      first, followed by the feature columns 'col_1' ... 'col_<n_cols>'.

    Raises:
    - ValueError: if len(params) differs from n_cols.
    """
    # Validate before touching any random state so a bad call has no side effects.
    if len(params) != n_cols:
        raise ValueError("The length of params must be equal to the number of columns (n_cols).")

    # A private RandomState reproduces the legacy np.random.seed(seed) stream
    # without reseeding the process-wide generator that other code may rely on.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order matters for reproducibility: features first, target noise second.
    features = rng.rand(n_rows, n_cols)
    probability = np.dot(features, params) + intercept
    target = (rng.rand(n_rows) < probability).astype(int)

    df = pd.DataFrame(features, columns=[f'col_{i+1}' for i in range(n_cols)])
    # Put the target in the first position, ahead of the feature columns.
    df.insert(0, 'target', target)

    return df

# One weight per feature column: negative weights lower the chance of
# target == 1, positive weights raise it, and zero weights make the column
# irrelevant to the target.
params = [-1, -1, -0.5, 0, 0, 0.5, 1, 1]

# Build the synthetic dataset; the fixed seed keeps the run reproducible.
df = create_random_dataframe_with_params(
    n_rows=100_000,
    n_cols=len(params),
    params=params,
    seed=42,
)

In [3]:
# Object key for the raw dataset, placed under the project's S3 path.
s3_key = f"{settings['project_path_s3']}/data/raw/data.csv"

# Write the DataFrame to the configured bucket.
# NOTE(review): save_df_to_s3 is defined outside this notebook (presumably in
# `functions`); decimal_places=5 appears to control float rounding in the CSV.
save_df_to_s3(
    df=df,
    bucket_name=settings['bucket_name'],
    s3_key=s3_key,
    decimal_places=5,
)

In [4]:
# Sanity check: stream the uploaded object to stdout and show its first 5 lines.
# NOTE(review): `head` stops reading after 5 lines while the CLI is still writing,
# which is presumably why "[Errno 32] Broken pipe" appears in the output —
# confirm it is harmless, or fetch only a byte range / read the file in Python.
!aws s3api get-object --bucket {settings['bucket_name']} --key {s3_key} /dev/stdout | head -n 5

target,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8
0,0.37454,0.95071,0.73199,0.59866,0.15602,0.15599,0.05808,0.86618
0,0.60112,0.70807,0.02058,0.96991,0.83244,0.21234,0.18182,0.18340
0,0.30424,0.52476,0.43195,0.29123,0.61185,0.13949,0.29214,0.36636
0,0.45607,0.78518,0.19967,0.51423,0.59241,0.04645,0.60754,0.17052

[Errno 32] Broken pipe
