##### Notebook to generate a large sample of random points. The sample is confined within the limits ra=[0, 100] (degrees), dec=[-25, 25] (degrees) and z=[0.01, 2]. Each point also carries a weight, drawn from a normal distribution with mean=0.5 and standard deviation=0.05. The final sample consists of 500 files, each containing 20 000 000 objects, stored in parquet format.

In [9]:
import numpy as np
import pandas as pd

def uniform_sky(ralim, declim, zlim=False, n=1):
    """
    Generate n random points distributed uniformly over a patch of sky.

    Declination is drawn uniformly in sin(dec) rather than in dec itself,
    so the points have constant density per unit solid angle. Each point
    also receives a weight drawn from a normal distribution with mean 0.5
    and standard deviation 0.05.

    Parameters:
    ralim (list or tuple): (min, max) of right ascension, in degrees.
    declim (list or tuple): (min, max) of declination, in degrees.
    zlim (list, tuple, array, False or None): (min, max) of the z values,
        drawn uniformly. Pass False (the default) or None to skip them.
    n (int): The number of random points to generate (default is 1).

    Returns:
    tuple: (ra, dec, z, gals_w). ra, dec and gals_w are arrays of length
        n; z is an array of length n, or None when zlim is False/None.
    """
    # Map the declination limits to sin(dec) and sample uniformly there.
    sin_dec_lim = np.sin(np.radians(declim))
    sin_dec = np.random.uniform(sin_dec_lim[0], sin_dec_lim[1], n)
    # Convert the sampled sine values back to declination in degrees.
    dec = np.degrees(np.arcsin(sin_dec))
    # Right ascension is uniform in the angle itself.
    ra = np.random.uniform(ralim[0], ralim[1], n)
    # Identity checks (not `!=`) so array-like limits are not compared
    # elementwise, and z is always bound even when no limits are given.
    if zlim is False or zlim is None:
        z = None
    else:
        z = np.random.uniform(zlim[0], zlim[1], n)
    # Per-point weights: np.random.normal takes the standard deviation.
    mu, sigma = 0.5, 0.05
    gals_w = np.random.normal(mu, sigma, n)
    return ra, dec, z, gals_w

# Limits of the random catalogue passed to uniform_sky.
ralim = [0, 100]    # right ascension limits, 100 wide
declim = [-25, 25]  # declination limits, 50 wide
zlim = [0.01, 2]    # z limits

# Produce 500 parquet parts, each holding 20 000 000 random points with
# the columns ra, dec, z and weight.
for part in range(500):
    ra, dec, z, weight = uniform_sky(ralim, declim, zlim, n=20000000)
    frame = pd.DataFrame({'ra': ra, 'dec': dec, 'z': z, 'weight': weight})
    out_path = f'ocean-shared/bigrand_corrgi/p_files/big_random_part_{part}.parquet'
    frame.to_parquet(out_path, engine='pyarrow')
