# Importing Libraries
Import the necessary Python libraries for data manipulation, statistical analysis, and plotting. These include Pandas for data handling, Matplotlib for plotting, NumPy for numerical operations, and SciPy for statistical functions.


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm, pareto
from scipy.special import erf

# Data Generation Functions
Define functions to generate and manipulate distributions:
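- `get_clipped_lognormal`: Draws normally distributed values (the logarithms of lognormal sizes) and keeps the largest `sample_size` draws, approximating a lognormal distribution clipped at a lower bound.
- `get_n`: Cuts the running total of `10**x` into a given number of equal-width intervals and counts the number of observations that fall in each interval.
- `generate_pareto_samples`: Produces samples from a Pareto distribution and returns their base-10 logarithm.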
These functions are essential for simulating different economic data distributions used in the analysis.


In [3]:

def get_clipped_lognormal(mu, sigma, lower_bound, sample_size):
    """
    Draw log-space samples of a lognormal variable, approximately clipped from below.

    The values are drawn from a normal distribution (i.e. they are the
    logarithms of lognormal values). More draws than requested are generated,
    inflated by the reciprocal of the expected share of draws lying above
    ``lower_bound``, and only the ``sample_size`` largest draws are kept. The
    cut therefore sits at an order statistic of the sample, which is close to,
    but not exactly at, ``lower_bound``.

    :param mu: Mean of the underlying normal distribution
    :param sigma: Standard deviation of the underlying normal distribution
    :param lower_bound: The threshold (in the same log space) used for clipping
    :param sample_size: The number of samples to return
    :return: Ascending-sorted array holding the ``sample_size`` largest draws
    """
    # Standard normal CDF evaluated at the standardized distance between the
    # mean and the bound; its complement is the expected mass below the bound.
    standardized_gap = (mu - lower_bound) / sigma
    cdf_at_gap = .5 * (1 + erf(standardized_gap / np.sqrt(2)))
    mass_below_bound = 1 - cdf_at_gap

    # Oversample so that, in expectation, `sample_size` draws exceed the bound.
    total_draws = int(round(sample_size / (1 - mass_below_bound)))
    ordered_draws = np.sort(np.random.normal(mu, sigma, total_draws))

    # Keep only the upper end of the sorted draws.
    return ordered_draws[-sample_size:]


def get_n(x, num_quantiles):
    """
    Partition observations by their position in the cumulative total of 10**x.

    The running sum of ``10**x`` (accumulated in the order in which ``x`` is
    given) is cut with ``pd.cut`` into ``num_quantiles`` equal-width intervals.
    Each interval therefore spans the same amount of cumulative total, not the
    same number of observations.

    :param x: Array of base-10 exponents; ``10**x`` is what gets accumulated.
    :param num_quantiles: Number of equal-width intervals to cut the cumulative sum into.
    :return: Tuple (bins, counts) where bins is a categorical Series giving the
             interval each observation falls in, and counts is an array with the
             number of observations per interval, listed in interval order
             (empty intervals are reported with a count of 0).
    """
    cumulative_sum = np.cumsum(np.power(10, x))
    bins = pd.cut(pd.Series(cumulative_sum), num_quantiles)
    # value_counts() sorts by descending frequency by default, which would
    # detach counts[i] from the i-th interval; sort=False keeps interval order.
    counts = bins.value_counts(sort=False).values
    return bins, counts

def generate_pareto_samples(n, shape_param, scale_param):
    """
    Draw Pareto-distributed values and return them on a base-10 log scale.

    Each draw is shifted up by 1 before the logarithm is taken.

    :param n: Number of samples to generate.
    :param shape_param: Negative of the Pareto shape; SciPy's ``b`` is set to ``-shape_param``.
    :param scale_param: Base-10 exponent of the scale; the scale used is ``10**scale_param``.
    :return: Array with ``log10(sample + 1)`` for each Pareto sample.
    """
    pareto_shape = -shape_param
    pareto_scale = 10 ** scale_param
    raw_draws = pareto.rvs(b=pareto_shape, scale=pareto_scale, size=n)
    shifted_draws = raw_draws + 1
    return np.log10(shifted_draws)


# Setting Up Distribution Parameters and Generating Data
- Define parameters for a lognormal distribution (`sigma`, `mu`) and a clipping threshold (`lower_bound`).
- Generate a sample of a lognormal distribution clipped at `lower_bound`.
- Use the `get_n` function to divide this distribution into quantiles and calculate the counts in each quantile.
- Prepare for generating a Pareto distribution by setting its parameters (`z_0`, `value_qs_1`).
- Generate Pareto distribution samples and store them for analysis.


In [4]:

# Parameters passed to get_clipped_lognormal: mean and standard deviation of
# the normal distribution it samples from, the clipping threshold, and the
# number of samples to keep.
# NOTE(review): get_n raises 10 to these values, so they are presumably
# base-10 logarithms of firm sizes — confirm against the data source.
sigma = 1.2810
mu = 4.53690
lower_bound = 3.0
sample_size = int(1e5)

# Generating the clipped lognormal distribution (returned sorted ascending)
x_logn_clip3 = get_clipped_lognormal(mu, sigma, lower_bound, sample_size)

# Split the sample into 10 partitions of the cumulative total of 10**x and
# add up every count returned by get_n except the first one.
bins, partition_ns = get_n(x_logn_clip3, 10)
N_tail = partition_ns[1:].sum()

# Keep only the N_tail largest values of the sorted sample; the "90" suffix
# presumably refers to the nine of ten partitions that remain.
x_logn_clip3_90 = x_logn_clip3[-N_tail:]

# Parameters for the Pareto distribution: generate_pareto_samples negates z_0
# to obtain the SciPy shape `b` and uses value_qs_1 as a base-10 exponent for
# the scale.
# NOTE(review): norm_factor is not referenced by the code around this cell —
# confirm where it is consumed.
z_0 = -1.10420
value_qs_1 = 6.67465
norm_factor = 1.375

# Generating Pareto distribution samples (same count as the 90% lognormal tail)
x_pareto = generate_pareto_samples(N_tail, z_0, value_qs_1)

# Analyzing and Storing Experiment Results
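- Run the simulations: for each shock distribution (`norm`, `lapl`), size distribution, partition returned by `get_n`, and shock intensity, draw `num_repetitions` independent shock matrices and record the mean and standard deviation (across years) of the log10 aggregate ratio.
- Convert the accumulated results from the experiments into a Pandas DataFrame for easier analysis and visualization.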
- The DataFrame `experiment_data` contains detailed results for each experiment iteration, including distribution type, size distribution, shock intensity, and calculated metrics.
- Print a completion message to signify the end of the experiments.


In [5]:
import numpy as np
import pandas as pd

# Experiment parameters
ss = np.arange(0.1, 0.8, 0.2)  # shock intensities: 0.1, 0.3, 0.5, 0.7 (up to float rounding)
num_repetitions = 100
num_years = 17
num_quantiles = 10
distributions = ['norm', 'lapl']
size_distributions = [x_logn_clip3, x_logn_clip3_90, x_pareto]
size_dist_names = ['Logn', 'Logn90', 'Pareto']

# Prepare for results
experiment_results = []

# Run experiments for each shock distribution and size distribution.
# For every partition of firms, every shock intensity and every repetition,
# a (num_firms, num_years) matrix of log10 shocks is drawn and the log10 ratio
# of the shocked aggregate to the unshocked aggregate is computed per year.
for dist in distributions:
    for size_dist_name, size_dist in zip(size_dist_names, size_distributions):
        print('Running experiments for', size_dist_name)
        bins, partition_counts = get_n(size_dist, num_quantiles)
        size_series = pd.Series(size_dist)
        for quantile in bins.unique():
            firm_sizes = size_series.loc[bins == quantile].values
            # Count the firms actually selected rather than indexing the counts
            # array: bins.unique() lists only non-empty bins in order of
            # appearance, so its positions need not line up with the counts,
            # and a mismatch breaks the broadcast against the shock matrix.
            num_firms = len(firm_sizes)

            # Loop-invariant pieces of the ratio, computed once per partition.
            log_sizes = firm_sizes[:, None]
            baseline_total = np.power(10, log_sizes).sum(0)

            for s in ss:
                for repetition in range(num_repetitions):
                    # Generate shocks with standard deviation s.
                    if dist == 'norm':
                        shocks = np.random.normal(0, s, (num_firms, num_years))
                    elif dist == 'lapl':
                        # A Laplace scale of s / sqrt(2) gives standard deviation s.
                        shocks = np.random.laplace(0, s/np.sqrt(2), (num_firms, num_years))
                    else:
                        raise ValueError(f"Unknown shock distribution: {dist!r}")

                    # Calculate log of aggregated ratios (one value per year)
                    aggregated_ratios = np.log10(np.power(10, log_sizes + shocks).sum(0) /
                                                 baseline_total)

                    # Store results
                    experiment_results.append([dist, size_dist_name, s, num_firms,
                                               repetition, aggregated_ratios.mean(), aggregated_ratios.std()])

# Convert results to DataFrame
experiment_data = pd.DataFrame(experiment_results,
                               columns=['Distribution', 'Size_Dist', 'Shock_Intensity',
                                        'Num_Firms', 'Repetition', 'Mean_Aggregated_Ratio', 'Std_Aggregated_Ratio'])

print("Experiments completed.")