In [4]:
import openpyxl
import openpyxl.styles
import openpyxl.utils

import openpyxl.worksheet.formula
import os
import re

In [5]:
file = open("./continuousDistributions.ts", "r", encoding="utf-8")

# Patrón de expresión regular
patron = r"cdf: function\s*\((.*?)\)"

parameters_replacements = {}
parameters = {}
for linea in file.readlines():
    if len(linea) > 2:
        if linea[-2] == "{" and "function (" not in linea and "else" not in linea and "if (" not in linea and "const" not in linea:
            dist = linea.replace("\n", "").replace(" ", "").replace(":{", "").replace("\t", "")
    coincidencia = re.search(patron, linea)
    if coincidencia:
        argumentos = coincidencia.group(1).split(",")
        argumentos = [arg.strip() for arg in argumentos]
        argumentos = argumentos[1:]
        parameters[dist] = argumentos
        parameters_replacements[dist] = dict(zip([f"C{i+3}" for i in range(len(argumentos))], argumentos))

In [6]:
def obtener_maximo_valor_columna(workbook, num_hoja, letra_columna):
    sheet = workbook[workbook.sheetnames[num_hoja]]
    return max((c.row for c in sheet[letra_columna] if c.value is not None))

In [7]:
# =DISTR.BETA.N(x;a;b;VERDADERO)	                         jStat.ibeta(x,a,b)
# =INV.BETA.N(u;a;b)	                                     jStat.ibetainv(u,a,b)
# =EXP(GAMMA.LN(a)+GAMMA.LN(a)-GAMMA.LN(a+b))	             jStat.betafn(a,b)
# =DISTR.GAMMA.N(x;a;1;VERDADERO)	                         jStat.lowRegGamma(a,x)
# =INV.GAMMA(u;a;1)	                                     jStat.gammapinv(u, a)
# =GAMMA(x)	                                             jStat.gammafn(x)
# =DISTR.NORM.ESTAND.N(x;VERDADERO)	                     jStat.std_cdf(x)
# =DISTR.NORM.ESTAND.N(x;FALSO)	                         jStat.std_pdf(x)
# =INV.NORM.ESTAND(u)	                                     jStat.inv_std_cdf(u)
# =BESSELI(x;n)	                                         BESSEL.besseli(x,n)
# =COMBINAT(n;r)                                            nCr(n, r)

In [8]:
def formula_replace(formula: str, replacements: dict[str, str]):
    formula = str(formula)
    formula = formula.replace("$", "")
    replacements = {
        **replacements,
        "SQRT": "Math.sqrt",
        "BESSELI": "BESSEL.besseli",
        "_xlfn.GAMMA": "jStat.gammafn",
        "EXP": "Math.exp",
        "PI()": "Math.PI",
        "complicated": "undefined",
        "_xlfn.BETA.INV": "jStat.ibetainv",
        "=": "",
        "^": "**",
        "L4": "k",
        "L5": "k",
        "L6": "k",
        "L7": "k",
        "LN": "Math.log",
        "None": "undefined",
        "SINH": "Math.sinh",
        "COSH": "Math.cosh",
    }
    regex = re.compile("(%s)" % "|".join(map(re.escape, replacements.keys())))
    return regex.sub(lambda mo: replacements[mo.string[mo.start():mo.end()]], formula) 


In [9]:
def format1(
    distribution_parameters,
    distribution_name,
    mean_formula,
    variance_formula,
    standard_deviation_formula,
    skewness_formula,
    kurtosis_formula,
    median_formula,
    mode_formula,
    parameters_numbers,
    values,
):
    bl = "\n"
    x = f"""
jStat = require("../node_modules/jstat");

dists = {{
    {distribution_name}: {{
        measurements: {{
            nonCentralMoments: function (k, {distribution_parameters}) {{
                return undefined;
            }},
            centralMoments: function (k, {distribution_parameters}) {{
                return undefined;
            }},
            stats: {{
                mean: function ({distribution_parameters}) {{
                    return {mean_formula};
                }},
                variance: function ({distribution_parameters}) {{
                    return {variance_formula};
                }},
                standardDeviation: function ({distribution_parameters}) {{
                    return Math.sqrt(this.variance({distribution_parameters}));
                }},
                skewness: function ({distribution_parameters}) {{
                    return {skewness_formula};
                }},
                kurtosis: function ({distribution_parameters}) {{
                    return {kurtosis_formula};
                }},
                median: function ({distribution_parameters}) {{
                    return dists.{distribution_name}.measurements.ppf(0.5, {distribution_parameters});
                }},
                mode: function ({distribution_parameters}) {{
                    return {mode_formula};
                }},
            }},
        }}
    }}
}}
console.log(dists.{distribution_name}.measurements.stats.mean({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.variance({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.standardDeviation({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.skewness({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.kurtosis({', '.join(parameters_numbers)}))
// console.log(dists.{distribution_name}.measurements.stats.median({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.mode({', '.join(parameters_numbers)}))

{f"{bl}".join([f"// {k}: {v}" for k,v in values.items()])}
"""

    file = open(f"./outputs/{distribution_name}.js", "w", encoding="utf-8")
    file.write(x[1:-1])
    file.close()

In [10]:
def format2(
    distribution_parameters,
    distribution_name,
    nc_µ1_formula,
    mode_formula,
    parameters_numbers,
    values,
):
    mean_formula_correction = "µ1"
    if "loc" in distribution_parameters and "scale" in distribution_parameters:
        mean_formula_correction = "loc + scale * µ1"
    elif "loc" in distribution_parameters:
        mean_formula_correction = "loc + µ1"
    elif "scale" in distribution_parameters:
        mean_formula_correction = "scale * µ1"
    if "miu" in distribution_parameters and "sigma" in distribution_parameters:
        mean_formula_correction = "miu + sigma * µ1"
        

    variance_formula_correction = "µ2 - µ1 ** 2"
    if "scale" in distribution_parameters:
        variance_formula_correction = "(scale ** 2) * (µ2 - µ1 ** 2)"
    elif "sigma" in distribution_parameters:
        variance_formula_correction = "(sigma ** 2) * (µ2 - µ1 ** 2)"

    bl = "\n"
    x = f"""
jStat = require("../node_modules/jstat");

dists = {{
    {distribution_name}: {{
        measurements: {{
            nonCentralMoments: function (k, {distribution_parameters}) {{
                return {nc_µ1_formula}
            }},
            centralMoments: function (k, {distribution_parameters}) {{
                const µ1 = this.nonCentralMoments(1, {distribution_parameters});
                const µ2 = this.nonCentralMoments(2, {distribution_parameters});
                const µ3 = this.nonCentralMoments(3, {distribution_parameters});
                const µ4 = this.nonCentralMoments(4, {distribution_parameters});

                let result;
                switch (k) {{
                    case 1: result = 0; break;
                    case 2: result = µ2 - µ1 ** 2; break;
                    case 3: result = µ3 - 3 * µ1 * µ2 + 2 * µ1 ** 3; break;
                    case 4: result = µ4 - 4 * µ1 * µ3 + 6 * (µ1 ** 2) * µ2 - 3 * (µ1 ** 4); break;
                }};
                return result
            }},
            stats: {{
                mean: function ({distribution_parameters}) {{
                    const µ1 = dists.{distribution_name}.measurements.nonCentralMoments(1, {distribution_parameters});
                    return {mean_formula_correction};
                }},
                variance: function ({distribution_parameters}) {{
                    const µ1 = dists.{distribution_name}.measurements.nonCentralMoments(1, {distribution_parameters});
                    const µ2 = dists.{distribution_name}.measurements.nonCentralMoments(2, {distribution_parameters});
                    return {variance_formula_correction};
                }},
                standardDeviation: function ({distribution_parameters}) {{
                    return Math.sqrt(this.variance({distribution_parameters}));
                }},
                skewness: function ({distribution_parameters}) {{
                    const central_µ3 = dists.{distribution_name}.measurements.centralMoments(3, {distribution_parameters});
                    return central_µ3 / (this.standardDeviation({distribution_parameters}) ** 3);
                }},
                kurtosis: function ({distribution_parameters}) {{
                    const central_µ4 = dists.{distribution_name}.measurements.centralMoments(4, {distribution_parameters});
                    return central_µ4 / (this.standardDeviation ({distribution_parameters}) ** 4);
                }},
                median: function ({distribution_parameters}) {{
                    return dists.{distribution_name}.measurements.ppf(0.5, {distribution_parameters});
                }},
                mode: function ({distribution_parameters}) {{
                    return {mode_formula};
                }},
            }},
        }}
    }}
}}
console.log(dists.{distribution_name}.measurements.stats.mean({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.variance({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.standardDeviation({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.skewness({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.kurtosis({', '.join(parameters_numbers)}))
// console.log(dists.{distribution_name}.measurements.stats.median({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.mode({', '.join(parameters_numbers)}))

{f"{bl}".join([f"// {k}: {v}" for k,v in values.items()])}
"""

    file = open(f"./outputs/{distribution_name}.js", "w", encoding="utf-8")
    file.write(x[1:-1])
    file.close()

In [11]:
def format3(
    distribution_parameters,
    distribution_name,
    nc_µ1_formula,
    nc_µ2_formula,
    nc_µ3_formula,
    nc_µ4_formula,
    mode_formula,
    parameters_numbers,
    values,
):
    mean_formula_correction = "µ1"
    if "loc" in distribution_parameters and "scale" in distribution_parameters:
        mean_formula_correction = "loc + scale * µ1"
    elif "loc" in distribution_parameters:
        mean_formula_correction = "loc + µ1"
    elif "scale" in distribution_parameters:
        mean_formula_correction = "scale * µ1"
    if "miu" in distribution_parameters and "sigma" in distribution_parameters:
        mean_formula_correction = "miu + sigma * µ1"

    variance_formula_correction = "µ2 - µ1 ** 2"
    if "scale" in distribution_parameters:
        variance_formula_correction = "(scale ** 2) * (µ2 - µ1 ** 2)"
    elif "sigma" in distribution_parameters:
        variance_formula_correction = "(sigma ** 2) * (µ2 - µ1 ** 2)"

    bl = "\n"
    x = f"""
jStat = require("../node_modules/jstat");

dists = {{
    {distribution_name}: {{
        measurements: {{
            nonCentralMoments: function (k, {distribution_parameters}) {{
                let result;
                switch (k) {{
                    case 1: result = {nc_µ1_formula}; break;
                    case 2: result = {nc_µ2_formula}; break;
                    case 3: result = {nc_µ3_formula}; break;
                    case 4: result = {nc_µ4_formula}; break;
                }};
                return result
            }},
            centralMoments: function (k, {distribution_parameters}) {{
                const µ1 = this.nonCentralMoments(1, {distribution_parameters});
                const µ2 = this.nonCentralMoments(2, {distribution_parameters});
                const µ3 = this.nonCentralMoments(3, {distribution_parameters});
                const µ4 = this.nonCentralMoments(4, {distribution_parameters});

                let result;
                switch (k) {{
                    case 1: result = 0; break;
                    case 2: result = µ2 - µ1 ** 2; break;
                    case 3: result = µ3 - 3 * µ1 * µ2 + 2 * µ1 ** 3; break;
                    case 4: result = µ4 - 4 * µ1 * µ3 + 6 * (µ1 ** 2) * µ2 - 3 * (µ1 ** 4); break;
                }};
                return result
            }},
            stats: {{
                mean: function ({distribution_parameters}) {{
                    const µ1 = dists.{distribution_name}.measurements.nonCentralMoments(1, {distribution_parameters});
                    return {mean_formula_correction};
                }},
                variance: function ({distribution_parameters}) {{
                    const µ1 = dists.{distribution_name}.measurements.nonCentralMoments(1, {distribution_parameters});
                    const µ2 = dists.{distribution_name}.measurements.nonCentralMoments(2, {distribution_parameters});
                    return {variance_formula_correction};
                }},
                standardDeviation: function ({distribution_parameters}) {{
                    return Math.sqrt(this.variance({distribution_parameters}));
                }},
                skewness: function ({distribution_parameters}) {{
                    const central_µ3 = dists.{distribution_name}.measurements.centralMoments(3, {distribution_parameters});
                    return central_µ3 / (this.standardDeviation({distribution_parameters}) ** 3);
                }},
                kurtosis: function ({distribution_parameters}) {{
                    const central_µ4 = dists.{distribution_name}.measurements.centralMoments(4, {distribution_parameters});
                    return central_µ4 / (this.standardDeviation ({distribution_parameters}) ** 4);
                }},
                median: function ({distribution_parameters}) {{
                    return dists.{distribution_name}.measurements.ppf(0.5, {distribution_parameters});
                }},
                mode: function ({distribution_parameters}) {{
                    return {mode_formula};
                }},
            }},
        }}
    }}
}}
console.log(dists.{distribution_name}.measurements.stats.mean({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.variance({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.standardDeviation({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.skewness({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.kurtosis({', '.join(parameters_numbers)}))
// console.log(dists.{distribution_name}.measurements.stats.median({', '.join(parameters_numbers)}))
console.log(dists.{distribution_name}.measurements.stats.mode({', '.join(parameters_numbers)}))

{f"{bl}".join([f"// {k}: {v}" for k,v in values.items()])}
"""

    file = open(f"./outputs/{distribution_name}.js", "w", encoding="utf-8")
    file.write(x[1:-1])
    file.close()


In [12]:
def generate_file_distribution(file_name: str):
    distribution_name = file_name.split(".")[0].lower()

    workbook_template = openpyxl.load_workbook(f"../excel-files/continuous/{file_name}")
    sheet_0_template = workbook_template[workbook_template.sheetnames[0]]

    workbook_template_values = openpyxl.load_workbook(f"../excel-files/continuous/{file_name}", data_only=True)
    sheet_0_values = workbook_template_values[workbook_template_values.sheetnames[0]]

    mean_value = sheet_0_values.cell(row=3, column=10).value
    variance_value = sheet_0_values.cell(row=4, column=10).value
    standard_deviation_value = sheet_0_values.cell(row=5, column=10).value
    skewness_value = sheet_0_values.cell(row=6, column=10).value
    kurtosis_value = sheet_0_values.cell(row=7, column=10).value
    median_value = sheet_0_values.cell(row=8, column=10).value
    mode_value = sheet_0_values.cell(row=9, column=10).value

    values = {
        "mean_value": mean_value,
        "variance_value": variance_value,
        "standard_deviation_value": standard_deviation_value,
        "skewness_value": skewness_value,
        "kurtosis_value": kurtosis_value,
        "median_value": median_value,
        "mode_value": mode_value,
    }

    mean_formula = sheet_0_template.cell(row=3, column=10).value
    variance_formula = sheet_0_template.cell(row=4, column=10).value
    standard_deviation_formula = sheet_0_template.cell(row=5, column=10).value
    skewness_formula = sheet_0_template.cell(row=6, column=10).value
    kurtosis_formula = sheet_0_template.cell(row=7, column=10).value
    median_formula = sheet_0_template.cell(row=8, column=10).value
    mode_formula = sheet_0_template.cell(row=9, column=10).value

    nc_µ1_formula = sheet_0_template.cell(row=4, column=13).value
    nc_µ2_formula = sheet_0_template.cell(row=5, column=13).value
    nc_µ3_formula = sheet_0_template.cell(row=6, column=13).value
    nc_µ4_formula = sheet_0_template.cell(row=7, column=13).value

    distribution_parameters = ", ".join(parameters[distribution_name])


    n_params = obtener_maximo_valor_columna(workbook_template, 0, "C") - 2
    parameters_numbers = [str(sheet_0_template.cell(row=row, column=3).value) for row in range(3, 3 + n_params) if type(sheet_0_template.cell(row=row, column=3).value) != str]

    if nc_µ1_formula is None:
        format1(
            distribution_parameters,
            distribution_name,
            formula_replace(mean_formula, parameters_replacements[distribution_name]),
            formula_replace(variance_formula, parameters_replacements[distribution_name]),
            formula_replace(standard_deviation_formula, parameters_replacements[distribution_name]),
            formula_replace(skewness_formula, parameters_replacements[distribution_name]),
            formula_replace(kurtosis_formula, parameters_replacements[distribution_name]),
            formula_replace(median_formula, parameters_replacements[distribution_name]),
            formula_replace(mode_formula, parameters_replacements[distribution_name]),
            parameters_numbers,
            values,
        )
    else:
        b = (
            formula_replace(nc_µ1_formula, parameters_replacements[distribution_name])
            == formula_replace(nc_µ2_formula, parameters_replacements[distribution_name])
            == formula_replace(nc_µ3_formula, parameters_replacements[distribution_name])
            == formula_replace(nc_µ4_formula, parameters_replacements[distribution_name])
        )
        if b:
            format2(
                distribution_parameters,
                distribution_name,
                formula_replace(nc_µ1_formula, parameters_replacements[distribution_name]),
                formula_replace(mode_formula, parameters_replacements[distribution_name]),
                parameters_numbers,
                values,
            )
        else:
            format3(
                distribution_parameters,
                distribution_name,
                formula_replace(nc_µ1_formula, parameters_replacements[distribution_name]),
                formula_replace(nc_µ2_formula, parameters_replacements[distribution_name]),
                formula_replace(nc_µ3_formula, parameters_replacements[distribution_name]),
                formula_replace(nc_µ4_formula, parameters_replacements[distribution_name]),
                formula_replace(mode_formula, parameters_replacements[distribution_name]),
                parameters_numbers,
                values,
            )


In [13]:
# for file_name in os.listdir("./excel-files"):
#     if file_name not in os.listdir("./outputs"):
#         try:
#             generate_file_distribution(file_name)
#         except:
#             print(file_name)

In [14]:
# for file_name in os.listdir("./files"):
#     os.remove(f"./files/{file_name}")

In [15]:
i = 3

In [16]:
for file_name in os.listdir("./outputs")[i:]:
    os.remove(f"./outputs/{file_name}")

In [41]:
# for file_name in os.listdir("../excel-files/continuous/"):
#     try:
#         generate_file_distribution(file_name)
#         # print(file_name)
#         # print(file_name, parameters_replacements[file_name.replace(".xlsx", "").lower()])
#     except:
#         print("xxx" + file_name)

In [17]:
for file_name in os.listdir("../excel-files/continuous/")[i:]:
    generate_file_distribution(file_name)

PermissionError: [Errno 13] Permission denied: '../excel-files/continuous/~$beta_prime.xlsx'