In [16]:
import os
import re

In [17]:
def process_py_file(file_path):
    """Return the source of a Python file reduced to its bare definitions.

    The file is read as UTF-8 and the following are stripped, in this order:

    1. top-level ``import ...`` / ``from ... import ...`` lines,
    2. top-level ``sys.path.append(...)`` lines,
    3. ``#`` comments (a ``#`` inside a single-line string literal is kept),
    4. every triple-quoted string (docstrings, but also any other
       triple-quoted literal -- this is a textual filter, not a parser),
    5. everything from a top-level ``if __name__ == "__main__":`` guard to
       the end of the file (either quote style),
    6. blank lines and leading/trailing whitespace of the whole text.

    Args:
        file_path: Path of the Python source file to read.

    Returns:
        The reduced source text as a single string.
    """
    with open(file_path, "r", encoding="utf8") as f:
        file_content = f.read()

    # Remove import statements. The whitespace after "import" is required so
    # that ordinary top-level names such as ``importance = 1`` are not deleted.
    file_content = re.sub(r"^import[ \t].*$|^from .* import.*$", "", file_content, flags=re.MULTILINE)

    # Remove sys.path manipulation (dots escaped so only the literal call matches).
    file_content = re.sub(r"^sys\.path\.append.*\n", "", file_content, flags=re.MULTILINE)

    # Remove comments. Single-line string literals are matched first and put
    # back unchanged, so a "#" inside a string (e.g. "#fff") is not treated
    # as the start of a comment.
    string_or_comment = re.compile(r"(\"(?:\\.|[^\"\\\n])*\"|'(?:\\.|[^'\\\n])*')|#.*")
    file_content = string_or_comment.sub(lambda match: match.group(1) or "", file_content)

    # Remove function descriptions and docstrings (any triple-quoted string).
    triple_double_quoted = r"(\"\"\")(.*?)(\"\"\")"
    triple_single_quoted = r"(\'\'\')(.*?)(\'\'\')"
    any_triple_quoted = f"{triple_double_quoted}|{triple_single_quoted}"

    file_content = re.sub(any_triple_quoted, "", file_content, flags=re.DOTALL)

    # Remove the top-level ``if __name__ == "__main__":`` block and everything
    # after it; both quote styles around __main__ are accepted.
    file_content = re.sub(
        r"^if\s+__name__\s*==\s*[\"']__main__[\"']\s*:[\s\S]*",
        "",
        file_content,
        flags=re.MULTILINE,
    )

    # Remove any empty lines and extra whitespaces
    file_content = re.sub(r"^\s*\n", "", file_content, flags=re.MULTILINE)
    file_content = re.sub(r"\n\s*\n", "\n", file_content)
    file_content = file_content.strip()

    return file_content

In [18]:
def get_import_lines(file_path):
    """Return the top-level import statements found in a Python file.

    Only lines that start at column 0 with ``import <something>`` or
    ``from <something> import <something>`` are returned; indented imports
    are ignored. Whitespace is required after ``import`` so that ordinary
    statements such as ``importance = 1`` are not reported as imports.

    Args:
        file_path: Path of the Python source file to read (UTF-8).

    Returns:
        A list with the matching lines, in file order.
    """
    with open(file_path, "r", encoding="utf8") as f:
        file_content = f.read()

    # Find all import lines using regular expression
    import_lines = re.findall(r"^import[ \t].*$|^from .* import.*$", file_content, re.MULTILINE)

    return import_lines

In [19]:
# Preview: print the stripped-down source of a single distribution module.
input_file_path = "../../continuous/distributions/fatigue_life.py"
processed_content = process_py_file(file_path=input_file_path)
print(processed_content)

class FATIGUE_LIFE:
    def __init__(self, measurements):
        self.parameters = self.get_parameters(measurements)
        self.gamma = self.parameters["gamma"]
        self.loc = self.parameters["loc"]
        self.scale = self.parameters["scale"]
    def cdf(self, x: float) -> float:
        z = lambda t: numpy.sqrt((t - self.loc) / self.scale)
        result = scipy.stats.norm.cdf((z(x) - 1 / z(x)) / (self.gamma))
        return result
    def pdf(self, x: float) -> float:
        z = lambda t: numpy.sqrt((t - self.loc) / self.scale)
        result = (z(x) + 1 / z(x)) / (2 * self.gamma * (x - self.loc)) * scipy.stats.norm.pdf((z(x) - 1 / z(x)) / (self.gamma))
        return result
    def get_num_parameters(self) -> int:
        return len(self.parameters)
    def parameter_restrictions(self) -> bool:
        v1 = self.scale > 0
        v2 = self.gamma > 0
        return v1 and v2
    def get_parameters(self, measurements) -> dict[str, float | int]:
        scipy_params = scipy.s

In [20]:
# Collect the import statements of the same module (the result is stored,
# not displayed).
input_file_path = "../../continuous/distributions/fatigue_life.py"
import_lines = get_import_lines(file_path=input_file_path)

In [21]:
# Gather the import statements of every distribution module plus the
# measurements module. Duplicates are expected here; they are collapsed when
# the list is later turned into a set.
IMPORTS = []
# os.listdir returns entries in arbitrary order, so sort for reproducibility.
for file in sorted(os.listdir("../../continuous/distributions")):
    # endswith() instead of a substring test so that "*.pyc", "*.py.bak" and
    # similar entries are not read as source files.
    if file.endswith(".py"):
        import_lines = get_import_lines(f"../../continuous/distributions/{file}")
        IMPORTS.extend(import_lines)

input_file_path = "../../continuous/measurements/measurements_continuous.py"
import_lines = get_import_lines(input_file_path)
IMPORTS.extend(import_lines)

In [22]:
# Add an import that is not collected from the scanned files.
# NOTE(review): presumably required by the generated module -- confirm.
IMPORTS += ["import joblib"]

In [23]:
# Start the generated module with the de-duplicated, sorted import block,
# then append the processed source of every distribution module.
CODE = "\n".join(sorted(set(IMPORTS))) + "\n\n"
# os.listdir returns entries in arbitrary order; sorting makes the generated
# file identical from run to run.
for file in sorted(os.listdir("../../continuous/distributions")):
    # endswith() instead of a substring test so that "*.pyc", "*.py.bak" and
    # similar entries are not appended to the generated module.
    if file.endswith(".py"):
        processed_content = process_py_file(f"../../continuous/distributions/{file}")
        CODE += processed_content + "\n\n"

In [24]:
# Append the measurements module and each goodness-of-fit test module to CODE.
# NOTE: this cell appends to CODE, so re-running it without re-running the
# cell that initialises CODE duplicates these sections.
input_file_path = "../../continuous/measurements/measurements_continuous.py"
measuerements_code = process_py_file(input_file_path)
CODE += measuerements_code + "\n\n"

input_file_path = "../../continuous/test_chi_square_continuous.py"
test_chi_square_continuous_code = process_py_file(input_file_path)
CODE += test_chi_square_continuous_code + "\n\n"

input_file_path = "../../continuous/test_kolmogorov_smirnov_continuous.py"
test_kolmogorov_smirnov_continuous_code = process_py_file(input_file_path)
CODE += test_kolmogorov_smirnov_continuous_code + "\n\n"

# input_file_path = "../../utilities/ad_marsaglia.py"
# anderson_darling_code = process_py_file(input_file_path)
# CODE += anderson_darling_code + "\n\n"

input_file_path = "../../continuous/test_anderson_darling_continuous.py"
test_anderson_darling_continuous_code = process_py_file(input_file_path)
# Drop the "ad." qualifier from the processed source. The word boundary keeps
# identifiers that merely end in "ad" (e.g. "thread.", "load.") intact, which
# a plain str.replace("ad.", "") would corrupt.
test_anderson_darling_continuous_code = re.sub(r"\bad\.", "", test_anderson_darling_continuous_code)
CODE += test_anderson_darling_continuous_code + "\n\n"

In [25]:
# Process the PHITTER_CONTINUOUS class module and append it to CODE.
# NOTE(review): this path goes through "../../phitter/continuous/" while the
# other cells read from "../../continuous/" -- confirm both trees are intended.
class_phitter_continuous_code = process_py_file(
    file_path="../../phitter/continuous/phitter_continuous.py"
)
CODE = CODE + class_phitter_continuous_code + "\n\n"

In [26]:
# Display the processed PHITTER_CONTINUOUS source as a quick visual check.
class_phitter_continuous_code

'class PHITTER_CONTINUOUS:\n    def __init__(\n        self,\n        data: list[int | float],\n        num_bins: int | None = None,\n        confidence_level=0.95,\n        minimum_sse=float("inf"),\n    ):\n        self.data = data\n        self.measurements = MEASUREMENTS_CONTINUOUS(self.data, num_bins, confidence_level)\n        self.confidence_level = confidence_level\n        self.minimum_sse = minimum_sse\n        self.distribution_results = {}\n        self.none_results = {"test_statistic": None, "critical_value": None, "p_value": None, "rejected": None}\n    def test(self, test_function, label: str, distribution):\n        validation_test = False\n        try:\n            test = test_function(distribution, self.measurements, confidence_level=self.confidence_level)\n            if numpy.isnan(test["test_statistic"]) == False and numpy.isinf(test["test_statistic"]) == False and test["test_statistic"] > 0:\n                self.distribution_results[label] = {\n                  

In [27]:
# Write the assembled module to disk. The context manager guarantees the
# handle is closed even if the write raises; plain "w" is enough because the
# file is never read back here.
with open("./production_continuous.py", "w", encoding="utf8") as code_file:
    code_file.write(CODE)

In [28]:
# Script entry point appended to the generated test module. The snippet reads
# a sample data file (decimal commas are converted to dots), fits the data
# with PHITTER_CONTINUOUS and prints the distributions that were not rejected.
# The data file is opened in a ``with`` block so the emitted script does not
# leak the file handle.
if_name_code = """
if __name__ == "__main__":
    path = "../../continuous/data/data_beta.txt"
    with open(path, "r") as sample_distribution_file:
        data = [float(x.replace(",", ".")) for x in sample_distribution_file.read().splitlines()]

    phitter_continuous = PHITTER_CONTINUOUS(data)
    sorted_results_sse, not_rejected_results = phitter_continuous.fit()

    for distribution, results in not_rejected_results.items():
        print(f"Distribution: {distribution}, SSE: {results['sse']}, Aprobados: {results['n_test_passed']}")
"""

In [29]:
# Append the script entry point to the assembled source.
CODE = CODE + if_name_code

In [30]:
# Write the assembled module (now including the script entry point) to disk.
# The context manager guarantees the handle is closed even if the write
# raises; plain "w" is enough because the file is never read back here.
with open("./test_production_continuous.py", "w", encoding="utf8") as code_file:
    code_file.write(CODE)