In [4]:
import os
import nbformat
from nbformat.v4 import new_notebook, new_code_cell

def extract_cells_from_txt(file_path):
    """
    Collect the code cells recorded in a .txt transcript.

    A cell opens on a line containing 'EXECUTE' directly followed by the line
    break and closes on the next line containing 'STDOUT/STDERR'. The lines in
    between are joined and stripped of surrounding whitespace to form one entry
    of the returned list. A cell that is never closed is not returned.
    """
    start_marker = "EXECUTE\n"
    end_marker = "STDOUT/STDERR"

    with open(file_path, 'r', encoding="utf8") as handle:
        raw_lines = handle.readlines()

    collected = []
    buffer = None  # None while outside a cell, a list of lines while inside one

    for raw in raw_lines:
        if start_marker in raw:
            # A start marker always opens a fresh cell, discarding any open one.
            buffer = []
            continue
        if end_marker in raw:
            if buffer is not None:
                collected.append("".join(buffer).strip())
                buffer = None
            continue
        if buffer is not None:
            buffer.append(raw)

    return collected

def create_notebook_from_cells(cells, output_file):
    """
    Create a Jupyter notebook from a list of cell contents.

    Args:
        cells: Iterable of source strings; each one becomes a code cell, in order.
        output_file: Path of the .ipynb file to write (overwritten if it exists).
    """
    notebook = new_notebook()
    notebook.cells = [new_code_cell(cell) for cell in cells]

    # Write UTF-8 explicitly. The transcripts are read as UTF-8, while the
    # platform default encoding (e.g. cp1252 on Windows) may be unable to
    # encode non-ASCII cell text or would produce a notebook that is not UTF-8.
    with open(output_file, 'w', encoding='utf-8') as f:
        nbformat.write(notebook, f)

def txt_to_ipynb_in_place(input_dir):
    """
    Convert all .txt files in the input directory and its subdirectories to .ipynb files.
    The generated notebooks are saved in the same folder as their corresponding .txt files.

    Args:
        input_dir: Root directory that is walked recursively.
    """
    txt_suffix = '.txt'
    for root, _, files in os.walk(input_dir):
        for file_name in files:
            if file_name.endswith(txt_suffix):
                input_file = os.path.join(root, file_name)
                # Swap only the trailing extension. str.replace would rewrite
                # every ".txt" in the name (e.g. "a.txt.bak.txt").
                notebook_name = file_name[:-len(txt_suffix)] + '.ipynb'
                output_file = os.path.join(root, notebook_name)

                print(f"Processing {input_file}...")
                cells = extract_cells_from_txt(input_file)
                create_notebook_from_cells(cells, output_file)
                print(f"Created {output_file}")

# Entry point: convert every .txt transcript found under input_dir into a notebook.
if __name__ == "__main__":
    input_dir = "./"  # Root directory to scan; replace with your root input directory
    
    # Walks input_dir recursively and writes each .ipynb next to its source .txt.
    txt_to_ipynb_in_place(input_dir)


Processing ./T1\01\ipynb.txt...
Created ./T1\01\ipynb.ipynb
Processing ./T1\02\ipynb.txt...
Created ./T1\02\ipynb.ipynb
Processing ./T1\03\ipynb.txt...
Created ./T1\03\ipynb.ipynb
Processing ./T1\04\ipynb.txt...
Created ./T1\04\ipynb.ipynb
Processing ./T1\05\ipynb.txt...
Created ./T1\05\ipynb.ipynb
Processing ./T1\06\ipynb.txt...
Created ./T1\06\ipynb.ipynb
Processing ./T1\07\ipynb.txt...
Created ./T1\07\ipynb.ipynb
Processing ./T1\08\ipynb.txt...
Created ./T1\08\ipynb.ipynb
Processing ./T1\09\ipynb.txt...
Created ./T1\09\ipynb.ipynb
Processing ./T1\10\ipynb.txt...
Created ./T1\10\ipynb.ipynb
Processing ./T2\01\ipynb.txt...
Created ./T2\01\ipynb.ipynb
Processing ./T2\02\ipynb.txt...
Created ./T2\02\ipynb.ipynb
Processing ./T2\02\output.txt...
Created ./T2\02\output.ipynb
Processing ./T2\03\ipynb.txt...
Created ./T2\03\ipynb.ipynb
Processing ./T2\04\ipynb.txt...
Created ./T2\04\ipynb.ipynb
Processing ./T2\05\ipynb.txt...
Created ./T2\05\ipynb.ipynb
Processing ./T2\06\ipynb.txt...
Create

In [1]:
import os
import io
from pylint.lint import Run
import radon.complexity as radon_complexity
from radon.metrics import mi_visit, mi_rank
from radon.complexity import cc_rank

def extract_cells_from_txt(file_path):
    """
    Pull the code cells out of a .txt transcript, dropping pip-install magics.

    Lines after an 'EXECUTE' marker line are gathered until a line containing
    'STDOUT/STDERR' is reached. Gathered lines whose stripped text starts with
    "%pip install" are discarded; the remainder is concatenated and stripped to
    form one cell. Returns the cell strings in file order; a cell that is never
    closed is not returned.
    """
    with open(file_path, 'r', encoding="utf8") as source:
        all_lines = source.readlines()

    extracted = []
    pending = []
    collecting = False

    for text in all_lines:
        if "EXECUTE\n" in text:
            # A start marker always restarts collection, even mid-cell.
            collecting, pending = True, []
            continue

        if "STDOUT/STDERR" not in text:
            if collecting:
                pending.append(text)
            continue

        # End marker reached; it only matters while a cell is open.
        if not collecting:
            continue

        kept = []
        for entry in pending:
            if entry.strip().startswith("%pip install"):
                continue
            kept.append(entry)
        extracted.append("".join(kept).strip())
        collecting = False

    return extracted

def create_python_file(cells, output_file):
    """
    Create a Python file from a list of cell contents.

    Each cell is written in the order given, followed by a blank line.

    Args:
        cells: Iterable of source strings.
        output_file: Path of the .py file to write (overwritten if it exists).
    """
    # Write UTF-8 explicitly: the transcripts are read as UTF-8 and Python
    # source files are UTF-8 by default, whereas the platform default encoding
    # (e.g. cp1252 on Windows) can fail on or mangle non-ASCII text.
    with open(output_file, 'w', encoding='utf-8') as f:
        for cell in cells:
            f.write(cell + '\n\n')

def calculate_metrics(py_file):
    """
    Calculate the pylint score, cyclomatic complexity and maintainability index
    for a given Python file.

    Args:
        py_file: Path of the Python file to analyse; it is read as UTF-8,
            matching what create_python_file writes.

    Returns:
        A tuple (pylint_score, avg_cc, complexity_rank, mi, mi_rating) where
        pylint_score is a float, or None when no rating line was found in the
        pylint output; avg_cc is the mean complexity of the blocks radon
        reports (0 when it reports none); complexity_rank is cc_rank(avg_cc);
        mi is the value returned by mi_visit; and mi_rating is mi_rank(mi).
    """
    from contextlib import redirect_stdout

    # pylint prints its report to stdout; capture it so the score can be parsed.
    with io.StringIO() as pylint_output:
        with redirect_stdout(pylint_output):
            Run([py_file], exit=False)
        pylint_result = pylint_output.getvalue()

    rating_prefix = "Your code has been rated at"
    pylint_score = None
    for line in pylint_result.splitlines():
        if line.startswith(rating_prefix):
            # The line looks like "Your code has been rated at 7.50/10 ...":
            # take the text between the prefix and the first "/".
            pylint_score = float(line[len(rating_prefix):].split("/")[0])
            break

    # Read the file back with the same encoding it was written in.
    with open(py_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Cyclomatic complexity, averaged over the blocks radon reports.
    complexity_analysis = radon_complexity.cc_visit(content)
    if complexity_analysis:
        total_cc = sum(block.complexity for block in complexity_analysis)
        avg_cc = total_cc / len(complexity_analysis)
    else:
        avg_cc = 0
    complexity_rank = cc_rank(avg_cc)

    # Maintainability Index; the second argument is radon's `multi` flag
    # (count multi-line strings as comments).
    mi = mi_visit(content, True)
    mi_rating = mi_rank(mi)

    return pylint_score, avg_cc, complexity_rank, mi, mi_rating

def txt_to_py_with_metrics(input_dir, output_metrics_file):
    """
    Convert all .txt files to .py files and calculate pylint score, Cyclomatic
    complexity and maintainability index for each.
    Write results to an output file.

    Args:
        input_dir: Root directory that is walked recursively; each .py file is
            written next to the .txt file it was generated from.
        output_metrics_file: Path of the comma-separated report to write. It is
            written once, after every file has been processed.
    """
    txt_suffix = '.txt'
    results = []
    for root, _, files in os.walk(input_dir):
        for file_name in files:
            if file_name.endswith(txt_suffix):
                input_file = os.path.join(root, file_name)
                # Swap only the trailing extension. str.replace would rewrite
                # every ".txt" in the name (e.g. "a.txt.bak.txt").
                py_name = file_name[:-len(txt_suffix)] + '.py'
                output_file = os.path.join(root, py_name)

                print(f"Processing {input_file}...")
                cells = extract_cells_from_txt(input_file)
                create_python_file(cells, output_file)

                pylint_score, avg_cc, complexity_rank, mi, mi_rating = calculate_metrics(output_file)
                results.append({
                    'file': output_file,
                    'pylint_score': pylint_score,
                    'avg_cyclomatic_complexity': avg_cc,
                    'complexity_rank': complexity_rank,
                    'maintainability_index': mi,
                    'mi_rating': mi_rating
                })
                print(f"Metrics calculated for {output_file}")

    # Write results to a file: one header row, then one row per generated .py file.
    with open(output_metrics_file, 'w') as f:
        f.write("File, Pylint Score, Avg Cyclomatic Complexity, Complexity Rank, Maintainability Index, MI Rating\n")
        for result in results:
            f.write(f"{result['file']}, {result['pylint_score']}, {result['avg_cyclomatic_complexity']}, "
                    f"{result['complexity_rank']}, {result['maintainability_index']}, {result['mi_rating']}\n")

# Entry point: generate a .py file per .txt transcript under input_dir and write the metrics report.
if __name__ == "__main__":
    input_dir = "./"  # Root directory to scan; replace with your root input directory
    output_metrics_file = "metrics_output.csv"  # Report path; replace with your desired output file name
    
    # Walks input_dir recursively; the report is written after all files are processed.
    txt_to_py_with_metrics(input_dir, output_metrics_file)


Processing ./T1\01\ipynb.txt...
Metrics calculated for ./T1\01\ipynb.py
Processing ./T1\02\ipynb.txt...
Metrics calculated for ./T1\02\ipynb.py
Processing ./T1\03\ipynb.txt...
Metrics calculated for ./T1\03\ipynb.py
Processing ./T1\04\ipynb.txt...
Metrics calculated for ./T1\04\ipynb.py
Processing ./T1\05\ipynb.txt...
Metrics calculated for ./T1\05\ipynb.py
Processing ./T1\06\ipynb.txt...
Metrics calculated for ./T1\06\ipynb.py
Processing ./T1\07\ipynb.txt...
Metrics calculated for ./T1\07\ipynb.py
Processing ./T1\08\ipynb.txt...
Metrics calculated for ./T1\08\ipynb.py
Processing ./T1\09\ipynb.txt...
Metrics calculated for ./T1\09\ipynb.py
Processing ./T1\10\ipynb.txt...
Metrics calculated for ./T1\10\ipynb.py
Processing ./T2\01\ipynb.txt...
Metrics calculated for ./T2\01\ipynb.py
Processing ./T2\02\ipynb.txt...
Metrics calculated for ./T2\02\ipynb.py
Processing ./T2\03\ipynb.txt...
Metrics calculated for ./T2\03\ipynb.py
Processing ./T2\04\ipynb.txt...
Metrics calculated for ./T2\04\i