In [1]:
import os
import json
import subprocess
from tqdm import tqdm
import sys
import re

### Set up `PATH` so that the installed command-line tools can be found

In [11]:
# Directories that are prepended to PATH in a private copy of the environment,
# which is then handed to subprocesses via ``env=env``.
node_bin_dir = "/home/pooria/.nvm/versions/node/v22.17.1/bin"
dotnet_tools_dir = os.path.expanduser("~/.dotnet/tools")

env = os.environ.copy()
# Resulting search order: dotnet tools, then the Node bin dir, then the
# inherited PATH.
env["PATH"] = ":".join([dotnet_tools_dir, node_bin_dir, env["PATH"]])

# Smoke test; the returned CompletedProcess is displayed as the cell output.
subprocess.run(["dotnet-script", "--version"], env=env)

1.6.0


CompletedProcess(args=['dotnet-script', '--version'], returncode=0)

### Python

In [None]:
import py_compile
import tempfile

directory = "files/python/codes/"
output_file = "results/python_lint.json"
all_results = []

# Sorted so the order of the report does not depend on os.listdir(), which
# returns entries in arbitrary order.
py_files = sorted(f for f in os.listdir(directory) if f.endswith(".py"))
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# This flag only stops the import system from writing .pyc files;
# py_compile.compile() does not consult it and always writes its output file.
sys.dont_write_bytecode = True

# Send the bytecode produced by each check to a throwaway location so that no
# __pycache__ directory is created next to the files being linted.
with tempfile.TemporaryDirectory() as scratch_dir:
    scratch_pyc = os.path.join(scratch_dir, "lint_check.pyc")

    for filename in tqdm(py_files, desc="Linting files", unit="file"):
        filepath = os.path.join(directory, filename)

        try:
            # doraise=True turns a failed compilation into PyCompileError
            # instead of a message on stderr.
            py_compile.compile(filepath, cfile=scratch_pyc, doraise=True)
        except py_compile.PyCompileError as e:
            all_results.append({
                "type": "syntax-error",
                "module": filename,
                "obj": "",
                # exc_value is the underlying exception; lineno/offset exist
                # on SyntaxError and fall back to None for anything else.
                "line": getattr(e.exc_value, "lineno", None),
                "column": getattr(e.exc_value, "offset", None),
                "path": filepath,
                "symbol": "syntax-error",
                "message": str(e.exc_value),
                "message-id": ""
            })

# Write all results to JSON once after all files are processed
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)


### JavaScript
Make sure ESLint is installed before running the cell below.

In [None]:

directory = "files/javascript/codes/"
output_file = "results/js_lint.json"
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Sorted so the report order does not depend on os.listdir() ordering.
js_files = sorted(
    os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".js")
)
all_results = []

# Prepare the environment with your Node path as before
env = os.environ.copy()
env["PATH"] = "/home/pooria/.nvm/versions/node/v22.17.1/bin:" + env["PATH"]

file_results = []
if js_files:
    # Only invoke ESLint when there is something to lint: with no file
    # arguments the run would not be restricted to `directory` (flat-config
    # ESLint is expected to fall back to the current directory - TODO confirm
    # for the installed version).
    result = subprocess.run(
        [
            "eslint",
            "--config",
            "linting_rules/js.eslint.config.mjs",
            "--format",
            "json",
            *js_files,
        ],
        capture_output=True,
        text=True,
        env=env,
    )

    try:
        file_results = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("⚠️ Failed to parse JSON from ESLint output:")
        print(result.stdout)
        # When ESLint itself fails (bad config, crash) the explanation is on
        # stderr and stdout is typically empty, so show both streams.
        if result.stderr:
            print(result.stderr)

for file_result in file_results:
    filename = os.path.basename(file_result.get("filePath", ""))
    for msg in file_result.get("messages", []):
        # Keep parse failures ("fatal") and error-level (severity 2) messages;
        # warnings are dropped.
        if msg.get("fatal", False) or msg.get("severity", 0) == 2:
            all_results.append({
                "type": "syntax-error",
                "module": filename,
                "obj": "",
                "line": msg.get("line"),
                "column": msg.get("column"),
                "path": file_result.get("filePath"),
                "symbol": "syntax-error",
                "message": msg.get("message", ""),
                "message-id": "",
            })

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)


### Java files

In [None]:
!sudo apt install default-jdk

In [None]:
import shutil
import hashlib

directory = "files/java/codes/"
good_dir = os.path.join(directory, "good")
os.makedirs(good_dir, exist_ok=True)

# Sorted: when several sources declare the same public class only the first one
# processed is copied into good/, so the processing order must not depend on
# the arbitrary order returned by os.listdir().
java_files = sorted(f for f in os.listdir(directory) if f.endswith(".java"))

# Captures the identifier that follows "public class".
public_class_pattern = re.compile(r'public\s+class\s+(\w+)')

# Map from hash (hex string) to original filename
mapping = {}

def hash_file_content(path):
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is opened in binary mode and fed to the hasher in 8192-byte
    reads, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()

# Copy every source that declares a public class into good/ under the name
# "<ClassName>.java" and remember which original file each copy came from.
for filename in java_files:
    filepath = os.path.join(directory, filename)
    # The text is only searched for the class name, so undecodable bytes are
    # replaced rather than letting one badly encoded file abort the loop.
    with open(filepath, "r", encoding="utf-8", errors="replace") as f:
        content = f.read()

    # Find public class name; sources without one are left out.
    match = public_class_pattern.search(content)
    if not match:
        continue

    class_name = match.group(1)
    file_hash = hash_file_content(filepath)

    new_filename = f"{class_name}.java"
    dest_path = os.path.join(good_dir, new_filename)

    if os.path.exists(dest_path):
        if hash_file_content(dest_path) != file_hash:
            # A different source with the same public class name already
            # occupies this slot; say so instead of dropping the file silently.
            print(f"Skipping {filename}: good/{new_filename} already holds a different file")
            continue
        # Same content as the existing copy (e.g. the cell is being re-run):
        # nothing to copy, but the mapping below must still be recorded.
    else:
        print(f"Copying {filename} → good/{new_filename}")
        shutil.copy2(filepath, dest_path)

    # Map hash → original filename; the first file seen with this content wins.
    mapping.setdefault(file_hash, filename)


In [None]:
directory = "files/java/codes/good"
output_file = "results/java_lint.json"
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Bare names of the sources to lint, and the same entries joined with
# `directory` for the compiler command line.
class_files = [name for name in os.listdir(directory) if name.endswith(".java")]
java_files = [os.path.join(directory, name) for name in class_files]
all_results = []

# Output directory passed to javac via -d.
os.makedirs("/tmp/java_linting", exist_ok=True)

# hash -> original filename
hash_to_original = mapping

hash_to_classfile = {}



def hash_file_content(path):
    """Compute the SHA-256 of a file's bytes and return it as a hex string.

    Reads ``path`` in binary mode, 8192 bytes at a time, updating the hash
    after each read until an empty read signals the end of the file.
    """
    sha = hashlib.sha256()
    with open(path, "rb") as fh:
        piece = fh.read(8192)
        while piece:
            sha.update(piece)
            piece = fh.read(8192)
    return sha.hexdigest()

# Resolve every file in the lint directory back to the source it was copied
# from by looking its content hash up in hash_to_original. A file whose hash
# is unknown (or maps to an empty name) is reported under its own name.
class_to_original = {}
for class_filename in class_files:
    file_hash = hash_file_content(os.path.join(directory, class_filename))
    class_to_original[class_filename] = hash_to_original.get(file_hash) or class_filename

def add_error(file, errors):
    """Append one javac diagnostic to ``all_results``.

    Args:
        file: Path of the file as it appears in the javac output. Only its
            base name is used, to look up the original filename in
            ``class_to_original`` (falling back to the base name itself).
        errors: Lines belonging to the diagnostic, header line first. Nothing
            is recorded when this is empty.

    The record's ``line`` is taken from the header line when it has the form
    ``<path>.java:<line>: error: ...`` and is ``None`` otherwise; ``column``
    is always ``None``.
    """
    if not errors:
        return

    base = os.path.basename(file)
    original_filename = class_to_original.get(base, base)  # fallback to base

    # Pull the line number out of the diagnostic header so Java records carry
    # the same location information as the other languages' reports.
    header = re.search(r"\.java:(\d+):\s*error:", errors[0])
    line_no = int(header.group(1)) if header else None

    all_results.append({
        "type": "syntax-error",
        "module": os.path.basename(original_filename) if original_filename else "unknown",
        "obj": "",
        "line": line_no,
        "column": None,
        "path": original_filename if original_filename else "",
        "symbol": "syntax-error",
        "message": "\n".join(errors),
        "message-id": "",
    })

result = subprocess.run(
    [
        "javac",
        # javac stops reporting after a default maximum number of errors (100);
        # raise the cap so a large batch of files is not silently truncated.
        "-Xmaxerrs", "1000000",
        "-d", "/tmp/java_linting",
        *java_files,
    ],
    capture_output=True,
    text=True,
)

if result.returncode != 0:
    current_file = None
    current_errors = []

    for line in result.stderr.splitlines():
        if ".java:" in line and "error:" in line:
            # A header line starts a new diagnostic; flush the previous one.
            if current_file is not None:
                add_error(current_file, current_errors)
            current_errors = [line]
            current_file = line.split(":")[0]
        elif re.fullmatch(r"\d+ (?:errors?|warnings?)", line.strip()):
            # Trailing summary ("3 errors"); it belongs to no single
            # diagnostic, so keep it out of the last record's message.
            continue
        else:
            # Source excerpt / caret / detail lines of the current diagnostic.
            current_errors.append(line)
    if current_file is not None:
        add_error(current_file, current_errors)

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)


### C

In [None]:

# Setup directories
directory = "files/c/codes/"
output_file = "results/c_lint.json"
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Gather all .c files (sorted so the report order does not depend on
# os.listdir() ordering)
c_files = sorted(
    os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".c")
)
all_results = []

# Run GCC in syntax-only mode on all files
result = subprocess.run(
    ["gcc", "-fsyntax-only", *c_files],
    capture_output=True,
    text=True,
)

# Parse output if there were errors
if result.returncode != 0:
    for line in result.stderr.splitlines():
        # Split at the FIRST ": error:" only, so a message that itself
        # contains that text is kept whole.
        location, marker, message = line.partition(": error:")
        if not marker:
            continue

        # `location` is expected to look like "<file>:<line>:<column>"; the
        # numeric fields may be absent, in which case they are reported as None.
        parts = location.split(":")
        file_path = parts[0]
        line_no = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else None
        column_no = int(parts[2]) if len(parts) > 2 and parts[2].isdigit() else None

        # Decode any unicode punctuation to ASCII equivalents
        message = message.strip().replace("\u2018", "'").replace("\u2019", "'")

        all_results.append({
            "type": "syntax-error",
            "module": os.path.basename(file_path),
            "obj": "",
            "line": line_no,
            "column": column_no,
            "path": file_path,
            "symbol": "syntax-error",
            "message": message,
            "message-id": "",
        })

# Write results to file
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)
