In [None]:
import * as tslab from "tslab";
import { readFileSync } from "fs";

// Read ../style.css as UTF-8 text and emit it wrapped in a <style> element
// through tslab's HTML display.
const css = readFileSync("../style.css", "utf-8");
tslab.display.html(`<style>${css}</style>`);

# A Theorem Prover for First-Order Logic without Equality

## Auxiliary Functions

We need the parser for first order formulas, hence we import it.

Formulas are represented as nested arrays. In order to convert a string into a nested array we use the `LogicParser` that is found in the file `FOL-Parser.ts`. Our parser distinguishes variables and function symbols as follows:
- A word starting with a lower case letter is interpreted as a *variable*.
- A word starting with an upper case letter is assumed to be a *function* or *predicate symbol*.

In [None]:
import { RecursiveSet } from 'recursive-set';
import { LogicParser, parse, type Formula, type Term, type Signature } from './FOL-Parser';
import { 
    normalize, 
    applySubstitution as apply,
    allVariables,
    prettifyCNF,
    type Literal,
    type Clause, 
    type CNF,
    type Substitution as CNFSubstitution
} from './09-FOL-CNF';
import {
    unify,
    apply as applyTerm,
    prettifySubstitution,
    type Substitution,
    type Equation
} from './10-Unification';

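The function `parseFormulaString` takes a string `s` representing a formula from first-order logic together with a signature. It returns a nested structure representing this formula. Likewise, the function `parseTermString` parses a string representing a term.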

In [None]:
/**
 * Parses the string `s` as a first-order FORMULA.
 *
 * Thin wrapper that forwards both arguments to `parse`.
 *
 * @param s - textual representation of the formula
 * @param signature - signature handed to the parser unchanged
 * @returns the `Formula` produced by `parse`
 */
function parseFormulaString(s: string, signature: Signature): Formula {
    return parse(s, signature);  // the parse function from FOL-Parser
}

/**
 * Parses the string `s` as a first-order TERM.
 *
 * A fresh `LogicParser` is created for `s` and `signature`, and its
 * `parseTermEntry` result is returned.
 *
 * @param s - textual representation of the term
 * @param signature - signature handed to the parser unchanged
 * @returns the `Term` produced by `LogicParser.parseTermEntry`
 */
function parseTermString(s: string, signature: Signature): Term {
    return new LogicParser(s, signature).parseTermEntry();
}

The resolution calculus works with clauses. The notebook `09-FOL-CNF.ipynb` implements the function $\texttt{normalize}(f)$ that turns a formula $f$ into a set of clauses.

In [None]:
// Example formula: Grandparent(g, c) holds iff there is some p with
// Parent(g, p) and Parent(p, c).
const s = '∀g:∀c:(Grandparent(g, c) ↔ ∃p: (Parent(g, p) ∧ Parent(p, c)))';

// Define the signature with the predicates that are used
const signature: Signature = {
    functions: new Map(),  // no function symbols in this formula
    predicates: new Map([
        ['Grandparent', 2],  // binary predicate
        ['Parent', 2]        // binary predicate
    ])
};

const f = parse(s, signature);
f


In [None]:
// Turn the parsed formula into a set of clauses and hand it to prettifyCNF.
const Clauses = normalize(f);
prettifyCNF(Clauses);

The function `unify` from the module `10-Unification` implements [unification](https://en.wikipedia.org/wiki/Unification_(computer_science)) via the algorithm of [Martelli and Montanari](https://dl.acm.org/doi/pdf/10.1145/357162.357169).

In [None]:
import { unify } from './10-Unification'; 

The function call $\texttt{arb}(S)$ returns an arbitrary element from the set $S$.

In [None]:
import { RecursiveSet, type Value, Tuple } from 'recursive-set';

/**
 * Returns an arbitrary element of the set `S`.
 *
 * Delegates to `RecursiveSet.pickRandom`, so repeated calls on the same set
 * may return different elements.
 *
 * @param S - the set to pick from
 * @returns an element of `S`; presumably `undefined` when `S` is empty
 *          (TODO confirm against the recursive-set documentation)
 */
function arb<T extends Value>(S: RecursiveSet<T>): T | undefined {
    return S.pickRandom();
}

Given a literal $l$ the function $\texttt{complement}(l)$ computes the complement $\overline{\,l\,}$ of the literal $l$.

In [None]:
import {
    createVarTerm,
    createFunTerm,
    createPredFormula,
    createConstFormula,
    createNotFormula,
    createBinaryFormula,
    createQuantifierFormula,

} from "./FOL-Parser";

In [None]:
import { getOrThrow } from './09-FOL-CNF';
/**
 * Computes the complement of the literal `l`.
 *
 * A literal of the form `¬A` (a tuple of length 2 whose head is '¬') is mapped
 * to `A`; every other literal `A` is mapped to `¬A`.
 *
 * @param l - the literal to complement
 * @returns the complementary literal
 * @throws Error if `l` is a negation whose operand is not a `Tuple`
 */
function complement(l: Literal): Literal {
    const isNegation = l.get(0) === '¬' && l.length === 2;
    if (!isNegation) {
        // Positive literal: wrap it in a negation.
        return createNotFormula(l);
    }

    // Negative literal: strip the negation and return the operand.
    const inner = getOrThrow(l, 1);
    if (!(inner instanceof Tuple)) {
        throw new Error("Invalid negation structure");
    }
    return inner;
}

In [None]:
// Complement of the positive literal P(x).
const literal = createPredFormula('P', [createVarTerm('x')]);
complement(literal).toString();

In [None]:
// Complement of the negative literal ¬P(x).
const negLiteral = createNotFormula(
    createPredFormula('P', [createVarTerm('x')])
);

complement(negLiteral).toString();

Given a clause $C$, the function $\texttt{collectVariables}(C)$ computes the set of all variables occurring in $C$.  The function $\texttt{collectVariables}$ can also compute the variables occurring in a literal or a term.

In [None]:
/**
 * Collects the names of all variables occurring in `C`.
 *
 * `C` may be a clause (a `RecursiveSet` of literals), a single literal, or a
 * term (both `Tuple`s). The variables themselves are computed by
 * `allVariables`; this function only merges them into one fresh set.
 *
 * @param C - clause, literal or term to inspect
 * @returns a new set holding the variable names; empty if `C` is neither a
 *          `RecursiveSet` nor a `Tuple`
 */
function collectVariables(C: Clause | Literal | Term): RecursiveSet<string> {
    const collected = new RecursiveSet<string>();

    if (C instanceof RecursiveSet) {
        // Clause: gather the variables of every literal.
        for (const literal of C) {
            for (const name of allVariables(literal)) {
                collected.add(name);
            }
        }
    } else if (C instanceof Tuple) {
        // Literal or term.
        for (const name of allVariables(C)) {
            collected.add(name);
        }
    }

    return collected;
}

In [None]:
// Print the variables of every clause in Clauses.
for (const C of Clauses) {
    console.log(`collectVariables(${C.toString()}) = \n\t{${[...collectVariables(C)].join(', ')}}`);
}

In [None]:
// The 26 lowercase ASCII letters; renameVariables draws its replacement
// variable names from this string.
const asciiLowercase = 'abcdefghijklmnopqrstuvwxyz';

The function $\texttt{renameVariables}(f, g)$ takes two clauses `f` and `g` and renames the variables in the clause `f` so that they are different from the variables occurring in `g`.

In [None]:
/**
 * Type guard: decides whether `value` has the shape of a literal.
 *
 * A value is accepted when it is a `Tuple` of length other than 1 whose head
 * is either the negation sign '¬' or a name starting with an upper-case
 * letter. Tuples of length 1 (variable terms) are rejected.
 *
 * The first character must be a genuine upper-case letter. Comparing it only
 * with its own `toUpperCase()` would also accept every uncased character, so
 * connectives such as '∧', '∨', '→' or '∀' would be classified as literals.
 * An empty head string is rejected instead of raising a TypeError.
 *
 * NOTE(review): this is a purely syntactic check. A function term whose name
 * starts with an upper-case letter cannot be told apart from an atom here.
 *
 * @param value - formula or term to test
 * @returns `true` if `value` looks like a (possibly negated) atom
 */
function isLiteral(value: Formula | Term): value is Literal {
    if (!(value instanceof Tuple)) return false;
    const head = value.get(0);
    if (value.length === 1) return false; // variable term
    if (typeof head !== 'string' || head.length === 0) return false;
    if (head === '¬') return true;

    // Upper-case letters are exactly the characters that equal their own
    // upper-case form but differ from their lower-case form.
    const initial = head.charAt(0);
    return initial === initial.toUpperCase() && initial !== initial.toLowerCase();
}

/**
 * Renames the variables of clause `f` apart from those of clause `g`.
 *
 * Every variable of `f` is replaced by a single lower-case ASCII letter that
 * does not occur as a variable in `g`. The variables of `f` are processed in
 * sorted order so that the renaming is deterministic.
 *
 * @param f - clause whose variables are renamed
 * @param g - clause whose variables must be avoided
 * @returns a new clause; `f` itself is not modified
 * @throws Error if fewer unused letters are available than `f` has variables,
 *         or if applying the renaming to a literal yields a non-literal
 */
function renameVariables(f: Clause, g: Clause): Clause {
    const variablesOfF = collectVariables(f);
    const reserved = collectVariables(g);

    // Candidate names: all letters not already used as variables in g.
    const freshNames = [...asciiLowercase].filter(letter => !reserved.has(letter));

    // Sorted to obtain the same assignment on every run.
    const toRename = [...variablesOfF].sort();

    const sigma: Substitution = new Map();
    toRename.forEach((oldName, index) => {
        if (index >= freshNames.length) {
            throw new Error("Not enough fresh variables for renaming available.");
        }
        sigma.set(oldName, createVarTerm(freshNames[index]));
    });

    const renamedClause = new RecursiveSet<Literal>();
    for (const literal of f) {
        const image = apply(literal, sigma);
        if (!isLiteral(image)) {
            throw new Error("Renamed literal must be a Literal (Formula)");
        }
        renamedClause.add(image);
    }
    return renamedClause;
}

In [None]:
// Rename every clause apart from itself and print the clause next to its renamed copy.
for (const C of Clauses) {
    const renamed = renameVariables(C, C);
    console.log(`${C.toString()}  ->  ${renamed.toString()}`);
}

# A Calculus for First Order Logic

The [resolution](https://en.wikipedia.org/wiki/Resolution_(logic)) rule is an inference rule that is defined as follows: If
 * $C_1$ and $C_2$ are clauses from first order logic,
 * $p(s_1,\cdots,s_n)$ and $p(t_1,\cdots,t_n)$ are atomic formulas,
 * the syntactical equation $p(s_1,\cdots,s_n) \doteq p(t_1,\cdots,t_n)$ is solvable and
     $$ \mu = \mathtt{mgu}\bigl(p(s_1,\cdots,s_n), p(t_1,\cdots,t_n)\bigr), $$
then
$$\frac{C_1 \cup\{ p(s_1,\cdots,s_n)\} \quad\quad \{\neg p(t_1,\cdots,t_n)\} \cup C_2}{
                 C_1\mu \cup C_2\mu} 
$$
is an application of the resolution rule.

Given two clauses <tt>C1</tt> and <tt>C2</tt>, the function $\texttt{resolve}(\texttt{C1}, \texttt{C2})$ computes the set of all clauses that can be inferred from <tt>C1</tt> and <tt>C2</tt> by applying the resolution rule.

In [None]:
import { compose } from './10-Unification';

/**
 * Returns the atom underlying the literal `l`.
 *
 * For a negated literal `¬A` (a tuple of length 2 whose head is '¬') the
 * operand `A` is returned; any other literal is returned unchanged.
 *
 * @param l - the literal to strip
 * @returns the atom of `l`
 * @throws Error if `l` is a negation whose operand is not a `Tuple`
 */
function getAtom(l: Literal): Literal {
    const isNegation = l.get(0) === '¬' && l.length === 2;
    if (!isNegation) {
        return l;
    }

    const inner = getOrThrow(l, 1);
    if (!(inner instanceof Tuple)) {
        throw new Error("Invalid negation structure");
    }
    return inner;
}

/**
 * Computes a unifier of the two literals `l1` and `l2`.
 *
 * Two literals are unifiable only if they have the same sign (both negated or
 * both positive), the same predicate symbol and the same number of arguments.
 * The arguments are unified pairwise from left to right; the substitution found
 * so far is applied to each pair before it is unified and then composed with
 * the result.
 *
 * The sign check is essential. Without it `P(s)` and `¬P(t)` would be reported
 * as unifiable, and a caller that resolves on `l1` and the complement of
 * another literal would cancel two literals of the same sign, which is unsound.
 *
 * @param l1 - first literal
 * @param l2 - second literal
 * @returns a unifying substitution, or `null` if the literals do not unify
 */
function unifyLiterals(l1: Literal, l2: Literal): Substitution | null {
    const isNegated = (l: Literal): boolean => l.get(0) === '¬' && l.length === 2;

    // Literals of different polarity are never unifiable.
    if (isNegated(l1) !== isNegated(l2)) {
        return null;
    }

    const atom1 = getAtom(l1);
    const atom2 = getAtom(l2);

    // Both atoms must have the form [predicate, arguments] with equal predicates.
    const pred1 = atom1.get(0);
    const pred2 = atom2.get(0);

    if (pred1 !== pred2 || atom1.length !== 2 || atom2.length !== 2) {
        return null;
    }

    const args1 = getOrThrow(atom1, 1);
    const args2 = getOrThrow(atom2, 1);

    if (!(args1 instanceof Tuple) || !(args2 instanceof Tuple)) {
        return null;
    }

    if (args1.length !== args2.length) {
        return null;
    }

    // Unify the argument pairs one after another.
    let sigma: Substitution = new Map();
    for (let i = 0; i < args1.length; i++) {
        const arg1 = getOrThrow(args1, i);
        const arg2 = getOrThrow(args2, i);

        if (!(arg1 instanceof Tuple) || !(arg2 instanceof Tuple)) {
            return null;
        }

        // Apply the substitution found so far to both arguments.
        const arg1Subst = applyTerm(arg1, sigma);
        const arg2Subst = applyTerm(arg2, sigma);

        const mgu = unify(arg1Subst, arg2Subst);
        if (mgu === null) {
            return null;
        }

        // Extend the accumulated substitution by the new unifier.
        sigma = compose(sigma, mgu);
    }

    return sigma;
}

/**
 * Computes all resolvents of the clauses `C1` and `C2`.
 *
 * `C2` is first renamed apart from `C1`. For every pair of a literal `L1` from
 * `C1` and a literal `L2` from the renamed `C2` that have opposite signs and
 * whose atoms unify with unifier `mu`, the clause
 * `((C1 \ {L1}) ∪ (C2 \ {L2})) mu` is added to the result.
 *
 * Only literals of opposite sign are resolved; cancelling two literals of the
 * same sign would not be a valid inference. A literal whose instance no longer
 * passes `isLiteral` raises an error instead of being dropped, because a
 * resolvent with a missing literal is strictly stronger than what the rule
 * allows.
 *
 * @param C1 - first parent clause
 * @param C2 - second parent clause
 * @returns the set of all resolvents; empty if no pair of literals resolves
 * @throws Error if renaming fails or a substituted literal is not a literal
 */
function resolve(C1: Clause, C2: Clause): RecursiveSet<Clause> {
    const isNegated = (l: Literal): boolean => l.get(0) === '¬' && l.length === 2;

    const C2Renamed = renameVariables(C2, C1);
    const Result = new RecursiveSet<Clause>();

    for (const L1 of C1) {
        for (const L2 of C2Renamed) {
            // The resolved literals must be complementary in sign.
            if (isNegated(L1) === isNegated(L2)) {
                continue;
            }

            const mu = unifyLiterals(L1, complement(L2));
            if (mu === null) {
                continue;
            }

            const resolvent = new RecursiveSet<Literal>();
            const addInstance = (l: Literal): void => {
                const applied = apply(l, mu);
                if (!isLiteral(applied)) {
                    throw new Error("Resolvent literal must be a Literal (Formula)");
                }
                resolvent.add(applied);
            };

            // All literals of both parents except the two resolved ones.
            for (const l of C1) {
                if (!l.equals(L1)) addInstance(l);
            }
            for (const l of C2Renamed) {
                if (!l.equals(L2)) addInstance(l);
            }

            Result.add(resolvent);
        }
    }
    return Result;
}


## Some Formulas for Testing

According to <a href="https://de.wikipedia.org/wiki/Uwe_Schöning">Uwe Schöning</a>, the theory of red dragons is
given by the following axioms:
<ol>
<li>
Every dragon is happy if all its children can fly:
$$ \forall x: \Bigl(\forall y: \big(\texttt{Child}(y,x) \rightarrow \texttt{CanFly}(y)\big) \rightarrow \texttt{Happy}(x)\Bigr) 
$$
</li>
<li> 
All red dragons can fly:
$$
 \forall x: \bigl(\texttt{Red}(x) \rightarrow \texttt{CanFly}(x)\bigr)
$$
</li>
<li> The children of red dragons are themselves red:
$$
\forall x: \bigl(\texttt{Red}(x) \rightarrow \forall y:\bigl( \texttt{Child}(y,x) \rightarrow \texttt{Red}(y)\bigr)\bigr)
$$
</li>
</ol>
We will show that these axioms imply that all red dragons are happy:
$$
 \forall x: \bigl(\texttt{Red}(x) \rightarrow \texttt{Happy}(x)\bigr)
$$
To this end, the formula stating that all red dragons are happy is negated.  Then we will show that the set consisting of the negated formula together with the axioms is inconsistent.  We start by defining the formulas.

In [None]:
// s1, s2, s3: the three dragon axioms; s4: the negation of the claim
// "all red dragons are happy".
const s1 = '∀x:(∀y:(Child(y, x) → CanFly(y)) → Happy(x))';
const s2 = '∀x:(Red(x) → CanFly(x))';
const s3 = '∀x:(Red(x) → ∀y:(Child(y, x) → Red(y)))';
const s4 = '¬∀x:(Red(x) → Happy(x))';

Next, the formulas are parsed and transformed into clauses.

In [None]:
// Define the signature for the dragon formulas
const dragonSignature: Signature = {
    functions: new Map([
        ['sk1', 0],  // Skolem functions (generated dynamically)
        ['sk2', 1],
        ['sk3', 0]
    ]),
    predicates: new Map([
        ['Child', 2],
        ['CanFly', 1],
        ['Happy', 1],
        ['Red', 1]
    ])
};

// Parse the first axiom and show its clauses.
const f1 = parse(s1, dragonSignature);
prettifyCNF(normalize(f1));

In [None]:
// Parse the second axiom and show its clauses.
const f2 = parse(s2, dragonSignature);
prettifyCNF(normalize(f2));

In [None]:
// Parse the third axiom and show its clauses.
const f3 = parse(s3, dragonSignature);
prettifyCNF(normalize(f3));

In [None]:
// Parse the negated claim and show its clauses.
const f4 = parse(s4, dragonSignature);
prettifyCNF(normalize(f4));

In [None]:
// Union of the clauses of the three axioms and the negated claim.
let Clauses = normalize(f1)
    .union(normalize(f2))
    .union(normalize(f3))
    .union(normalize(f4));
    
prettifyCNF(Clauses);


We give names to the clauses in order to be able to refer to them.

In [None]:
// C1 = { Red(sk3) }
const C1: Clause = new RecursiveSet<Literal>(
    createPredFormula('Red', [createFunTerm('sk3', [])])
);

In [None]:
// C2 = { ¬Happy(sk3) }
const C2: Clause = new RecursiveSet<Literal>(
    createNotFormula(createPredFormula('Happy', [createFunTerm('sk3', [])]))
);

In [None]:
// C3 = { ¬Red(x), CanFly(x) }
// Clause form of the second axiom, ∀x:(Red(x) → CanFly(x)). The signs must be
// this way round: { ¬CanFly(x), Red(x) } would state the converse implication,
// which is not an axiom.
const C3: Clause = new RecursiveSet<Literal>(
    createNotFormula(createPredFormula('Red', [createVarTerm('x')])),
    createPredFormula('CanFly', [createVarTerm('x')])
);

In [None]:
// C4 = { Child(sk2(x), x), Happy(x) }
// First of the two clauses of axiom 1, ∀x:(∀y:(Child(y, x) → CanFly(y)) → Happy(x)),
// which is equivalent to ∃y:(Child(y, x) ∧ ¬CanFly(y)) ∨ Happy(x). After
// Skolemizing y as sk2(x), Happy(x) occurs positively in both resulting clauses.
const C4: Clause = new RecursiveSet<Literal>(
    createPredFormula('Child', [
        createFunTerm('sk2', [createVarTerm('x')]),
        createVarTerm('x')
    ]),
    createPredFormula('Happy', [createVarTerm('x')])
);

In [None]:
// C5 = { Happy(x), ¬CanFly(sk2(x)) }
const C5: Clause = new RecursiveSet<Literal>(
    createPredFormula('Happy', [createVarTerm('x')]),
    createNotFormula(createPredFormula('CanFly', [
        createFunTerm('sk2', [createVarTerm('x')])
    ]))
);

In [None]:
// C6 = { ¬Red(x), ¬Child(y, x), Red(y) }
// Clause form of the third axiom, ∀x:(Red(x) → ∀y:(Child(y, x) → Red(y))):
// if x is red and y is a child of x, then y is red. With x and y swapped in
// the Red literals the clause would claim that parents of red dragons are red.
const C6: Clause = new RecursiveSet<Literal>(
    createNotFormula(createPredFormula('Red', [createVarTerm('x')])),
    createNotFormula(createPredFormula('Child', [
        createVarTerm('y'),
        createVarTerm('x')
    ])),
    createPredFormula('Red', [createVarTerm('y')])
);

In [None]:
// Resolve C1 with C6 and pick one of the resulting resolvents.
const C7 = arb(resolve(C1, C6));
C7 

Now we are ready to show that the set consisting of these clauses is inconsistent.

In [None]:
// Resolve C7 with C4; skipped when the previous step produced no clause.
let C8: Clause | undefined;
if (C7) {
    C8 = arb(resolve(C7, C4));
    console.log(C8?.toString());
}

In [None]:
// Resolve C8 with C2; skipped when the previous step produced no clause.
let C9: Clause | undefined;
if (C8) {
    C9 = arb(resolve(C8, C2));
    console.log(C9?.toString());
}

In [None]:
// Resolve C9 with C3; skipped when the previous step produced no clause.
let C10: Clause | undefined;
if (C9) {
    C10 = arb(resolve(C9, C3));
    console.log(C10?.toString());
}

In [None]:
// Resolve C10 with C5; skipped when the previous step produced no clause.
let C11: Clause | undefined;
if (C10) {
    C11 = arb(resolve(C10, C5));
    console.log(C11?.toString());
}

In [None]:
// Final step: resolve C11 with C2 and print the picked resolvent.
// NOTE(review): unlike the previous cells this logs the clause object itself
// rather than its toString() form.
if(C11){
    console.log(arb(resolve(C11, C2)));
}

As we have derived the empty clause, we have shown that all <b style="color:red;">communist dragons</b> are happy!

## Factorization

A calculus which only contains the resolution rule is not complete. We also need the factorization rule. If
- $C$ is a clause from first order logic,
- $p(s_1,\cdots,s_n)$ and $p(t_1,\cdots,t_n)$ are atomic formulas,
- the syntactical equation $p(s_1,\cdots,s_n) \doteq p(t_1,\cdots,t_n)$ is solvable and 
$$\mu = \mathtt{mgu}\bigl(p(s_1,\cdots,s_n), p(t_1,\cdots,t_n)\bigr),$$

then both 

$$
\displaystyle \frac{C \cup \bigl\{p(s_1,\cdots,s_n),\, p(t_1,\cdots,t_n)\bigr\}}{C\mu \cup \bigl\{p(s_1,\cdots,s_n)\mu\bigr\}} 
$$ 

and 

$$\displaystyle \frac{C \cup \bigl\{ \neg p(s_1,\cdots,s_n),\, \neg p(t_1,\cdots,t_n)\bigr\}}{C\mu \cup \bigl\{\neg p(s_1,\cdots,s_n)\mu\bigr\}}$$

are applications of the factorization rule.

The function $\texttt{factorize}(C)$ takes a clause $C$ from first order logic and computes all clauses that can be derived from $C$ via factorization.

In [None]:
/**
 * Computes all clauses that can be derived from `C` by factorization.
 *
 * For every pair of distinct literals of `C` that have the same sign and
 * unify with unifier `mu`, the clause `C mu` is added to the result. Because
 * clauses are sets, the two unified literals collapse into one.
 *
 * Literals of opposite sign are never paired: unifying their atoms would only
 * produce a tautological instance of `C`, not a factor. A literal whose
 * instance no longer passes `isLiteral` raises an error instead of being
 * dropped, because a clause with a missing literal does not follow from `C`.
 *
 * @param C - the clause to factorize
 * @returns the set of all factors of `C`; empty if no two literals unify
 * @throws Error if a substituted literal is not a literal
 */
function factorize(C: Clause): RecursiveSet<Clause> {
    const isNegated = (l: Literal): boolean => l.get(0) === '¬' && l.length === 2;

    const Result = new RecursiveSet<Clause>();
    const literals = Array.from(C);

    literals.forEach((first, i) => {
        // Only pairs (i, j) with i < j, so every unordered pair is tried once.
        for (const second of literals.slice(i + 1)) {
            if (isNegated(first) !== isNegated(second)) {
                continue;
            }

            const mu = unifyLiterals(first, second);
            if (mu === null) {
                continue;
            }

            const Cmu = new RecursiveSet<Literal>();
            for (const lit of C) {
                const applied = apply(lit, mu);
                if (!isLiteral(applied)) {
                    throw new Error("Factor literal must be a Literal (Formula)");
                }
                Cmu.add(applied);
            }
            Result.add(Cmu);
        }
    });

    return Result;
}

The clauses 
$$C_1 := \forall x: \forall y: P(F(x),y) \vee \forall u: \forall v:P(u, G(v))$$
and
$$C_2 := \forall x: \forall y: \bigl(\neg P(F(x),y)\bigr) \vee \forall u: \forall v: \bigl(\neg P(u, G(v))\bigr)$$
are inconsistent. However, the resolution rule alone is not sufficient to show this.

In [None]:
// Signature for the factorization example: unary function symbols F and G,
// binary predicate P.
const factorizeSignature: Signature = {
    functions: new Map([
        ['F', 1],
        ['G', 1]
    ]),
    predicates: new Map([
        ['P', 2]
    ])
};

// Normalize the first formula and pick one clause of the result.
const C1 = arb(normalize(parse('∀x:∀y:P(F(x),y) ∨ ∀u:∀v:P(u,G(v))', factorizeSignature)));
console.log(C1?.toString());

In [None]:
// Normalize the second formula and pick one clause of the result.
const C2 = arb(normalize(parse('∀x:∀y:(¬P(F(x),y)) ∨ ∀u:∀v:(¬P(u,G(v)))', factorizeSignature)));
console.log(C2?.toString());

In [None]:
// Pick one factor of C1, if C1 is defined.
let C3: Clause | undefined;
if (C1) {
    C3 = arb(factorize(C1));
    console.log(C3?.toString());
}

In [None]:
// Pick one factor of C2, if C2 is defined.
let C4: Clause | undefined;
if (C2) {
    C4 = arb(factorize(C2));
    console.log(C4?.toString());
}

In [None]:
// Resolve the two factors with each other and print one resolvent.
if (C3 && C4) {
    const result = arb(resolve(C3, C4));
    console.log(result?.toString());
}

## Automatic Theorem Proving

The function $\texttt{infere}(\texttt{Clauses})$ returns all possible clauses that result from:
- the resolution of two clauses $C_1, C_2 \in \texttt{Clauses}$,
- the factorization of a clause $C \in \texttt{Clauses}$.

In [None]:
// Maps the string form of a derived clause to the clause(s) it was derived
// from: two parents for a resolution step, one for a factorization step.
type ReasonMap = Map<string, Clause[]>;

/**
 * Performs one round of inference on the clause set `Clauses`.
 *
 * The result contains
 *  - every resolvent of two clauses `C1`, `C2` from `Clauses`, and
 *  - every factor of a clause `C` from `Clauses`.
 *
 * `C1` and `C2` range over all ordered pairs, including `C1 === C2`. The
 * resolution rule places no distinctness requirement on its premises, and
 * `resolve` renames its second argument apart from the first, so a clause can
 * be resolved with a copy of itself.
 *
 * For every derived clause the parents of its first derivation are recorded
 * under the clause's string form.
 *
 * @param Clauses - the current set of clauses
 * @returns `newClauses`, the derived clauses, and `reasons`, mapping the
 *          string form of each derived clause to its parent clause(s)
 */
function infere(Clauses: CNF): { newClauses: CNF, reasons: ReasonMap } {
    const newClauses = new RecursiveSet<Clause>();
    const reasons: ReasonMap = new Map();

    // Stores a derived clause and remembers the parents of its first derivation.
    const record = (derived: Clause, parents: Clause[]): void => {
        newClauses.add(derived);
        const key = derived.toString();
        if (!reasons.has(key)) {
            reasons.set(key, parents);
        }
    };

    const clausesArray = Array.from(Clauses);

    // Resolution of every ordered pair of clauses.
    for (const C1 of clausesArray) {
        for (const C2 of clausesArray) {
            for (const res of resolve(C1, C2)) {
                record(res, [C1, C2]);
            }
        }
    }

    // Factorization of every single clause.
    for (const C of clausesArray) {
        for (const factor of factorize(C)) {
            record(factor, [C]);
        }
    }

    return { newClauses, reasons };
}


In [None]:
/**
 * Logs the result of `prettifyCNF(Clauses)` to the console.
 *
 * @param Clauses - the set of clauses to print
 */
function prettyPrint(Clauses: CNF): void {
    console.log(prettifyCNF(Clauses));
}

In [None]:
// Print the current clause set.
prettyPrint(Clauses);

The function $\texttt{saturateWithProof}(\texttt{Cs})$ takes a set of clauses $\texttt{Cs}$ as input and tries to infer the empty clause. If it is not possible to infer the empty clause, the function runs until saturation is reached or memory is exhausted.

In [None]:
/**
 * Saturates the clause set `Cs` under resolution and factorization while
 * recording how every new clause was derived.
 *
 * Each round first checks whether the empty clause is already present. If not,
 * all clauses produced by `infere` that are not yet known are added. The loop
 * ends when the empty clause is found or when a round adds nothing new.
 * Progress is reported on the console after every round.
 *
 * @param Cs - the initial set of clauses; it is copied, not modified
 * @returns a map from the string form of each derived clause to the parents of
 *          its first derivation
 */
function saturateWithProof(Cs: CNF): ReasonMap {
    const known = new RecursiveSet<Clause>(...Cs);
    const Reasons: ReasonMap = new Map();

    for (let cnt = 1; ; cnt++) {
        // Stop as soon as the empty clause is among the known clauses.
        for (const candidate of known) {
            if (candidate.size === 0) {
                console.log("Empty clause found!");
                return Reasons;
            }
        }

        const { newClauses, reasons: stepReasons } = infere(known);

        let newAddedCount = 0;
        for (const derived of newClauses) {
            if (known.has(derived)) {
                continue;
            }
            known.add(derived);
            newAddedCount += 1;

            // Keep only the first recorded derivation of each clause.
            const key = derived.toString();
            const parents = stepReasons.get(key);
            if (parents && !Reasons.has(key)) {
                Reasons.set(key, parents);
            }
        }

        console.log(`cnt = ${cnt}, number of clauses: ${known.size}, new: ${newAddedCount}`);

        if (newAddedCount === 0) {
            console.log("Saturation reached (no empty clause found).");
            return Reasons;
        }
    }
}


In [None]:
// Run the saturation on the dragon clauses and measure how long it takes.
console.time("saturate")
const proofReasons = saturateWithProof(Clauses);
console.timeEnd("saturate")

In [None]:
/**
 * Merges two proofs into one without repeating entries.
 *
 * The result starts with all entries of `p1` in their original order, followed
 * by those entries of `p2` that have not occurred before, in the order of
 * their first occurrence. Neither input array is modified.
 *
 * Membership is tracked in a `Set`, so merging is linear in the total number
 * of entries rather than quadratic.
 *
 * @param p1 - the first proof
 * @param p2 - the proof whose new entries are appended
 * @returns a new array containing the merged proof
 */
function updateProof(p1: string[], p2: string[]): string[] {
    const merged = [...p1];
    const seen = new Set(p1);
    for (const line of p2) {
        if (!seen.has(line)) {
            seen.add(line);
            merged.push(line);
        }
    }
    return merged;
}

Given a dictionary $\texttt{Reasons}$ and a clause $\texttt{clause}$, the function `constructProof` returns a proof of $\texttt{clause}$.

In [None]:
/**
 * Reconstructs a proof of `clause` from the derivation record `Reasons`.
 *
 * The proof is a list of steps in which every step appears after the steps
 * proving its premises, and no step appears twice. A clause without an entry in
 * `Reasons` is reported as an axiom. A clause with one recorded parent is a
 * factorization step, one with two recorded parents a resolution step.
 *
 * Every step is a single array element. Factorization and resolution steps
 * span several lines joined by '\n', so duplicates are removed per step. If the
 * lines of a step were separate elements, a premise line shared by two steps
 * would be dropped from the later one.
 *
 * Sub-proofs are cached by the clause's string form, so a clause used as a
 * premise several times is only processed once.
 *
 * @param clause - the clause whose derivation is wanted
 * @param Reasons - map from the string form of a derived clause to its parents
 * @returns the proof steps in order; printing each element yields the proof
 */
function constructProof(clause: Clause, Reasons: ReasonMap): string[] {
    const cache = new Map<string, string[]>();

    // Builds (or looks up) the proof of a single clause.
    const prove = (current: Clause): string[] => {
        const text = current.toString();
        const cached = cache.get(text);
        if (cached !== undefined) {
            return cached;
        }

        const parents = Reasons.get(text);
        let proof: string[];
        if (parents !== undefined && parents.length === 1) {
            const parent = parents[0];
            proof = updateProof(prove(parent), [
                `Factorization: ${parent.toString()}\n` +
                `             ⊢ ${text}`
            ]);
        } else if (parents !== undefined && parents.length === 2) {
            const [p1, p2] = parents;
            const premises = updateProof(prove(p1), prove(p2));
            proof = updateProof(premises, [
                `Resolution: ${p1.toString()},\n` +
                `            ${p2.toString()}\n` +
                `          ⊢ ${text}`
            ]);
        } else {
            proof = [`Axiom: ${text}`];
        }

        cache.set(text, proof);
        return proof;
    };

    return prove(clause);
}

In [None]:
// Reconstruct the proof of the empty clause and print it entry by entry.
const emptyClause = new RecursiveSet<Literal>();
const proofLines = constructProof(emptyClause, proofReasons);
for (const line of proofLines) {
    console.log(line);
}

In [None]:
// Show the clause set passed to saturateWithProof.
prettifyCNF(Clauses);