diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml
new file mode 100644
index 0000000..99bbe62
--- /dev/null
+++ b/.github/workflows/Typos.yml
@@ -0,0 +1,16 @@
+name: Check Typos
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  run:
+    name: SpellCheck
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: crate-ci/typos@master
diff --git a/_typos.toml b/_typos.toml
new file mode 100644
index 0000000..5f78e9b
--- /dev/null
+++ b/_typos.toml
@@ -0,0 +1,21 @@
+[default]
+extend-ignore-re = [
+  "\\b[0-9A-Za-z+/]{91}(=|==)?\\b",  # base64 strings
+  "[0-9a-fA-F]{7,}",                 # git commit hashes
+  "\\b[0-9A-Za-z+/]{33,}(=|==)?\\b", # SHA/tpub/addresses etc. strings
+]
+
+[default.extend-words]
+Tesselation = "Tesselation"
+tesselation = "tesselation"
+Missings = "Missings"
+rcall = "rcall"
+CACH = "CACH"
+
+[type.gitignore]
+extend-glob = [".gitignore"]
+check-file = false
+
+[type.bib]
+extend-glob = ["*.bib"]
+check-file = false
diff --git a/docs/src/binary-classification.jl b/docs/src/binary-classification.jl
index 7e99ebe..1867f39 100644
--- a/docs/src/binary-classification.jl
+++ b/docs/src/binary-classification.jl
@@ -242,7 +242,7 @@ A point on the left means that a lower value for the data is related to higher o
 The subfigure on the right shows the thresholds used by the rules in the cross-validation folds via the vertical lines.
 In the background, the histograms show the data.
 
-For example, for the `nodes`, it can be seen that all rules (fitted in the different cross-validation folds) base their decision on whether the `nodes` are below, roughly, 5. 
+For example, for the `nodes`, it can be seen that all rules (fitted in the different cross-validation folds) base their decision on whether the `nodes` are below, roughly, 5.
 Next, the left side indicates in which direction this effect works.
 More specifically, the individuals who had less than 5 nodes are more likely to survive, according to the model. 
 The sizes of the dots indicate the weight that the rule has, so a bigger dot means that a rule plays a larger role in the final outcome. 
@@ -251,7 +251,7 @@ These dots are sized in such a way that a doubling in weight means a doubling in
 
 # ╔═╡ ab5423cd-c8a9-488e-9bb0-bb41e583c2fa
 md"""
-What this plot shows is that the `nodes` feature is on average chosen as the feature with the most predictive power because the `nodes` feature is shown as the first feature and the tickness of the dots is the biggest.
+What this plot shows is that the `nodes` feature is on average chosen as the feature with the most predictive power because the `nodes` feature is shown as the first feature and the thickness of the dots is the biggest.
 Furthermore, there is agreement on the effect of the `nodes` and `age` features.
 In both cases, a lower number is associated with survival.
 This is as expected because the model essentially implies that people where fewer cancerous axillary nodes are detected and who are younger are more likely to survive.
@@ -281,8 +281,8 @@ With the rule-based model, we can say exactly at which number of `nodes` and at
 md"""
 ## Conclusion
 
-Compared to decision trees, the rule-based classifier is more stable, more accurate and similarly easy to interpet.
-Compared to the random forest, the rule-based classifier is only slightly less accurate, but much easier to interpet.
+Compared to decision trees, the rule-based classifier is more stable, more accurate and similarly easy to interpret.
+Compared to the random forest, the rule-based classifier is only slightly less accurate, but much easier to interpret.
 Due to the interpretability, it is likely easier to verify the model and therefore the rule-based classifier will be more accurate in real-world settings.
 This makes rule-based highly suitable for many machine learning tasks.
 """
diff --git a/src/dependent.jl b/src/dependent.jl
index 0c0a005..ebfa379 100644
--- a/src/dependent.jl
+++ b/src/dependent.jl
@@ -69,7 +69,7 @@ and one zeroes column:
 |  !A &&  B  |   1  |  0 |  1 |  0 |  1  |
 |  !A && !B  |   1  |  0 |  0 |  1 |  0  |
 
-In other words, the matrix represents which rules are implied by each syntetic datapoint
+In other words, the matrix represents which rules are implied by each synthetic datapoint
 (conditions in the rows).
 Next, this can be used to determine which rules are linearly dependent by checking whether
 the rank increases when adding rules.
diff --git a/src/rules.jl b/src/rules.jl
index f19c85a..6f662ee 100644
--- a/src/rules.jl
+++ b/src/rules.jl
@@ -7,7 +7,7 @@ For example, the rule `if X[i, 1] > 3 & X[i, 2] < 4, then ...` contains two subc
 
 A subclause is equivalent to a split in a decision tree.
 In other words, each rule is based on one or more subclauses.
-In pratice, a rule is based on at most two subclauses (has at most two subclauses).
+In practice, a rule is based on at most two subclauses (has at most two subclauses).
 The reason for this is that rules with more than two subclauses will not end up
 in the final model, as is discussed in the original SIRUS paper.
 
diff --git a/src/weights.jl b/src/weights.jl
index df4cc6b..7c70b01 100644
--- a/src/weights.jl
+++ b/src/weights.jl
@@ -41,7 +41,7 @@ For example, the data for fitting could have the following variables:
 |--------|--------|--------|---------|
 | ...... | ...... | ...... | ....... |
 
-The regression then finds a coeffient for each rule which is based on how much
+The regression then finds a coefficient for each rule which is based on how much
 each rule is associated with the outcome. I don't know why this makes sense,
 but based on benchmarks it does.
 
diff --git a/test/rules.jl b/test/rules.jl
index 7fab116..18416e3 100644
--- a/test/rules.jl
+++ b/test/rules.jl
@@ -82,7 +82,7 @@ generated = map(i -> generate_rules(), 1:10)
 """
 Return whether the score for the model is roughly equal to check whether RNG is used correctly.
 Checking the scores is easier than the raw models since those seem to differ slightly (probably
-due to mutli-threading, which can change the order).
+due to multi-threading, which can change the order).
 """
 function equal_output(stage::Symbol)
     V = getproperty.(generated, stage)