diff --git a/kingfisher_scrapy/exceptions.py b/kingfisher_scrapy/exceptions.py
index f1f0e191..080eded2 100644
--- a/kingfisher_scrapy/exceptions.py
+++ b/kingfisher_scrapy/exceptions.py
@@ -10,9 +10,5 @@ class SpiderArgumentError(KingfisherScrapyError):
     """Raised when a spider argument's value is invalid"""


-class MissingRequiredFieldError(KingfisherScrapyError, KeyError):
-    """Raised when an item is missing a required field"""
-
-
 class MissingNextLinkError(KingfisherScrapyError):
     """Raised when a next link is not found on the first page of results"""
diff --git a/kingfisher_scrapy/item_schema/File.json b/kingfisher_scrapy/item_schema/File.json
new file mode 100644
index 00000000..c726798e
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/File.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherFileItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "post_to_api": {
+      "type": "boolean"
+    },
+    "path": {
+      "type": "string"
+    },
+    "files_store": {
+      "type": "string"
+    }
+  }
+}
diff --git a/kingfisher_scrapy/item_schema/FileError.json b/kingfisher_scrapy/item_schema/FileError.json
new file mode 100644
index 00000000..8f1b935d
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/FileError.json
@@ -0,0 +1,18 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "errors": {
+      "type": "string",
+      "minLength": 1
+    }
+  },
+  "required": [
+    "errors"
+  ]
+}
diff --git a/kingfisher_scrapy/item_schema/FileItem.json b/kingfisher_scrapy/item_schema/FileItem.json
new file mode 100644
index 00000000..3e49413f
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/FileItem.json
@@ -0,0 +1,18 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherFileItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "number": {
+      "type": "integer",
+      "minimum": 1
+    }
+  },
+  "required": [
+    "number"
+  ]
+}
diff --git a/kingfisher_scrapy/item_schema/item.json b/kingfisher_scrapy/item_schema/item.json
new file mode 100644
index 00000000..9d96c7f1
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/item.json
@@ -0,0 +1,63 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "KingfisherItem": {
+      "type": "object",
+      "properties": {
+        "file_name": {
+          "type": "string",
+          "pattern": "^[^/]+$"
+        },
+        "url": {
+          "type": "string",
+          "format": "uri"
+        }
+      },
+      "required": [
+        "file_name",
+        "url"
+      ]
+    },
+    "KingfisherFileItem": {
+      "allOf": [
+        {
+          "$ref": "#/definitions/KingfisherItem"
+        }
+      ],
+      "type": "object",
+      "properties": {
+        "data_type": {
+          "type": "string",
+          "enum": [
+            "record",
+            "release",
+            "record_list",
+            "release_list",
+            "compiled_release",
+            "record_package",
+            "release_package",
+            "record_package_list",
+            "release_package_list",
+            "record_package_list_in_results",
+            "release_package_list_in_results",
+            "release_package_json_lines",
+            "record_package_json_lines",
+            "release_package_in_ocdsReleasePackage_in_list_in_results",
+            "release_in_Release"
+          ]
+        },
+        "encoding": {
+          "type": "string"
+        },
+        "data": {
+          "type": "string",
+          "minLength": 1
+        }
+      },
+      "required": [
+        "data",
+        "data_type"
+      ]
+    }
+  }
+}
diff --git a/kingfisher_scrapy/items.py b/kingfisher_scrapy/items.py
index a05d5c2b..7ce0e95e 100644
--- a/kingfisher_scrapy/items.py
+++ b/kingfisher_scrapy/items.py
@@ -1,23 +1,12 @@
 # https://docs.scrapy.org/en/latest/topics/items.html
-import scrapy

-from kingfisher_scrapy.exceptions import MissingRequiredFieldError
+import scrapy


 class KingfisherItem(scrapy.Item):
     file_name = scrapy.Field()
     url = scrapy.Field()
-
-    def validate(self):
-        """
-        Raises an error if any required field is missing.
-
-        :raises kingfisher_scrapy.extensions.MissingRequiredFieldError: if any required field is missing
-        """
-        if hasattr(self, 'required'):
-            for field in self.required:
-                if field not in self:
-                    raise MissingRequiredFieldError(field)
+    validate = True


 class File(KingfisherItem):
@@ -32,13 +21,6 @@ class File(KingfisherItem):
     path = scrapy.Field()
     files_store = scrapy.Field()

-    required = [
-        'file_name',
-        'url',
-        'data',
-        'data_type',
-    ]
-

 class FileItem(KingfisherItem):
     number = scrapy.Field()
@@ -46,20 +28,6 @@ class FileItem(KingfisherItem):
     data_type = scrapy.Field()
     encoding = scrapy.Field()

-    required = [
-        'number',
-        'file_name',
-        'url',
-        'data',
-        'data_type',
-    ]
-

 class FileError(KingfisherItem):
     errors = scrapy.Field()
-
-    required = [
-        'file_name',
-        'url',
-        'errors',
-    ]
diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py
index 72255757..68172e69 100644
--- a/kingfisher_scrapy/pipelines.py
+++ b/kingfisher_scrapy/pipelines.py
@@ -1,18 +1,31 @@
 # https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 # https://docs.scrapy.org/en/latest/topics/signals.html#item-signals
+
+import os
+import pathlib
+
+import jsonref as jsonref
+from jsonschema import FormatChecker
+from jsonschema.validators import Draft4Validator
+
 from kingfisher_scrapy.items import File, FileItem


 class Validate:
     def __init__(self):
+        self.validators = {}
         self.files = set()
         self.file_items = set()
+        schema_path = pathlib.Path(os.path.dirname(os.path.abspath(__file__)), 'item_schema')
+        for item in ('File', 'FileError', 'FileItem'):
+            filename = os.path.join(schema_path, f'{item}.json')
+            with open(filename) as f:
+                schema = jsonref.load(f, base_uri=schema_path.as_uri() + '/')
+            self.validators[item] = Draft4Validator(schema, format_checker=FormatChecker())

     def process_item(self, item, spider):
         if hasattr(item, 'validate'):
-            # We call this in the item pipeline to guarantee that all items are validated. However, its backtrace isn't
-            # as helpful for debugging, so we could also call it in ``BaseSpider`` if this becomes an issue.
-            item.validate()
+            self.validators.get(item.__class__.__name__).validate(dict(item))

         if isinstance(item, FileItem):
             key = (item['file_name'], item['number'])
diff --git a/requirements.in b/requirements.in
index cb4eca0d..41899567 100644
--- a/requirements.in
+++ b/requirements.in
@@ -3,8 +3,11 @@
 ijson>=3
 jsonpointer
+jsonref
+jsonschema
 rarfile
 requests
+rfc3987
 Scrapy
 scrapyd-client
 sentry-sdk
diff --git a/requirements.txt b/requirements.txt
index 0bea0b9d..65ebc155 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile
 #
-attrs==19.3.0  # via automat, service-identity, twisted
+attrs==19.3.0  # via automat, jsonschema, service-identity, twisted
 automat==0.8.0  # via twisted
 certifi==2019.11.28  # via requests, sentry-sdk
 cffi==1.13.2  # via cryptography
@@ -15,8 +15,11 @@ cssselect==1.1.0  # via parsel, scrapy
 hyperlink==19.0.0  # via twisted
 idna==2.8  # via hyperlink, requests
 ijson==3.0.3
+importlib-metadata==1.6.1  # via jsonschema
 incremental==17.5.0  # via twisted
 jsonpointer==2.0
+jsonref==0.2
+jsonschema==3.2.0
 lxml==4.4.2  # via parsel, scrapy
 parsel==1.5.2  # via scrapy
 protego==0.1.16  # via scrapy
@@ -26,17 +29,20 @@ pycparser==2.19  # via cffi
 pydispatcher==2.0.5  # via scrapy
 pyhamcrest==1.9.0  # via twisted
 pyopenssl==19.1.0  # via scrapy
+pyrsistent==0.16.0  # via jsonschema
 queuelib==1.5.0  # via scrapy
 rarfile==3.1
 requests==2.22.0
+rfc3987==1.3.8
 scrapy==1.8.0
 scrapyd-client==1.1.0
 sentry-sdk==0.14.4
 service-identity==18.1.0  # via scrapy
-six==1.13.0  # via automat, cryptography, parsel, protego, pyhamcrest, pyopenssl, scrapy, scrapyd-client, w3lib
+six==1.13.0  # via automat, cryptography, jsonschema, parsel, protego, pyhamcrest, pyopenssl, pyrsistent, scrapy, scrapyd-client, w3lib
 twisted==20.3.0  # via scrapy
 urllib3==1.25.7  # via requests, sentry-sdk
 w3lib==1.21.0  # via parsel, scrapy
+zipp==3.1.0  # via importlib-metadata
 zope.interface==4.7.1  # via scrapy, twisted

 # The following packages are considered to be unsafe in a requirements file:
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 9f58a35a..e010b745 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -21,13 +21,15 @@ flake8==3.7.9
 hyperlink==19.0.0
 idna==2.8
 ijson==3.0.3
-importlib-metadata==1.3.0  # via pluggy, pytest
+importlib-metadata==1.6.1
 incremental==17.5.0
 isort==4.3.21
 jsonpointer==2.0
+jsonref==0.2
+jsonschema==3.2.0
 lxml==4.4.2
 mccabe==0.6.1  # via flake8
-more-itertools==8.0.2  # via pytest, zipp
+more-itertools==8.0.2  # via pytest
 packaging==19.2  # via pytest
 parsel==1.5.2
 pip-tools==5.1.0
@@ -43,11 +45,13 @@ pyflakes==2.1.1  # via flake8
 pyhamcrest==1.9.0
 pyopenssl==19.1.0
 pyparsing==2.4.5  # via packaging
+pyrsistent==0.16.0
 pytest-cov==2.8.1
 pytest==5.3.2
 queuelib==1.5.0
 rarfile==3.1
 requests==2.22.0
+rfc3987==1.3.8
 scrapy==1.8.0
 scrapyd-client==1.1.0
 sentry-sdk==0.14.4
@@ -57,7 +61,7 @@ twisted==20.3.0
 urllib3==1.25.7
 w3lib==1.21.0
 wcwidth==0.1.7  # via pytest
-zipp==0.6.0  # via importlib-metadata
+zipp==3.1.0
 zope.interface==4.7.1

 # The following packages are considered to be unsafe in a requirements file:
diff --git a/tests/test_validate.py b/tests/test_validate.py
index a89fa4ba..9dcbca08 100644
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -1,7 +1,7 @@
 import pytest
+from jsonschema import ValidationError

-from kingfisher_scrapy.exceptions import MissingRequiredFieldError
-from kingfisher_scrapy.items import File, FileItem
+from kingfisher_scrapy.items import File, FileError, FileItem
 from kingfisher_scrapy.pipelines import Validate
 from tests import spider_with_crawler

@@ -9,10 +9,10 @@
 def test_process_item():
     pipeline = Validate()
     item = File({
-        'file_name': '',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
     })

     assert pipeline.process_item(item, None) == item
@@ -20,9 +20,69 @@

 def test_process_item_error():
     pipeline = Validate()
-    item = File()
+    item = File({
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+    })
+
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['file_name'] = 'test'
+    item['data_type'] = 'not a valid data type'
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+
+
+def test_process_file_item():
+    pipeline = Validate()
+    item = FileItem({
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+        'number': 1
+    })
+    assert pipeline.process_item(item, None) == item
+
+
+def test_process_file_item_error():
+    pipeline = Validate()
+    item = FileItem({
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+        'number': "2"
+    })
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['number'] = None
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+
+
+def test_process_file_error():
+    pipeline = Validate()
+    item = FileError({
+        'file_name': 'test',
+        'url': 'http://test.com',
+        'errors': 'Error'
+    })
+    assert pipeline.process_item(item, None) == item
+

-    with pytest.raises(MissingRequiredFieldError):
+def test_process_file_item_error_error():
+    pipeline = Validate()
+    item = FileError({
+        'file_name': 'test',
+        'url': 'http://test.com'
+    })
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['errors'] = 'Error'
+    item['url'] = 'not an url'
+    with pytest.raises(ValidationError):
         pipeline.process_item(item, None)


@@ -31,9 +91,9 @@
     spider = spider_with_crawler()
     item = File({
         'file_name': 'test1',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://example.com',
     })

     pipeline.process_item(item, spider)
@@ -51,9 +111,9 @@ def test_duplicate_file_item(caplog):
     spider = spider_with_crawler()
     item = FileItem({
         'file_name': 'test1',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://example.com',
         'number': 1
     })
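
Note (not part of the diff): a minimal standalone sketch of how the schema wiring above behaves, mirroring what `Validate.__init__` and `Validate.process_item` now do in `pipelines.py`. The sample item values, and the assumption that the script runs from the repository root next to `kingfisher_scrapy/item_schema/`, are illustrative only.

```python
import pathlib

import jsonref
from jsonschema import FormatChecker, ValidationError
from jsonschema.validators import Draft4Validator

# Assumption: run from the repository root so the schema directory resolves.
schema_dir = pathlib.Path('kingfisher_scrapy', 'item_schema').resolve()

# jsonref resolves "item.json#/definitions/KingfisherFileItem" against base_uri,
# so File.json is loaded with the shared definitions from item.json inlined.
with open(schema_dir / 'File.json') as f:
    schema = jsonref.load(f, base_uri=schema_dir.as_uri() + '/')

# FormatChecker enables the "format": "uri" check (which is why rfc3987 is
# added to requirements.in above).
validator = Draft4Validator(schema, format_checker=FormatChecker())

# A well-formed File item passes validation.
validator.validate({
    'file_name': 'example.json',
    'url': 'http://example.com/example.json',
    'data': '{"releases": []}',
    'data_type': 'release_package',
})

# A malformed item raises jsonschema.ValidationError, as the new tests expect.
try:
    validator.validate({
        'file_name': 'bad/name',               # violates the ^[^/]+$ pattern
        'url': 'not a url',                    # fails the uri format check
        'data': '',                            # violates minLength: 1
        'data_type': 'not a valid data type',  # not in the enum
    })
except ValidationError as e:
    print(e.message)
```

The pipeline keys one compiled `Draft4Validator` per item class (`File`, `FileError`, `FileItem`), so each class gets its own schema file while sharing the common `KingfisherItem`/`KingfisherFileItem` definitions in `item.json`.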