Skip to content

Commit

Permalink
ran monkeytype on entire scrapy codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
slix committed Jan 7, 2022
1 parent 2b30e0b commit 2f22b84
Show file tree
Hide file tree
Showing 240 changed files with 7,204 additions and 704 deletions.
21 changes: 21 additions & 0 deletions docker-for-typing/generate-with-monkeytype.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Build environment that traces runtime types of the Scrapy test suite with
# MonkeyType, then generates diffs/stubs/annotated sources for every module.
FROM python:3.6

WORKDIR /usr/src/app

# Fetch a pinned snapshot of the scrapy fork so the traces match a known commit.
RUN wget -O repo.zip https://github.com/slix/scrapy/archive/2b30e0b89d0de2b735ae4040d309de8fa606bded.zip && unzip repo.zip && rm repo.zip && mv scrapy-2b30e0b89d0de2b735ae4040d309de8fa606bded/ scrapy-repo/

# Pin the packaging toolchain first, then install the pinned dependency set
# and scrapy itself (editable, so MonkeyType can rewrite files in place).
RUN pip install --no-cache-dir setuptools==59.6.0 pip==21.3.1 wheel==0.37.1
COPY ./requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir -e scrapy-repo/

WORKDIR scrapy-repo/

# Collect traces for these packages while running the full test suite under
# both Twisted reactors, then compress the resulting trace database.
ENV MONKEYTYPE_TRACE_MODULES=scrapy,parsel,w3lib,itemadapter,itemloaders,cssselect,queuelib
RUN monkeytype run -m pytest -- --durations=10 docs scrapy tests --reactor=default \
&& monkeytype run -m pytest -- --durations=10 docs scrapy tests --reactor=asyncio \
&& xz -v -9 monkeytype.sqlite3

COPY ./run-on-all-files.py ./run-on-all-files.py
# NOTE(review): `xz` above was run without --keep, so monkeytype.sqlite3 no
# longer exists here and this first invocation likely finds no trace DB —
# confirm whether this RUN is a leftover; the next line restores the DB
# (unxz --keep) before running the script for real.
RUN python run-on-all-files.py
RUN unxz monkeytype.sqlite3.xz --keep && python run-on-all-files.py && rm monkeytype.sqlite3
2 changes: 2 additions & 0 deletions docker-for-typing/monkeytype.sqlite3.sha256sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
4df8ac1b158391d5fb0eb1a7655a0d3511b81ddd8f3fb06c8ff9446f2f8ca199 *monkeytype.sqlite3
867e239786958b34f101f17d55298cc3051f0404c475a0564640c5aa533a317a *monkeytype.sqlite3.xz
124 changes: 124 additions & 0 deletions docker-for-typing/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
asn1crypto==1.4.0
attrs==21.4.0
Automat==20.2.0
backcall==0.2.0
backports.cached-property==1.0.1
blessings==1.7
blinker==1.4
boto==2.49.0
botocore==1.23.26
bpython==0.22.1
brotlipy==0.7.0
cachetools==4.2.4
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.9
click==6.7
constantly==15.1.0
coverage==6.2
cryptography==2.3.1
cssselect==1.1.0
curtsies==0.3.10
cwcwidth==0.1.5
dataclasses==0.8
decorator==5.1.0
execnet==1.9.0
google-api-core==2.3.2
google-auth==2.3.3
google-cloud-core==2.2.1
google-cloud-storage==1.43.0
google-crc32c==1.3.0
google-resumable-media==2.1.0
googleapis-common-protos==1.54.0
greenlet==1.1.2
h11==0.7.0
h2==3.2.0
hpack==3.0.0
hyperframe==5.2.0
hyperlink==21.0.0
idna==3.3
importlib-metadata==4.8.3
incremental==21.3.0
iniconfig==1.1.1
ipython==7.16.2
ipython-genutils==0.2.0
itemadapter==0.4.0
itemloaders==1.0.4
jedi==0.17.2
jmespath==0.10.0
kaitaistruct==0.8
ldap3==2.5.2
libcst==0.3.23
lxml==4.7.1
lxml-stubs==0.2.0
mitmproxy==4.0.4
MonkeyType==21.5.0
mypy==0.921
mypy-extensions==0.4.3
packaging==21.3
parsel==1.6.0
parso==0.7.1
passlib==1.7.4
pexpect==4.8.0
pickleshare==0.7.5
Pillow==8.4.0
pip==21.3.1
pluggy==1.0.0
priority==1.3.0
prompt-toolkit==3.0.24
Protego==0.1.16
protobuf==3.19.1
ptyprocess==0.7.0
py==1.11.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.21
PyDispatcher==2.0.5
pyftpdlib==1.5.6
Pygments==2.10.0
pyOpenSSL==18.0.0
pyparsing==2.2.2
pyperclip==1.6.5
pytest==6.2.5
pytest-cov==3.0.0
pytest-forked==1.4.0
pytest-xdist==2.5.0
python-dateutil==2.8.2
pyxdg==0.27
PyYAML==6.0
queuelib==1.6.2
reppy==0.4.14
requests==2.26.0
robotexclusionrulesparser==1.7.1
rsa==4.8
ruamel.yaml==0.15.100
service-identity==21.1.0
setuptools==59.6.0
six==1.16.0
sortedcontainers==2.0.5
sybil==3.0.0
testfixtures==6.18.3
toml==0.10.2
tomli==1.2.3
tornado==5.1.1
traitlets==4.3.3
Twisted==21.7.0
typed-ast==1.5.1
types-cryptography==3.3.10
types-enum34==1.1.1
types-ipaddress==1.0.1
types-pyOpenSSL==20.0.3
types-setuptools==57.0.0
typing==3.7.4.3
typing-inspect==0.7.1
typing_extensions==4.0.1
urllib3==1.26.7
urwid==2.0.1
uvloop==0.14.0
w3lib==1.22.0
wcwidth==0.2.5
wheel==0.37.1
wsproto==0.11.0
zipp==3.6.0
zope.interface==5.4.0
zstandard==0.16.0
97 changes: 97 additions & 0 deletions docker-for-typing/run-on-all-files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import os
import shutil
import subprocess
from subprocess import PIPE, CompletedProcess
from pathlib import Path

from typing import Dict, Callable


def path_for_module(module: str) -> Path:
    """Map a dotted module name to its source file under the current directory.

    A package resolves to ``<module>/__init__.py`` and a plain module to
    ``<module>.py``.  Raises if both exist (ambiguous) or neither exists.
    """
    base = './' + module.replace('.', '/')
    package_init = Path(base + '/__init__.py')
    plain_module = Path(base + '.py')

    is_package = package_init.is_file()
    is_plain = plain_module.is_file()

    if is_package and is_plain:
        raise Exception(f'Ambiguous module: {module}')
    if not is_package and not is_plain:
        raise Exception(f'No file for module found: {module}')
    return package_init if is_package else plain_module


def workaround_for_monkeytype_flakes(run: Callable[[], CompletedProcess]) -> CompletedProcess:
    """Invoke *run* repeatedly to paper over a known transient MonkeyType crash.

    A failing attempt whose stderr contains the '_Union' AttributeError is
    treated as a flake and retried (30 attempts total).  Any other outcome —
    success, or a failure with a different error — is returned immediately.
    If every attempt flakes, the last result is returned as-is.
    """
    flake_marker = b"AttributeError: '_Union' object has no attribute '__name__'"
    attempt: CompletedProcess = None
    for _ in range(30):
        attempt = run()
        # Success, or a genuine (non-flake) failure: hand it straight back.
        if attempt.returncode == 0 or flake_marker not in attempt.stderr:
            return attempt
        # Known transient failure — retry.
    return attempt


# Entry point: run MonkeyType over every traced scrapy module, writing a type
# diff per file, applying annotations in place where `apply` succeeds, and
# falling back to a .pyi stub where it fails.
if __name__ == '__main__':
    # Every module MonkeyType collected traces for; keep only scrapy itself
    # and its submodules.
    all_modules = os.popen("monkeytype list-modules").read().split()
    target_modules = (m for m in all_modules if (m == 'scrapy' or m.startswith('scrapy.')))

    # Successfully annotated files are parked under *.applied until the very
    # end, so later `monkeytype apply` runs still import the original,
    # known-good sources.  Maps final path -> temporary annotated file.
    replace_later: Dict[Path, Path] = {}

    for module in target_modules:
        path = path_for_module(module)

        # 1) Record the proposed annotations as a diff next to the source
        #    file, unless the diff is empty/whitespace-only.
        diffed = workaround_for_monkeytype_flakes(lambda: subprocess.run(f"monkeytype --disable-type-rewriting -v -l 99999999999999999 stub --diff {module}", stdout=PIPE, stderr=PIPE, shell=True))
        if diffed.returncode == 0:
            diffed_output = diffed.stdout
            decoded = diffed_output.decode('utf-8')
            is_only_whitespace = (len(decoded) == 0 or decoded.isspace())
            if not is_only_whitespace:
                # Written as <name>.pyi.diff
                path.with_name(path.name + "i.diff").write_bytes(diffed_output)

        # Keep a pristine copy so the source can be restored after `apply`
        # rewrites it in place.
        orig_path = path.with_name(path.name + '.orig')
        shutil.copyfile(path, orig_path)

        # 2) Try to apply the annotations directly to the source file.
        applied = workaround_for_monkeytype_flakes(lambda: subprocess.run(f"monkeytype --disable-type-rewriting -v -l 99999999999999999 apply {module}",
                                                                          stdout=PIPE, stderr=PIPE, shell=True))
        if applied.returncode == 0:
            if len(applied.stderr) != 0:
                path.with_name(path.name + '.apply-success-warnings.txt').write_bytes(applied.stderr)

            # apply breaks Python code. But future apply invocations need all the project code to compile.
            # So: park the annotated file aside and put the original back for now.
            applied_path = path.with_name(path.name + '.applied')
            path.rename(applied_path)
            orig_path.rename(path)
            del orig_path

            replace_later[path] = applied_path
            continue
        else:
            path.with_name(path.name + '.apply-error.txt').write_bytes(applied.stderr)

        # `apply` failed: restore the pristine source before stubbing.
        orig_path.replace(path)
        del orig_path

        # 3) apply failed, but a stub is likely to work.
        stub_path = path.with_name(path.name + 'i')
        assert stub_path.name.endswith('.pyi')

        stubbed = workaround_for_monkeytype_flakes(lambda: subprocess.run(f"monkeytype --disable-type-rewriting -v -l 99999999999999999 stub {module}",
                                                                          stdout=PIPE, stderr=PIPE, shell=True))
        if stubbed.returncode == 0:
            stub_path.write_bytes(stubbed.stdout)

            if len(stubbed.stderr) != 0:
                stub_path.with_name(stub_path.name + '.stub-success-warnings.txt').write_bytes(stubbed.stderr)
        else:
            stub_path.with_name(stub_path.name + '.stub-error.txt').write_bytes(stubbed.stderr)

    # Finally move every annotated file into place, clobbering the originals.
    for dest, source in replace_later.items():
        source.replace(dest)
10 changes: 5 additions & 5 deletions scrapy/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Base class for Scrapy commands
"""
import os
from optparse import OptionGroup
from typing import Any, Dict
from optparse import OptionParser, Values, OptionGroup
from typing import List, Any, Dict

from twisted.python import failure

Expand All @@ -21,7 +21,7 @@ class ScrapyCommand:

exitcode = 0

def __init__(self):
def __init__(self) -> None:
self.settings = None # set in scrapy.cmdline

def set_crawler(self, crawler):
Expand Down Expand Up @@ -55,7 +55,7 @@ def help(self):
"""
return self.long_desc()

def add_options(self, parser):
def add_options(self, parser: OptionParser) -> None:
"""
Populate option parse with options available for this command
"""
Expand All @@ -76,7 +76,7 @@ def add_options(self, parser):

parser.add_option_group(group)

def process_options(self, args, opts):
def process_options(self, args: List[str], opts: Values) -> None:
try:
self.settings.setdict(arglist_to_dict(opts.set),
priority='cmdline')
Expand Down
26 changes: 15 additions & 11 deletions scrapy/contracts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,28 @@
import sys
from functools import wraps
from inspect import getmembers
from typing import Dict
from typing import Any, Callable, List, Optional, Type, Union, Dict
from unittest import TestCase

from scrapy.http import Request
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output
from scrapy.contracts.default import CallbackKeywordArgumentsContract, ReturnsContract, ScrapesContract, UrlContract
from scrapy.http.request import Request
from tests.test_contracts import CustomContractFailSpider, CustomContractSuccessSpider, CustomFailContract, CustomFormContract, CustomSuccessContract, InheritsTestSpider
from unittest.runner import TextTestResult


class Contract:
""" Abstract class for contracts """
request_cls = None

def __init__(self, method, *args):
def __init__(self, method: Callable, *args) -> None:
self.testcase_pre = _create_testcase(method, f'@{self.name} pre-hook')
self.testcase_post = _create_testcase(method, f'@{self.name} post-hook')
self.args = args

def add_pre_hook(self, request, results):
def add_pre_hook(self, request: Request, results: TextTestResult) -> Request:
if hasattr(self, 'pre_process'):
cb = request.callback

Expand All @@ -42,7 +46,7 @@ def wrapper(response, **cb_kwargs):

return request

def add_post_hook(self, request, results):
def add_post_hook(self, request: Request, results: TextTestResult) -> Request:
if hasattr(self, 'post_process'):
cb = request.callback

Expand All @@ -66,18 +70,18 @@ def wrapper(response, **cb_kwargs):

return request

def adjust_request_args(self, args):
def adjust_request_args(self, args: Union[Dict[str, Optional[Union[Callable, str, int, Dict[str, str]]]], Dict[str, Optional[Union[Callable, str, int]]]]) -> Union[Dict[str, Optional[Union[Callable, str, int, Dict[str, str]]]], Dict[str, Optional[Union[Callable, str, int]]]]:
return args


class ContractsManager:
contracts: Dict[str, Contract] = {}

def __init__(self, contracts):
def __init__(self, contracts: List[Union[Type[UrlContract], Type[CallbackKeywordArgumentsContract], Type[ReturnsContract], Type[ScrapesContract], Type[CustomFormContract], Type[CustomSuccessContract], Type[CustomFailContract]]]) -> None:
for contract in contracts:
self.contracts[contract.name] = contract

def tested_methods_from_spidercls(self, spidercls):
def tested_methods_from_spidercls(self, spidercls: Union[Type[CustomContractSuccessSpider], Type[CustomContractFailSpider], Type[InheritsTestSpider]]) -> List[str]:
is_method = re.compile(r"^\s*@", re.MULTILINE).search
methods = []
for key, value in getmembers(spidercls):
Expand All @@ -86,7 +90,7 @@ def tested_methods_from_spidercls(self, spidercls):

return methods

def extract_contracts(self, method):
def extract_contracts(self, method: Callable) -> List[Union[ReturnsContract, UrlContract, CustomSuccessContract, CustomFailContract, CustomFormContract, CallbackKeywordArgumentsContract, ScrapesContract]]:
contracts = []
for line in method.__doc__.split('\n'):
line = line.strip()
Expand All @@ -99,7 +103,7 @@ def extract_contracts(self, method):

return contracts

def from_spider(self, spider, results):
def from_spider(self, spider: Union[CustomContractSuccessSpider, CustomContractFailSpider, InheritsTestSpider], results: TextTestResult) -> List[Optional[Union[Request, Any]]]:
requests = []
for method in self.tested_methods_from_spidercls(type(spider)):
bound_method = spider.__getattribute__(method)
Expand All @@ -111,7 +115,7 @@ def from_spider(self, spider, results):

return requests

def from_method(self, method, results):
def from_method(self, method: Callable, results: TextTestResult) -> Optional[Request]:
contracts = self.extract_contracts(method)
if contracts:
request_cls = Request
Expand Down Expand Up @@ -145,7 +149,7 @@ def from_method(self, method, results):
self._clean_req(request, method, results)
return request

def _clean_req(self, request, method, results):
def _clean_req(self, request: Request, method: Callable, results: TextTestResult) -> None:
""" stop the request from returning objects and records any errors """

cb = request.callback
Expand Down
3 changes: 3 additions & 0 deletions scrapy/contracts/__init__.py.apply-success-warnings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
WARNING: Failed decoding trace: Module 'tests.test_contracts' has no attribute 'ContractsManagerTest.test_same_url.<locals>'
WARNING: Failed decoding trace: Module 'tests.test_contracts' has no attribute 'ContractsManagerTest.test_same_url.<locals>'
WARNING: Failed decoding trace: Module 'scrapy.contracts' has no attribute '_create_testcase.<locals>'

0 comments on commit 2f22b84

Please sign in to comment.