Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mach: Add `mach clean-cargo-cache` command #16593

Merged
merged 2 commits into from May 8, 2017
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

@@ -30,6 +30,7 @@ matrix:
- $HOME/.ccache
before_cache:
- ./mach clean-nightlies --keep 2 --force
- ./mach clean-cargo-cache --keep 2 --force
env: CCACHE=/usr/bin/ccache
addons:
apt:
@@ -18,6 +18,7 @@
import subprocess
import sys
import urllib2
import glob

from mach.decorators import (
CommandArgument,
@@ -26,7 +27,7 @@
)

import servo.bootstrap as bootstrap
from servo.command_base import CommandBase, BIN_SUFFIX
from servo.command_base import CommandBase, BIN_SUFFIX, cd
from servo.util import delete, download_bytes, download_file, extract, host_triple


@@ -338,3 +339,175 @@ def clean_nightlies(self, force=False, keep=None):
elif not force:
print("Nothing done. "
"Run `./mach clean-nightlies -f` to actually remove.")

@Command('clean-cargo-cache',
description='Clean unused Cargo packages',
category='bootstrap')
@CommandArgument('--force', '-f',
action='store_true',
help='Actually remove stuff')
@CommandArgument('--show-size', '-s',
action='store_true',
help='Show packages size')
@CommandArgument('--keep',
default='1',
help='Keep up to this many most recent dependencies')
@CommandArgument('--custom-path', '-c',
action='store_true',
help='Get Cargo path from CARGO_HOME environment variable')
def clean_cargo_cache(self, force=False, show_size=False, keep=None, custom_path=False):
def get_size(path):
if os.path.isfile(path):
return os.path.getsize(path) / (1024 * 1024.0)
total_size = 0
for dirpath, dirnames, filenames in os.walk(path):

This comment has been minimized.

Copy link
@wafflespeanut

wafflespeanut Apr 30, 2017

Member

I'm not really comfortable doing an `os.walk` through the cache without knowing how long it takes for, say, 50 outdated crates (which, I guess, is a perfectly normal situation for people who build Servo very often).

While displaying crate sizes is nice feedback, walking through checked-out repositories (and crates) is probably a bad idea (what if the repository is huge? I'm sure Python will struggle with that!). So, we should see how long it takes if we don't walk (i.e., don't display size information). Then, depending on the difference, we should either remove that code or have a flag for disabling it (since we don't want to see the size information all the time).

Also, the back-ported scandir module is worth looking at for walking and getting file info.

This comment has been minimized.

Copy link
@UK992

UK992 Apr 30, 2017

Author Contributor

scandir on Windows needs the Visual C++ Compiler for Python 2.7.

update:
In my case (to delete 3776.33 MB):
with os.walk: 27s
with scandir: 16s

This comment has been minimized.

Copy link
@wafflespeanut

wafflespeanut May 2, 2017

Member

That's not much of a difference. I don't think having that dependency is worth it. Let's stick to os.walk then. Also, what if we avoid walking entirely? (i.e., don't display sizes?)

This comment has been minimized.

Copy link
@UK992

UK992 May 8, 2017

Author Contributor

Without walking (i.e., without displaying sizes): 6 seconds.

This comment has been minimized.

Copy link
@wafflespeanut

wafflespeanut May 8, 2017

Member

Yeah, that's expected. So, it's good to have it as an option. Anyway, feel free to r=me once you've amended the fixup :)

for f in filenames:
fp = os.path.join(dirpath, f)
total_size += os.path.getsize(fp)
return total_size / (1024 * 1024.0)

removing_anything = False
packages = {
'crates': {},
'git': {},
}
import toml
if os.environ.get("CARGO_HOME", "") and custom_path:
cargo_dir = os.environ.get("CARGO_HOME")
else:
cargo_dir = path.join(self.context.topdir, ".cargo")
cargo_file = open(path.join(self.context.topdir, "Cargo.lock"))
content = toml.load(cargo_file)

for package in content.get("package", []):
source = package.get("source", "")
version = package["version"]
if source == u"registry+https://github.com/rust-lang/crates.io-index":
crate_name = "{}-{}".format(package["name"], version)
if not packages["crates"].get(crate_name, False):
packages["crates"][package["name"]] = {
"current": [],
"exist": [],
}
packages["crates"][package["name"]]["current"].append(crate_name)
elif source.startswith("git+"):
name = source.split("#")[0].split("/")[-1].replace(".git", "")
branch = ""
crate_name = "{}-{}".format(package["name"], source.split("#")[1])
crate_branch = name.split("?")
if len(crate_branch) > 1:
branch = crate_branch[1].replace("branch=", "")
name = crate_branch[0]

if not packages["git"].get(name, False):
packages["git"][name] = {
"current": [],
"exist": [],
}
packages["git"][name]["current"].append(source.split("#")[1][:7])
if branch:
packages["git"][name]["current"].append(branch)

crates_dir = path.join(cargo_dir, "registry")
crates_cache_dir = ""
crates_src_dir = ""
if os.path.isdir(path.join(crates_dir, "cache")):
for p in os.listdir(path.join(crates_dir, "cache")):
crates_cache_dir = path.join(crates_dir, "cache", p)
crates_src_dir = path.join(crates_dir, "src", p)

git_dir = path.join(cargo_dir, "git")
git_db_dir = path.join(git_dir, "db")
git_checkout_dir = path.join(git_dir, "checkouts")
git_db_list = filter(lambda f: not f.startswith('.'), os.listdir(git_db_dir))
git_checkout_list = os.listdir(git_checkout_dir)

for d in list(set(git_db_list + git_checkout_list)):
crate_name = d.replace("-{}".format(d.split("-")[-1]), "")
if not packages["git"].get(crate_name, False):
packages["git"][crate_name] = {
"current": [],
"exist": [],
}
if os.path.isdir(path.join(git_checkout_dir, d)):
for d2 in os.listdir(path.join(git_checkout_dir, d)):
dep_path = path.join(git_checkout_dir, d, d2)
if os.path.isdir(dep_path):
packages["git"][crate_name]["exist"].append((path.getmtime(dep_path), d, d2))
elif os.path.isdir(path.join(git_db_dir, d)):
packages["git"][crate_name]["exist"].append(("db", d, ""))

for d in os.listdir(crates_src_dir):
crate_name = re.sub(r"\-\d+(\.\d+){1,3}.+", "", d)
if not packages["crates"].get(crate_name, False):
packages["crates"][crate_name] = {
"current": [],
"exist": [],
}
packages["crates"][crate_name]["exist"].append(d)

total_size = 0
for packages_type in ["git", "crates"]:
sorted_packages = sorted(packages[packages_type])
for crate_name in sorted_packages:
crate_count = 0
existed_crates = packages[packages_type][crate_name]["exist"]
for exist in sorted(existed_crates, reverse=True):
current_crate = packages[packages_type][crate_name]["current"]
size = 0
exist_name = exist
exist_item = exist[2] if packages_type == "git" else exist
if exist_item not in current_crate:
crate_count += 1
removing_anything = True
if int(crate_count) >= int(keep) or not current_crate:

This comment has been minimized.

Copy link
@wafflespeanut

wafflespeanut Apr 30, 2017

Member

I'm curious how tracking the keep count helps in keeping the "N" most recent deps. It probably works for downloaded crates, but the git repos have commit hashes instead of versions, which means we're not really doing a great job there, right?

This comment has been minimized.

Copy link
@UK992

UK992 Apr 30, 2017

Author Contributor

For git packages, I use the same logic as in `find_dep_path_newest`, based on modification date.

crate_paths = []
if packages_type == "git":
exist_checkout_path = path.join(git_checkout_dir, exist[1])
exist_db_path = path.join(git_db_dir, exist[1])
exist_name = path.join(exist[1], exist[2])
exist_path = path.join(git_checkout_dir, exist_name)

if exist[0] == "db":
crate_paths.append(exist_db_path)
crate_count += -1
else:
crate_paths.append(exist_path)

# remove crate from checkout if doesn't exist in db directory
if not os.path.isdir(exist_db_path):
crate_count += -1

with cd(path.join(exist_path, ".git", "objects", "pack")):
for pack in glob.glob("*"):
pack_path = path.join(exist_db_path, "objects", "pack", pack)
if os.path.exists(pack_path):
crate_paths.append(pack_path)

if len(os.listdir(exist_checkout_path)) <= 1:
crate_paths.append(exist_checkout_path)
if os.path.isdir(exist_db_path):
crate_paths.append(exist_db_path)
else:
crate_paths.append(path.join(crates_cache_dir, "{}.crate".format(exist)))
crate_paths.append(path.join(crates_src_dir, exist))

size = sum(get_size(p) for p in crate_paths) if show_size else 0
total_size += size
print_msg = (exist_name, " ({}MB)".format(round(size, 2)) if show_size else "", cargo_dir)
if force:
print("Removing `{}`{} package from {}".format(*print_msg))
for crate_path in crate_paths:
if os.path.exists(crate_path):
delete(crate_path)
else:
print("Would remove `{}`{} package from {}".format(*print_msg))

if removing_anything and show_size:
print("\nTotal size of {} MB".format(round(total_size, 2)))

if not removing_anything:
print("Nothing to remove.")
elif not force:
print("\nNothing done. "
"Run `./mach clean-cargo-cache -f` to actually remove.")
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.