Skip to content

Commit

Permalink
Group optimization with auto parameter update
Browse files Browse the repository at this point in the history
Perform group optimization on Computed Style's RareNonInherited and
RareInherited groups. The grouping parameters will update whenever
update_css_ranking.py is called.
The detailed design is discussed in https://docs.google.com/a/google.com/document/d/1VqyZfjWf4b3dlVYmOuNHYQpnlenGjojMAfyRgjCUORA/edit?usp=sharing
Gen Diff: https://gist.github.com/nguyen-minh-duc/49faa7c2878e2eafd7d061bb30ea7a35/revisions

The previous CL was reverted because it caused a regression on the perfbot.
We have changed the parameters to the best values found in local testing.

Bug: 733502
Change-Id: I2aca34caa185b5ffd1defcdb6599bc8788d2819c
Reviewed-on: https://chromium-review.googlesource.com/644591
Reviewed-by: dstockwell <dstockwell@chromium.org>
Commit-Queue: Minh-Duc Nguyen <nmduc@google.com>
Cr-Commit-Position: refs/heads/master@{#499810}
  • Loading branch information
nguyen-minh-duc authored and Commit Bot committed Sep 6, 2017
1 parent 9b047cd commit e8df398
Show file tree
Hide file tree
Showing 8 changed files with 1,061 additions and 807 deletions.
83 changes: 83 additions & 0 deletions third_party/WebKit/Source/build/scripts/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import math
import random as rand


def l2_pairwise_distance(v1, v2):
    """Euclidean distance from each point in v1 to each point in v2.

    The points are scalars (1-D), so the Euclidean distance is simply the
    absolute difference.

    Args:
        v1: list of numbers (the row points)
        v2: list of numbers (the column points)
    Returns:
        A len(v1) x len(v2) list of lists of floats where entry [i][j] is the
        distance between v1[i] and v2[j].
    """
    # math.fabs is used instead of sqrt(d ** 2): squaring underflows to 0.0
    # for very small differences, overflows for very large ones and adds
    # needless rounding error. math.fabs still always returns a float.
    return [[math.fabs(point - other) for other in v2] for point in v1]


def calculate_error(k_means_matrix):
    """Sum, over all points, the distance to the nearest cluster center.

    Args:
        k_means_matrix: distance matrix with one row per point and one column
            per cluster center
    Returns:
        The sum of the smallest entry of every row (0 for an empty matrix).
    """
    total_error = 0
    for distances_to_centers in k_means_matrix:
        # The nearest center is the one with the smallest distance in the row.
        total_error += min(distances_to_centers)
    return total_error


def k_means(x_input, n_cluster=3, n_iter=100, n_tries=10):
    """Perform 1-D k-means clustering on a list of numbers x_input.

    The clustering is attempted n_tries times from independent random initial
    centers and the attempt with the smallest error is returned.

    Args:
        x_input: list of numbers
        n_cluster: number of clusters
        n_iter: number of iterations performed in each attempt
        n_tries: number of randomly initialised attempts
    Returns:
        A tuple (centers, min_dist_idx, error_value) where
        centers: list of n_cluster elements containing the cluster centers
        min_dist_idx: list of len(x_input) elements containing the nearest
            cluster center's id
        error_value: sum of all distance from each point to its nearest
            cluster center
    Raises:
        ValueError: if n_cluster is smaller than 1, or if x_input contains
            fewer distinct values than n_cluster (an empty x_input included).
    """
    if n_cluster < 1:
        raise ValueError("n_cluster must be at least 1")
    # Equal values always land in the same cluster, so with fewer distinct
    # values than clusters at least one cluster is always empty and the
    # restart logic below would never terminate.
    if len(set(x_input)) < n_cluster:
        raise ValueError("x_input needs at least n_cluster distinct values")

    # Draw the initial centers from the range covered by the data. A fixed
    # range can leave every point nearest to the same center when the data
    # lies outside of it, which again makes every attempt fail forever.
    lower_bound = min(x_input)
    upper_bound = max(x_input)

    def random_centers():
        return sorted([rand.uniform(lower_bound, upper_bound) for _ in range(n_cluster)])

    results = []
    for _ in range(n_tries):
        error_value = 0
        rand.seed(None)
        centers = random_centers()
        min_dist_idx = [0] * len(x_input)
        i = 0
        while i < n_iter:
            dist_mat = l2_pairwise_distance(x_input, centers)
            error_value = calculate_error(dist_mat)
            min_dist_idx = [dist.index(min(dist)) for dist in dist_mat]

            # Accumulate as floats so that the mean below is a true division
            # even for integer input on Python 2.
            sums = [0.0] * n_cluster
            count = [0] * n_cluster
            for value, center_id in zip(x_input, min_dist_idx):
                sums[center_id] += value
                count[center_id] += 1

            if 0 in count:
                # A cluster received no point: its mean is undefined, so
                # restart this attempt from fresh random centers.
                centers = random_centers()
                i = 0
                continue

            centers = [total / members for total, members in zip(sums, count)]
            i += 1

        results.append((centers, min_dist_idx, error_value))

    return min(results, key=lambda x: x[2])
48 changes: 31 additions & 17 deletions third_party/WebKit/Source/build/scripts/make_computed_style_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,13 +545,18 @@ def _evaluate_rare_non_inherited_group(all_properties, properties_ranking_file,
properties_ranking = _get_properties_ranking(properties_ranking_file, partition_rule)

for property_ in all_properties:
if property_["field_group"] is not None:
if "rare-non-inherited" in property_["field_group"] and property_["name"] in properties_ranking:
property_["field_group"] = "->".join(layers_name[0:properties_ranking[property_["name"]]])
elif "rare-non-inherited" in property_["field_group"] and property_["name"] not in properties_ranking:
group_tree = property_["field_group"].split("->")
group_tree = [layers_name[0]] + group_tree
property_["field_group"] = "->".join(group_tree)
if property_["field_group"] is not None and "*" in property_["field_group"] \
and not property_["inherited"] and property_["name"] in properties_ranking:

assert property_["field_group"] == "*", "The property " + property_["name"] \
+ " will be automatically assigned a group, please put '*' as the field_group"

property_["field_group"] = "->".join(layers_name[0:properties_ranking[property_["name"]]])
elif property_["field_group"] is not None and "*" in property_["field_group"] \
and not property_["inherited"] and property_["name"] not in properties_ranking:
group_tree = property_["field_group"].split("->")[1:]
group_tree = [layers_name[0], layers_name[0] + "-sub"] + group_tree
property_["field_group"] = "->".join(group_tree)


def _evaluate_rare_inherit_group(all_properties, properties_ranking_file,
Expand All @@ -573,13 +578,18 @@ def _evaluate_rare_inherit_group(all_properties, properties_ranking_file,

layers_name = ["rare-inherited-usage-less-than-" + str(int(round(partition_rule[i] * 100))) + "-percent"
for i in range(number_of_layer)]

properties_ranking = _get_properties_ranking(properties_ranking_file, partition_rule)

for property_ in all_properties:
if property_["field_group"] is not None \
and "rare-inherited" in property_["field_group"] \
and property_["name"] in properties_ranking:
property_["field_group"] = "->".join(["rare-inherited"] + layers_name[1:properties_ranking[property_["name"]]])
if property_["field_group"] is not None and "*" in property_["field_group"] \
and property_["inherited"] and property_["name"] in properties_ranking:
property_["field_group"] = "->".join(layers_name[0:properties_ranking[property_["name"]]])
elif property_["field_group"] is not None and "*" in property_["field_group"] \
and property_["inherited"] and property_["name"] not in properties_ranking:
group_tree = property_["field_group"].split("->")[1:]
group_tree = [layers_name[0], layers_name[0] + "-sub"] + group_tree
property_["field_group"] = "->".join(group_tree)


class ComputedStyleBaseWriter(make_style_builder.StyleBuilderWriter):
Expand Down Expand Up @@ -629,12 +639,16 @@ def __init__(self, json5_file_paths):
# Organise fields into a tree structure where the root group
# is ComputedStyleBase.

# [0.134, 0.327, 1.0] is the best RareNonInherited partition parameter
# that was found by experiments
_evaluate_rare_non_inherited_group(all_properties, json5_file_paths[4], 3, [0.134, 0.327, 1.0])
# [0.4, 1.0] is the best RareInherited partition parameter that was
# found by experiments
_evaluate_rare_inherit_group(all_properties, json5_file_paths[4], 2, [0.4, 1.0])
group_parameters = dict([(conf["name"], conf["cumulative_distribution"]) for conf in
json5_generator.Json5File.load_from_files([json5_file_paths[5]]).name_dictionaries])

_evaluate_rare_non_inherited_group(all_properties, json5_file_paths[4],
len(group_parameters["rare_non_inherited_properties_rule"]),
group_parameters["rare_non_inherited_properties_rule"])

_evaluate_rare_inherit_group(all_properties, json5_file_paths[4],
len(group_parameters["rare_inherited_properties_rule"]),
group_parameters["rare_inherited_properties_rule"])
self._root_group = _create_groups(all_properties)
self._diff_functions_map = _create_diff_groups_map(json5_generator.Json5File.load_from_files(
[json5_file_paths[2]]
Expand Down
129 changes: 129 additions & 0 deletions third_party/WebKit/Source/build/scripts/update_css_ranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# This script is used to update the CSS ranking. The CSS ranking will affect
# the grouping of CSS properties in Computed Style.
# Usage: Run `python update_css_ranking.py` to update the default
# CSS ranking file and API.
# Run `python update_css_ranking.py <ranking_file>` to update
# the ranking to another file with the default ranking API.
# Run `python update_css_ranking.py <ranking_file> <ranking_api_link>`
# to update the ranking from <ranking_api_link> API to <ranking_file>

import urllib2
import json
import sys
import cluster
import json5_generator
import math


CSS_RANKING_API = "http://www.chromestatus.com/data/csspopularity"
CSS_RANKING_FILE = "../../core/css/CSSPropertiesRanking.json5"
CSS_PROPERTIES = "../../core/css/CSSProperties.json5"
CONFIG_FILE = "../../core/css/CSSGroupConfig.json5"


def reformat_properties_name(css_properties):
    """Normalise the property names in css_properties in place.

    A leading "alias" is removed from a name; a name that then starts with
    "webkit" gets a "-" prepended.

    Args:
        css_properties: list of property names, modified in place
    """
    for index, name in enumerate(css_properties):
        if name.startswith("alias"):
            name = name[len("alias"):]
        # Checked after the alias prefix was dropped, so an aliased webkit
        # name receives the dash as well.
        if name.startswith("webkit"):
            name = "-" + name
        css_properties[index] = name


def update_css_ranking(css_ranking_file, css_ranking_api):
    """Create the CSSPropertiesRanking.json5 for uses in Computed Style grouping.

    Downloads the ranking from css_ranking_api, orders the property names by
    decreasing "day_percentage", normalises the names and writes them to
    css_ranking_file.

    Args:
        css_ranking_file: path of the CSSPropertiesRanking.json5 file to write
        css_ranking_api: url to CSS ranking api
    """
    downloaded_ranking = json.loads(urllib2.urlopen(css_ranking_api).read())
    most_used_first = sorted(downloaded_ranking, key=lambda entry: -float(entry["day_percentage"]))
    ranked_names = [entry["property_name"] for entry in most_used_first]
    reformat_properties_name(ranked_names)

    css_ranking_content = {"properties": {}, "data": ranked_names}

    with open(css_ranking_file, "w") as fw:
        # Two header comment lines precede the JSON payload.
        fw.write("// The popularity ranking of all css properties the first properties is the most\n")
        fw.write("// used property according to: https://www.chromestatus.com/metrics/css/popularity\n")
        json.dump(css_ranking_content, fw, indent=4, sort_keys=False)


def find_partition_rule(css_property_set, all_properties, n_cluster, transform=lambda x: x):
    """Find partition rule for a set of CSS property based on its popularity.

    The (transformed) popularity scores are clustered with cluster.k_means.
    Wherever two consecutive entries fall into different clusters, the value
    stored in all_properties for the first of the two is emitted as a
    partition boundary. The rule always ends with 1.0.

    Args:
        css_property_set: list of CSS properties and their popularity of form
            [(css_property_name, popularity_score)..]
            NOTE(review): presumably expected to be ordered by score so that
            each cluster is a contiguous run - confirm against the caller.
        all_properties: mapping from CSS property name to the value reported
            when that property ends a cluster
        n_cluster: number of cluster to divide the set into
        transform: data transform function to transform the popularity score,
            default value is the identity function
    Returns:
        partition rule for css_property_set
    """
    transformed_scores = [transform(entry[1]) for entry in css_property_set]
    _centers, cluster_alloc, _error = cluster.k_means(transformed_scores, n_cluster=n_cluster)

    partition_rule = []
    neighbours = zip(css_property_set, cluster_alloc, cluster_alloc[1:])
    for entry, own_cluster, next_cluster in neighbours:
        if own_cluster != next_cluster:
            partition_rule.append(all_properties[entry[0]])
    partition_rule.append(1.0)
    return partition_rule


def produce_partition_rule(config_file, css_ranking_api):
    """Find the partition rule for the groups and print them to config_file.

    Args:
        config_file: the file to write the parameters to
        css_ranking_api: url to CSS ranking api
    """
    downloaded_ranking = json.loads(urllib2.urlopen(css_ranking_api).read())
    css_ranking = sorted(downloaded_ranking, key=lambda entry: -entry["day_percentage"])
    total_css_properties = len(css_ranking)

    # Per property name: its popularity score (day_percentage * 100) and the
    # fraction of ranked properties that come before it in the ranking.
    css_ranking_dictionary = {}
    css_ranking_cdf = {}
    for rank, entry in enumerate(css_ranking):
        property_name = entry["property_name"]
        css_ranking_dictionary[property_name] = entry["day_percentage"] * 100
        css_ranking_cdf[property_name] = float(rank) / total_css_properties

    css_properties = json5_generator.Json5File.load_from_files([CSS_PROPERTIES]).name_dictionaries

    def ranked_wildcard_properties(want_inherited):
        # Collect (name, score) for the ranked properties whose field_group
        # contains "*" and whose inherited flag matches, most popular first.
        selected = []
        for property_ in css_properties:
            if bool(property_["inherited"]) != want_inherited:
                continue
            if property_["field_group"] is None or "*" not in property_["field_group"]:
                continue
            if property_["name"] not in css_ranking_dictionary:
                continue
            selected.append((property_["name"], css_ranking_dictionary[property_["name"]]))
        return sorted(selected, key=lambda pair: -pair[1])

    rare_non_inherited_properties = ranked_wildcard_properties(False)
    rare_inherited_properties = ranked_wildcard_properties(True)

    rni_properties_rule = find_partition_rule(rare_non_inherited_properties,
                                              css_ranking_cdf, n_cluster=3)

    # The inherited group is clustered on the logarithm of the score; the
    # small offset keeps the logarithm defined for a score of zero.
    ri_properties_rule = find_partition_rule(rare_inherited_properties,
                                             css_ranking_cdf,
                                             n_cluster=2, transform=lambda x: math.log(x + 10e-6))

    config_content = {
        "parameters": {},
        "data": [{"name": "rare_non_inherited_properties_rule", "cumulative_distribution": rni_properties_rule},
                 {"name": "rare_inherited_properties_rule", "cumulative_distribution": ri_properties_rule}]
    }

    with open(config_file, 'w') as fw:
        fw.write("// The grouping parameter is a cumulative distribution over the whole set of ranked\n")
        fw.write("// CSS properties.\n")
        json.dump(config_content, fw, indent=4)


if __name__ == '__main__':
    # Explicit check instead of `assert`: assertions are removed when Python
    # runs with -O, which would let surplus arguments pass silently.
    if len(sys.argv) > 3:
        sys.exit("Too many parameters")

    # Optional positional arguments: <ranking_file> then <ranking_api_link>.
    # Whatever is not given falls back to the module defaults.
    ranking_file = sys.argv[1] if len(sys.argv) > 1 else CSS_RANKING_FILE
    ranking_api = sys.argv[2] if len(sys.argv) > 2 else CSS_RANKING_API

    update_css_ranking(ranking_file, ranking_api)
    # The group configuration is always written to CONFIG_FILE.
    produce_partition_rule(CONFIG_FILE, ranking_api)
1 change: 1 addition & 0 deletions third_party/WebKit/Source/core/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ css_properties("make_core_generated_computed_style_base") {
"css/ComputedStyleDiffFunctions.json5",
"css/CSSValueKeywords.json5",
"css/CSSPropertiesRanking.json5",
"css/CSSGroupConfig.json5",
]
other_inputs = [
"../build/scripts/templates/fields/field.tmpl",
Expand Down
22 changes: 22 additions & 0 deletions third_party/WebKit/Source/core/css/CSSGroupConfig.json5
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// The grouping parameter is a cumulative distribution over the whole set of ranked
// CSS properties.
{
"data": [
{
"name": "rare_non_inherited_properties_rule",
"cumulative_distribution": [
0.134,
0.327,
1.0
]
},
{
"name": "rare_inherited_properties_rule",
"cumulative_distribution": [
0.4,
1.0
]
}
],
"parameters": {}
}
Loading

0 comments on commit e8df398

Please sign in to comment.