-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Group optimization with auto parameter update
Perform group optimization on Computed Style's RareNonInherited and RareInherited groups. The grouping parameters will update whenever update_css_ranking.py is called. The detailed design is discussed in https://docs.google.com/a/google.com/document/d/1VqyZfjWf4b3dlVYmOuNHYQpnlenGjojMAfyRgjCUORA/edit?usp=sharing Gen Diff: https://gist.github.com/nguyen-minh-duc/49faa7c2878e2eafd7d061bb30ea7a35/revisions The previous CL was reverted because it caused a regression on the perfbot. We have changed the parameters to the best parameters tested locally. Bug: 733502 Change-Id: I2aca34caa185b5ffd1defcdb6599bc8788d2819c Reviewed-on: https://chromium-review.googlesource.com/644591 Reviewed-by: dstockwell <dstockwell@chromium.org> Commit-Queue: Minh-Duc Nguyen <nmduc@google.com> Cr-Commit-Position: refs/heads/master@{#499810}
- Loading branch information
1 parent
9b047cd
commit e8df398
Showing
8 changed files
with
1,061 additions
and
807 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import math | ||
import random as rand | ||
|
||
|
||
def l2_pairwise_distance(v1, v2): | ||
"""Euclidean distance from each point in v1 to each point in v2 | ||
Args: | ||
v1: list of point 1 | ||
v2: list of point 2 | ||
Returns: | ||
distance matrix between each point in v1 and v2 | ||
""" | ||
nrow = len(v1) | ||
ncol = len(v2) | ||
dist_mat = [[0 for _ in range(ncol)] for _ in range(nrow)] | ||
for i in range(nrow): | ||
for j in range(ncol): | ||
dist_mat[i][j] = math.sqrt((v1[i] - v2[j]) ** 2) | ||
return dist_mat | ||
|
||
|
||
def calculate_error(k_means_matrix):
    """Sum the distance from each point to its nearest cluster center.

    Args:
        k_means_matrix: distance matrix with one row per point and one
            column per cluster center
    Returns:
        Sum over all rows of the smallest distance in that row (0 when the
        matrix has no rows)
    """
    # A generator avoids building an intermediate list just to sum it.
    return sum(min(row) for row in k_means_matrix)
|
||
|
||
def k_means(x_input, n_cluster=3, n_iter=100, n_tries=10):
    """Perform 1-D k-means clustering on a list of numbers x_input.

    The clustering is run n_tries times from random initial centers and the
    attempt with the smallest error is returned. Whenever a cluster ends up
    with no points, the centers are re-drawn and that attempt's iteration
    count starts over.

    Args:
        x_input: list of numbers
        n_cluster: number of clusters
        n_iter: number of iterations per attempt
        n_tries: number of random restarts
    Returns:
        centers: list of n_cluster elements containing the cluster centers
        min_dist_idx: list of len(x_input) elements containing the nearest
            cluster center's id
        error_value: sum of all distances from each point to its nearest
            cluster center, as measured at the start of the last iteration
    Raises:
        ValueError: if x_input has fewer distinct values than n_cluster, since
            at least one cluster would then always be empty and the search
            could never terminate
    """
    n_distinct = len(set(x_input))
    if n_distinct < n_cluster:
        raise ValueError("x_input has %d distinct value(s); cannot form %d "
                         "non-empty clusters" % (n_distinct, n_cluster))

    # Draw initial centers from the range of the data itself. A fixed
    # [0, 100] range leaves clusters permanently empty when the data lies
    # outside it (e.g. log-transformed scores), which loops forever.
    if x_input:
        low, high = min(x_input), max(x_input)
    else:
        low, high = 0.0, 0.0

    def random_centers():
        # Sorted so that cluster ids increase with the center value.
        return sorted(rand.uniform(low, high) for _ in range(n_cluster))

    # The global generator is deliberately not re-seeded here, so a caller
    # that seeds `random` beforehand gets reproducible results.
    results = []
    for _ in range(n_tries):
        error_value = 0
        centers = random_centers()
        min_dist_idx = [0] * len(x_input)
        i = 0
        while i < n_iter:
            dist_mat = l2_pairwise_distance(x_input, centers)
            error_value = calculate_error(dist_mat)
            min_dist_idx = [dist.index(min(dist)) for dist in dist_mat]

            # Float accumulators keep the mean a true division on Python 2
            # even when x_input holds integers.
            sums = [0.0] * n_cluster
            count = [0] * n_cluster
            for value, idx in zip(x_input, min_dist_idx):
                sums[idx] += value
                count[idx] += 1

            if 0 in count:
                # An empty cluster has no mean: restart this attempt.
                centers = random_centers()
                i = 0
                continue

            centers = [total / size for total, size in zip(sums, count)]
            i += 1

        results.append((centers, min_dist_idx, error_value))

    return min(results, key=lambda x: x[2])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
third_party/WebKit/Source/build/scripts/update_css_ranking.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# This script is used to update the CSS ranking. The CSS ranking will affect | ||
# the grouping of CSS properties in Computed Style. | ||
# Usage: Run `python update_css_ranking.py` to update the default | ||
# CSS ranking file and API. | ||
# Run `python update_css_ranking.py <ranking_file>` to update | ||
# the ranking to another file with the default ranking API. | ||
# Run `python update_css_ranking.py <ranking_file> <ranking_api_link>` | ||
# to update the ranking from <ranking_api_link> API to <ranking_file> | ||
|
||
import urllib2 | ||
import json | ||
import sys | ||
import cluster | ||
import json5_generator | ||
import math | ||
|
||
|
||
CSS_RANKING_API = "http://www.chromestatus.com/data/csspopularity" | ||
CSS_RANKING_FILE = "../../core/css/CSSPropertiesRanking.json5" | ||
CSS_PROPERTIES = "../../core/css/CSSProperties.json5" | ||
CONFIG_FILE = "../../core/css/CSSGroupConfig.json5" | ||
|
||
|
||
def reformat_properties_name(css_properties):
    """Rewrite a list of property names in place.

    A leading "alias" is removed from each name; if the (possibly shortened)
    name then starts with "webkit", a "-" is prepended to it.

    Args:
        css_properties: list of property name strings; modified in place
    """
    for i, name in enumerate(css_properties):
        if name.startswith("alias"):
            name = name[len("alias"):]
        # Checked after the alias strip so "aliaswebkit..." is also prefixed.
        if name.startswith("webkit"):
            name = "-" + name
        css_properties[i] = name
|
||
|
||
def update_css_ranking(css_ranking_file, css_ranking_api):
    """Create the CSSPropertiesRanking.json5 for use in Computed Style grouping.

    Downloads the ranking, orders the property names by descending
    "day_percentage", reformats the names, and writes them to
    css_ranking_file after a two-line comment header.

    Args:
        css_ranking_file: path of the CSSPropertiesRanking.json5 file to write
        css_ranking_api: url to CSS ranking api
    """
    response = urllib2.urlopen(css_ranking_api)
    try:
        css_ranking = json.loads(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    ranked_names = [property_["property_name"] for property_ in
                    sorted(css_ranking, key=lambda x: -float(x["day_percentage"]))]
    reformat_properties_name(ranked_names)
    css_ranking_content = {"properties": {}, "data": ranked_names}

    with open(css_ranking_file, "w") as fw:
        fw.write("// The popularity ranking of all css properties the first properties is the most\n")
        fw.write("// used property according to: https://www.chromestatus.com/metrics/css/popularity\n")
        json.dump(css_ranking_content, fw, indent=4, sort_keys=False)
|
||
|
||
def find_partition_rule(css_property_set, all_properties, n_cluster, transform=lambda x: x):
    """Find the partition rule for a set of CSS properties based on popularity.

    The transformed popularity scores are clustered with cluster.k_means. Each
    position where two consecutive properties fall into different clusters
    contributes one boundary to the rule.

    Args:
        css_property_set: list of CSS properties and their popularity of form
            [(css_property_name, popularity_score)..]
        all_properties: mapping from CSS property name to the value reported
            for a boundary at that property
        n_cluster: number of clusters to divide the set into
        transform: data transform function to transform the popularity score,
            default value is the identity function
    Returns:
        partition rule for css_property_set: for every index i whose cluster
        differs from that of index i + 1, the all_properties value of
        property i, followed by a final 1.0
    """
    scores = [transform(p[1]) for p in css_property_set]
    _, cluster_alloc, _ = cluster.k_means(scores, n_cluster=n_cluster)
    boundaries = [all_properties[css_property_set[i][0]]
                  for i in range(len(cluster_alloc) - 1)
                  if cluster_alloc[i] != cluster_alloc[i + 1]]
    # The last group always extends to the end of the distribution.
    return boundaries + [1.0]
|
||
|
||
def produce_partition_rule(config_file, css_ranking_api):
    """Find the partition rule for the groups and print them to config_file.

    Args:
        config_file: the file to write the parameters to
        css_ranking_api: url to CSS ranking api
    """
    response = urllib2.urlopen(css_ranking_api)
    try:
        raw_ranking = json.loads(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    # float() matches the conversion update_css_ranking applies to the same
    # field.
    css_ranking = sorted(raw_ranking, key=lambda x: -float(x["day_percentage"]))
    total_css_properties = len(css_ranking)
    css_ranking_dictionary = dict((x["property_name"], float(x["day_percentage"]) * 100)
                                  for x in css_ranking)
    # Position of each property in the ranking, scaled to [0, 1).
    css_ranking_cdf = dict((x["property_name"], float(i) / total_css_properties)
                           for i, x in enumerate(css_ranking))
    css_properties = json5_generator.Json5File.load_from_files([CSS_PROPERTIES]).name_dictionaries

    # NOTE(review): names are looked up as the API returns them, without the
    # reformat_properties_name() pass used by update_css_ranking -- confirm
    # that skipping properties whose names differ is intended.
    def ranked_rare_properties(inherited):
        """Return (name, score) pairs for ranked properties with "*" in their
        field_group and the given inherited flag, most popular first."""
        return sorted([(x["name"], css_ranking_dictionary[x["name"]])
                       for x in css_properties if bool(x["inherited"]) == inherited
                       and x["field_group"] is not None
                       and "*" in x["field_group"]
                       and x["name"] in css_ranking_dictionary],
                      key=lambda x: -x[1])

    rare_non_inherited_properties = ranked_rare_properties(False)
    rare_inherited_properties = ranked_rare_properties(True)

    rni_properties_rule = find_partition_rule(rare_non_inherited_properties,
                                              css_ranking_cdf, n_cluster=3)

    # The small offset (10e-6 == 1e-5) keeps the log defined for a score of 0.
    ri_properties_rule = find_partition_rule(rare_inherited_properties,
                                             css_ranking_cdf,
                                             n_cluster=2, transform=lambda x: math.log(x + 10e-6))

    with open(config_file, 'w') as fw:
        fw.write("// The grouping parameter is a cumulative distribution over the whole set of ranked\n")
        fw.write("// CSS properties.\n")
        json.dump({
            "parameters": {},
            "data": [{"name": "rare_non_inherited_properties_rule", "cumulative_distribution": rni_properties_rule},
                     {"name": "rare_inherited_properties_rule", "cumulative_distribution": ri_properties_rule}]
        }, fw, indent=4)
|
||
|
||
if __name__ == '__main__':
    # Usage: update_css_ranking.py [<ranking_file> [<ranking_api_link>]]
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(sys.argv) >= 4:
        sys.exit("Too many parameters")

    # Missing arguments fall back to the module-level defaults.
    ranking_file = sys.argv[1] if len(sys.argv) > 1 else CSS_RANKING_FILE
    ranking_api = sys.argv[2] if len(sys.argv) > 2 else CSS_RANKING_API

    update_css_ranking(ranking_file, ranking_api)
    produce_partition_rule(CONFIG_FILE, ranking_api)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// The grouping parameter is a cumulative distribution over the whole set of ranked | ||
// CSS properties. | ||
{ | ||
"data": [ | ||
{ | ||
"name": "rare_non_inherited_properties_rule", | ||
"cumulative_distribution": [ | ||
0.134, | ||
0.327, | ||
1.0 | ||
] | ||
}, | ||
{ | ||
"name": "rare_inherited_properties_rule", | ||
"cumulative_distribution": [ | ||
0.4, | ||
1.0 | ||
] | ||
} | ||
], | ||
"parameters": {} | ||
} |
Oops, something went wrong.