-
Notifications
You must be signed in to change notification settings - Fork 647
/
taxi-runner.py
77 lines (63 loc) · 2.78 KB
/
taxi-runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
# pip install git+https://github.com/intel-ai/ibis.git@develop
# pip install braceexpand
# NOTE: expects https://github.com/intel-ai/omniscripts checked out and in PYTHONPATH
import sys
# Command-line switch: when present, the run targets the "hdk" cluster type and
# the "Modin_on_hdk" pandas mode; otherwise the Ray-based defaults are used.
_HDK_FLAG = "--hdk"
USE_HDK = _HDK_FLAG in sys.argv
# The following import turns on experimental mode in Modin,
# which includes support for running workloads in a remote cloud.
import modin.experimental.pandas as pd # noqa: F401
from modin.experimental.cloud import create_cluster
from taxi import run_benchmark as run_benchmark
# Extra keyword arguments for create_cluster: only an HDK run overrides the
# cluster type, every other run relies on create_cluster's own default.
cluster_params = {"cluster_type": "hdk"} if USE_HDK else {}

# Describe the cluster used for the benchmark. The two positional arguments
# are presumably the cloud provider and the credentials file -- confirm
# against the create_cluster signature.
test_cluster = create_cluster(
    "aws",
    "aws_credentials",
    cluster_name="rayscale-test",
    region="eu-central-1",
    zone="eu-central-1b",
    image="ami-05f7491af5eef733a",
    **cluster_params,
)
# Everything below runs while the cluster context is active.
with test_cluster:
    if USE_HDK:
        from modin.experimental.cloud import get_connection

        # TODO: the omniscripts trigger should be moved into the remote context, see
        # https://github.com/intel-ai/omniscripts/blob/7d4599bcacf51de876952c658048571d32275ac1/taxi/taxibench_pandas_ibis.py#L482
        # Until then, replace the locally imported DbWorker with the one exposed
        # through the cluster connection.
        import modin.experimental.core.execution.native.implementations.hdk_on_native.db_worker as local_db_worker

        db_worker_module = "modin.experimental.core.execution.native.implementations.hdk_on_native.db_worker"
        remote_db_worker = get_connection().modules[db_worker_module]
        local_db_worker.DbWorker = remote_db_worker.DbWorker

        # Omniscripts checks that files are present when it is given local file
        # paths, so swap its "glob" for the remote one.
        import utils

        utils.glob = get_connection().modules["glob"]

    # Benchmark settings passed to omniscripts' run_benchmark.
    pandas_mode = "Modin_on_hdk" if USE_HDK else "Modin_on_ray"
    one_gib = 1024 ** 3
    parameters = {
        "data_file": "s3://modin-datasets/cloud/taxi/trips_xaa.csv",
        "dfiles_num": 1,
        "validation": False,
        "no_ibis": True,
        "no_pandas": False,
        "pandas_mode": pandas_mode,
        "ray_tmpdir": "/tmp",
        "ray_memory": one_gib,
    }
    run_benchmark(parameters)