Skip to content

Commit

Permalink
Remove spark dependency (IntelPython#102)
Browse files Browse the repository at this point in the history
Remove spark dependency from HPA; use pre-generated sdf_dt.pq
  • Loading branch information
Vyacheslav-Smirnov committed Aug 7, 2019
1 parent 9313df0 commit 9e77fde
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 11 deletions.
1 change: 0 additions & 1 deletion buildscripts/hpat-conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ requirements:
test:
requires:
- h5py
- pyspark
- scipy
imports:
- hpat
Expand Down
20 changes: 10 additions & 10 deletions hpat/tests/gen_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,23 @@ def gen_pq_test(cls):


def generate_spark_data():
    """Produce the datetime test datasets used by the HPAT test suite.

    Writes two parquet datasets into the current working directory:

    * ``pandas_dt.pq``  -- generated on the fly from a pandas DataFrame
      with a datetime64 column (``DT64``) and a copy of it (``DATE``).
    * ``sdf_dt.pq``     -- a pre-generated, Spark-written parquet dataset,
      extracted from the ``sdf_dt.pq.bz2`` archive shipped next to this
      module.  This replaces the previous on-the-fly generation via
      pyspark, so no Spark installation is required.

    Any existing ``sdf_dt.pq`` directory is removed first so the
    extraction starts from a clean slate.
    """
    # Local imports: these are only needed for test-data generation,
    # keeping module import time and dependencies minimal.
    import os
    import shutil
    import tarfile

    # test datetime64, spark dates
    dt1 = pd.DatetimeIndex(['2017-03-03 03:23',
                            '1990-10-23', '1993-07-02 10:33:01'])
    df = pd.DataFrame({'DT64': dt1, 'DATE': dt1.copy()})
    df.to_parquet('pandas_dt.pq')

    # Parquet datasets are directories; remove a stale copy before
    # extracting, since tarfile will not clean up leftover files.
    if os.path.exists('sdf_dt.pq'):
        shutil.rmtree('sdf_dt.pq')

    # The archive lives alongside this source file; resolve it via
    # __file__ so extraction works regardless of the current directory.
    sdf_dt_archive = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'sdf_dt.pq.bz2')
    # NOTE(review): extractall trusts the archive contents; acceptable
    # here because the archive is bundled with the package, not
    # downloaded from an untrusted source.
    with tarfile.open(sdf_dt_archive, "r:bz2") as tar:
        tar.extractall('.')


def gen_lr(file_name, N, D):
Expand Down
Binary file added hpat/tests/sdf_dt.pq.bz2
Binary file not shown.

0 comments on commit 9e77fde

Please sign in to comment.