Generate a package egg that can be attached to Spark clusters in production or included in a PySpark console.
Prerequisite: Python 3.6.1 (python=3.6.1). Then run:
pip install -r requirements.txt
python setup.py install
python tests/test_mission.py
python setup.py bdist_egg
pyspark --py-files dist/pypack-0.0.1-py3.6.egg
- from pypack.spark import *
- from pypack.mission import with_life_goal
- source_data = [ ("jose", 1), ("pedro", 2) ]
- source_df = spark.createDataFrame( source_data, ["name", "age"])
- actual_df = with_life_goal(source_df)
- actual_df.show()