In [1]:
import os
import librosa
import numpy as np
import sagemaker_pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField,StringType, FloatType

In [2]:
from pyspark import SparkContext, SparkConf
from sagemaker_pyspark import classpath_jars
from pyspark.sql.functions import create_map, struct
from pydub import AudioSegment

In [3]:
LANDED = '/home/rlfo/Documents/pessoal/bigdatamusic/landed/'
RAW = '/home/rlfo/Documents/pessoal/bigdatamusic/raw/'

In [4]:
classpath = ":".join(sagemaker_pyspark.classpath_jars())

builder = SparkSession.builder.appName("MUSIC SPARK")
builder.config(
    "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2")
builder.config("spark.speculation", "false")
builder.config("spark.sql.parquet.compression.codec", "gzip")
builder.config("spark.debug.maxToStringFields", "100")
builder.config("spark.driver.extraClassPath", classpath)
builder.config("spark.driver.memory", "1g")
builder.config("spark.driver.cores", "1")
builder.config("spark.executor-memory", "20g")
builder.config("spark.executor.cores", "4")


builder.master("local[*]")


<pyspark.sql.session.SparkSession.Builder at 0x7f60f007b860>

In [5]:
def audio_to_wav(file):
    dst = file.replace('.mp3',"")+".wav"
    sound = AudioSegment.from_mp3(file)
    sound.export(dst, format="wav")
    return dst
    

In [6]:
def extract_important_feature_music(file):
    
    songname = file.split('/')[0::-1][0]
    y, sr = librosa.load(file, mono=True, duration=30)
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    spec_cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    
    chroma_stft = np.array2string(chroma_stft, precision=4, separator=',',suppress_small=True)
    spec_cent = np.array2string(spec_cent, precision=4, separator=',',suppress_small=True)
    spec_bw = np.array2string(spec_bw, precision=4, separator=',',suppress_small=True)
    rolloff = np.array2string(rolloff, precision=4, separator=',',suppress_small=True)
    zcr = np.array2string(zcr, precision=4, separator=',',suppress_small=True)
    
    
    to_append = f'{songname};{chroma_stft};{spec_cent};{spec_bw};{rolloff};{zcr}'    
    return to_append
        

In [7]:
spark = builder.getOrCreate()

In [8]:
all_music = [f'{LANDED}{file}' for file in os.listdir(LANDED) if '.mp3' in file]

In [9]:
all_music

['/home/rlfo/Documents/pessoal/bigdatamusic/landed/Aux Fox - Ellie Goulding - Flux (Aux Fox Remix).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/Young Nero - Change (Prod. Trippy T).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/Young Nero - Beyond (Prod. Scott Storch).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/wūsh - late nights with you.mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/PRDSEOHNO - OHNO - Lil Mama (prod. Fallen Roses and B Dom).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/nymano - jazz and rain.mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/HIGH ON MUSIC - Danrell x Småland - Hostage.mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/Flipp Dinero - Leave Me Alone (Prod. by Young Forever x Cast Beats).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/Cardi B - Money.mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusic/landed/ZZ - ICY (feat. Thorii).mp3',
 '/home/rlfo/Documents/pessoal/bigdatamusi

In [10]:
pipe_rdd_csv = spark.sparkContext.parallelize(all_music).map(audio_to_wav).map(extract_important_feature_music)

In [11]:
pipe_rdd_csv

PythonRDD[1] at RDD at PythonRDD.scala:52

In [12]:
schema = StructType([StructField('file_name', StringType(), True),
                     StructField('chroma', StringType(), True),
                     StructField('spec_cent', StringType(), True),
                     StructField('spec_bw', StringType(), True),
                     StructField('rolloff', StringType(), True),
                     StructField('zcr', StringType(), True)])

In [13]:
pipe_rdd_csv = pipe_rdd_csv.map(lambda x : x.split(";"))
rdd = spark.createDataFrame(pipe_rdd_csv,schema)
rdd.show(2)

+---------+------+---------+---------+---------+------+
|file_name|chroma|spec_cent|  spec_bw|  rolloff|   zcr|
+---------+------+---------+---------+---------+------+
|         |0.3115|1243.9466|1475.7774|2472.4184|0.0507|
|         |0.3245|1914.2462| 2062.584|4129.1167|0.0695|
+---------+------+---------+---------+---------+------+
only showing top 2 rows



In [14]:
RAW

'/home/rlfo/Documents/pessoal/bigdatamusic/raw/'

In [18]:
rdd.write.csv('/home/rlfo/raw',sep=';',mode='overwrite')


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-18-0eed648b293e>", line 1, in <module>
    rdd.write.csv('/home/rlfo/raw',sep=';',mode='overwrite')
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/sql/readwriter.py", line 885, in csv
    self._jwrite.csv(path)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/py4j/java_gateway.py", line 1257, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/py4j/protocol.py", 

Py4JJavaError: An error occurred while calling o218.csv.
: org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:224)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:154)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
	at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
	at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
	at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:656)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:225)
	at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:644)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 3.0 failed 1 times, most recent failure: Lost task 1.0 in stage 3.0 (TID 13, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/python3.6/pkgutil.py", line 412, in get_importer
    importer = sys.path_importer_cache[path_item]
KeyError: ''

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 240, in main
    func, profiler, deserializer, serializer = read_command(pickleSer, infile)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 60, in read_command
    command = serializer._read_with_length(file)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 171, in _read_with_length
    return self.loads(obj)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 566, in loads
    return pickle.loads(obj, encoding=encoding)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 929, in subimport
    __import__(name)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/__init__.py", line 12, in <module>
    from . import core
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/core/__init__.py", line 109, in <module>
    from .time_frequency import *  # pylint: disable=wildcard-import
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/core/time_frequency.py", line 10, in <module>
    from ..util.exceptions import ParameterError
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/util/__init__.py", line 68, in <module>
    from .files import *  # pylint: disable=wildcard-import
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/util/files.py", line 7, in <module>
    import pkg_resources
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3241, in <module>
    @_call_aside
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3225, in _call_aside
    f(*args, **kwargs)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3254, in _initialize_master_working_set
    working_set = WorkingSet._build_master()
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 574, in _build_master
    ws = cls()
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 567, in __init__
    self.add_entry(entry)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 623, in add_entry
    for dist in find_distributions(entry, True):
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 1960, in find_distributions
    importer = get_importer(path_item)
  File "/usr/lib/python3.6/pkgutil.py", line 416, in get_importer
    importer = path_hook(path_item)
  File "<frozen importlib._bootstrap_external>", line 1324, in path_hook_for_FileFinder
  File "<frozen importlib._bootstrap_external>", line 102, in _path_isdir
FileNotFoundError: [Errno 2] No such file or directory

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:330)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:470)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:453)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:284)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:257)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:196)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:109)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1872)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1821)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1810)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:194)
	... 31 more
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/python3.6/pkgutil.py", line 412, in get_importer
    importer = sys.path_importer_cache[path_item]
KeyError: ''

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 240, in main
    func, profiler, deserializer, serializer = read_command(pickleSer, infile)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 60, in read_command
    command = serializer._read_with_length(file)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 171, in _read_with_length
    return self.loads(obj)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 566, in loads
    return pickle.loads(obj, encoding=encoding)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 929, in subimport
    __import__(name)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/__init__.py", line 12, in <module>
    from . import core
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/core/__init__.py", line 109, in <module>
    from .time_frequency import *  # pylint: disable=wildcard-import
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/core/time_frequency.py", line 10, in <module>
    from ..util.exceptions import ParameterError
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/util/__init__.py", line 68, in <module>
    from .files import *  # pylint: disable=wildcard-import
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/librosa/util/files.py", line 7, in <module>
    import pkg_resources
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3241, in <module>
    @_call_aside
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3225, in _call_aside
    f(*args, **kwargs)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 3254, in _initialize_master_working_set
    working_set = WorkingSet._build_master()
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 574, in _build_master
    ws = cls()
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 567, in __init__
    self.add_entry(entry)
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 623, in add_entry
    for dist in find_distributions(entry, True):
  File "/home/rlfo/.local/share/virtualenvs/bigdatamusic-K9Mh5qpQ/lib/python3.6/site-packages/pkg_resources/__init__.py", line 1960, in find_distributions
    importer = get_importer(path_item)
  File "/usr/lib/python3.6/pkgutil.py", line 416, in get_importer
    importer = path_hook(path_item)
  File "<frozen importlib._bootstrap_external>", line 1324, in path_hook_for_FileFinder
  File "<frozen importlib._bootstrap_external>", line 102, in _path_isdir
FileNotFoundError: [Errno 2] No such file or directory

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:330)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:470)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:453)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:284)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:257)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:196)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:109)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
