Skip to content

Error in Jupyter Notebook #835

@huangbiubiu

Description

@huangbiubiu

System information

  • OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Linux Ubuntu 16.04
  • Modin installed from (source or binary): binary
  • Modin version: 0.6.2 (with Ray 0.7.3)
  • Python version: 3.7.0
  • Exact command to reproduce:
import modin.pandas as pd
pd.read_csv("train_data2_part1.csv")

Describe the problem

Reading a CSV file with the read_csv method fails in Jupyter Notebook, but the same command runs successfully when executed from the python command line.

Source code / logs

Trace:

---------------------------------------------------------------------------
RayTaskError                              Traceback (most recent call last)
<ipython-input-3-0654ff268ff7> in <module>()
      1 import sys
      2 print(sys.executable)
----> 3 pd.read_csv("train_data2_part1.csv")

~/anaconda3/lib/python3.7/site-packages/modin/pandas/io.py in parser_func(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
     95         if not kwargs.get("sep", sep):
     96             kwargs["sep"] = "\t"
---> 97         return _read(**kwargs)
     98 
     99     return parser_func

~/anaconda3/lib/python3.7/site-packages/modin/pandas/io.py in _read(**kwargs)
    108         kwargs: Keyword arguments in pandas.read_csv
    109     """
--> 110     pd_obj = BaseFactory.read_csv(**kwargs)
    111     # This happens when `read_csv` returns a TextFileReader object for iterating through
    112     if isinstance(pd_obj, pandas.io.parsers.TextFileReader):

~/anaconda3/lib/python3.7/site-packages/modin/data_management/factories.py in read_csv(cls, **kwargs)
     50     @classmethod
     51     def read_csv(cls, **kwargs):
---> 52         return cls._determine_engine()._read_csv(**kwargs)
     53 
     54     @classmethod

~/anaconda3/lib/python3.7/site-packages/modin/data_management/factories.py in _read_csv(cls, **kwargs)
     54     @classmethod
     55     def _read_csv(cls, **kwargs):
---> 56         return cls.io_cls.read_csv(**kwargs)
     57 
     58     @classmethod

~/anaconda3/lib/python3.7/site-packages/modin/engines/ray/generic/io.py in read_csv(cls, filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    680         if cls.read_csv_remote_task is None:
    681             return super(RayIO, cls).read_csv(**kwargs)
--> 682         return cls._read(**kwargs)
    683 
    684     @classmethod

~/anaconda3/lib/python3.7/site-packages/modin/engines/ray/generic/io.py in _read(cls, filepath_or_buffer, **kwargs)
    766             return cls._read_csv_from_pandas(filepath_or_buffer, filtered_kwargs)
    767         else:
--> 768             return cls._read_csv_from_file_ray(filepath_or_buffer, filtered_kwargs)
    769 
    770     @classmethod

~/anaconda3/lib/python3.7/site-packages/modin/engines/ray/generic/io.py in _read_csv_from_file_ray(cls, filepath, kwargs)
    490         # or based on the column(s) that were requested.
    491         if index_col is None:
--> 492             row_lengths = ray.get(index_ids)
    493             new_index = pandas.RangeIndex(sum(row_lengths))
    494         else:

~/anaconda3/lib/python3.7/site-packages/ray/worker.py in get(object_ids)
   2245             if isinstance(value, RayError):
   2246                 last_task_error_raise_time = time.time()
-> 2247                 raise value
   2248 
   2249         # Run post processors.

RayTaskError: ray_worker (pid=29074, host=bigdata-PowerEdge-R720)
TypeError: f() takes 0 positional arguments but 6 were given

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions