feat: rework the new multiple-source read feature.

timcera · Apr 29, 2021 · 3b043f0 · 3b043f0
1 parent b29dc52
commit 3b043f0
Show file tree

Hide file tree

Showing 10 changed files with 331 additions and 305 deletions.
diff --git a/tests/baseline/test_autocorrelation.png b/tests/baseline/test_autocorrelation.png
diff --git a/tests/baseline/test_time_plot.png b/tests/baseline/test_time_plot.png
diff --git a/tests/test_calculate_fdc.py b/tests/test_calculate_fdc.py
@@ -11,7 +11,7 @@
 class TestRead(TestCase):
     def setUp(self):
         """Prepare in-memory versions of the files ./data_flat.csv."""
-        self.fdata = tstoolbox.read("tests/data_flat.csv,tests/data_sunspot.csv")
+        self.fdata = tstoolbox.read("tests/data_flat.csv tests/data_sunspot.csv")
         self.fdata.index.name = "Datetime"
 
     def test_creation(self):

diff --git a/tests/test_equation.py b/tests/test_equation.py
@@ -166,11 +166,7 @@ def test_equation_multiple_cols_05(self):
     def test_equation_cols_over_nine(self):
         """Test of using equation API with columns over 9."""
         input_ts = tstoolbox.read(
-            "tests/data_multiple_cols.csv,"
-            "tests/data_multiple_cols.csv,"
-            "tests/data_multiple_cols.csv,"
-            "tests/data_multiple_cols.csv,"
-            "tests/data_multiple_cols.csv",
+            "tests/data_multiple_cols.csv tests/data_multiple_cols.csv tests/data_multiple_cols.csv tests/data_multiple_cols.csv tests/data_multiple_cols.csv",
             append="columns",
         )
         out = tstoolbox.equation("x10*10", input_ts=input_ts)

diff --git a/tests/test_peak_detect.py b/tests/test_peak_detect.py
@@ -168,6 +168,9 @@ def test_peak_minmax_cli(self):
             args, stdout=subprocess.PIPE, stdin=subprocess.PIPE
         ).communicate(input=input_peak_detection)[0]
         out = tsutils.read_iso_ts(out)
+        # input_peak_detection.to_csv("input.csv")
+        output_peak_detection.to_csv("output.csv")
+        out.to_csv("out.csv")
         assert_frame_equal(out, output_peak_detection)
 
     def test_peak_zero_crossing_cli(self):

diff --git a/tests/test_read.py b/tests/test_read.py
@@ -22,7 +22,7 @@ def setUp(self):
 
         self.read_multiple_direct = pandas.DataFrame(ts, columns=["Value"])
         self.read_multiple_direct = pandas.concat(
-            [self.read_multiple_direct, pandas.Series(ts, name="Value")], axis="columns"
+            [self.read_multiple_direct, pandas.Series(ts, name="Value_2")], axis="columns"
         )
         self.read_multiple_direct.index.name = "Datetime"
         self.read_multiple_direct = tsutils.memory_optimize(self.read_multiple_direct)
@@ -32,7 +32,7 @@ def setUp(self):
 2000-01-02,4.6
 """
 
-        self.read_multiple_cli = b"""Datetime,Value,Value
+        self.read_multiple_cli = b"""Datetime,Value,Value_2
 2000-01-01,4.5,4.5
 2000-01-02,4.6,4.6
 """
@@ -66,7 +66,7 @@ def test_read_direct(self):
     def test_read_mulitple_direct(self):
         """Test read API for multiple columns - daily."""
         out = tstoolbox.read(
-            "tests/data_simple.csv,tests/data_simple.csv", append=r"columns"
+            "tests/data_simple.csv tests/data_simple.csv", append=r"columns"
         )
         assert_frame_equal(out, self.read_multiple_direct)
 

diff --git a/tests/test_round_index.py b/tests/test_round_index.py
@@ -22,7 +22,7 @@ def setUp(self):
 
         self.round_index_multiple_direct = pandas.DataFrame(ts, columns=["Value"])
         self.round_index_multiple_direct = pandas.concat(
-            [self.round_index_multiple_direct, pandas.Series(ts, name="Value")],
+            [self.round_index_multiple_direct, pandas.Series(ts, name="Value_2")],
             axis="columns",
         )
         self.round_index_multiple_direct.index.name = "Datetime"
@@ -35,7 +35,7 @@ def setUp(self):
 2000-01-02,4.6
 """
 
-        self.round_index_multiple_cli = b"""Datetime,Value,Value
+        self.round_index_multiple_cli = b"""Datetime,Value,Value_2
 2000-01-01,4.5,4.5
 2000-01-02,4.6,4.6
 """
@@ -69,7 +69,7 @@ def test_round_index_direct(self):
     def test_round_index_mulitple_direct(self):
         """Test round_index API for multiple columns - daily."""
         out = tstoolbox.read(
-            "tests/data_simple.csv,tests/data_simple.csv",
+            "tests/data_simple.csv tests/data_simple.csv",
             append="columns",
             round_index="D",
         )
@@ -89,7 +89,7 @@ def test_round_index_cli(self):
 
     def test_round_index_multiple_cli(self):
         """Test round_index CLI for multiple columns - daily."""
-        args = 'tstoolbox read --round_index="D" tests/data_simple.csv,tests/data_simple.csv'
+        args = 'tstoolbox read --round_index="D" tests/data_simple.csv tests/data_simple.csv'
         args = shlex.split(args)
         out = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()
         self.assertEqual(out[0], self.round_index_multiple_cli)

diff --git a/tstoolbox/functions/plot.py b/tstoolbox/functions/plot.py
@@ -163,6 +163,7 @@ def plot_cli(
     end_date=None,
     clean=False,
     skiprows=None,
+    dropna="no",
     index_type="datetime",
     names=None,
     ofilename="plot.png",
@@ -696,6 +697,7 @@ def plot_cli(
     {end_date}
     {clean}
     {skiprows}
+    {dropna}
     {index_type}
     {names}
     {source_units}
@@ -805,13 +807,15 @@ def plot_cli(
         be same length as `vlines_x`.  If None will take for the standard
         linestyles list.
     """
+
     plt = plot(
-        input_ts=input_ts,
+        input_ts="-",
         columns=columns,
         start_date=start_date,
         end_date=end_date,
         clean=clean,
         skiprows=skiprows,
+        dropna=dropna,
         index_type=index_type,
         names=names,
         ofilename=ofilename,
@@ -918,6 +922,7 @@ def plot(
     end_date=None,
     clean=False,
     skiprows=None,
+    dropna="no",
     index_type="datetime",
     names=None,
     ofilename: Optional[str] = "plot.png",
@@ -1057,15 +1062,15 @@ def plot(
     from matplotlib.ticker import FixedLocator
 
     tsd = tsutils.common_kwds(
-        input_ts,
+        input_tsd=input_ts,
         skiprows=skiprows,
         names=names,
         index_type=index_type,
         start_date=start_date,
         end_date=end_date,
         pick=columns,
         round_index=round_index,
-        dropna="all",
+        dropna=dropna,
         source_units=source_units,
         target_units=target_units,
         clean=clean,
@@ -1154,7 +1159,6 @@ def plot(
     elif "auto" in bar_hatchstyles:
         bar_hatchstyles = HATCH_LIST
 
-    print(markerstyles)
     if markerstyles is None:
         markerstyles = " "
     elif "auto" in markerstyles:
@@ -1643,13 +1647,13 @@ def plot(
     elif type == "lag_plot":
         from pandas.plotting import lag_plot
 
-        lag_plot(tsd, lag=lag_plot_lag, ax=ax)
+        lag_plot(tsd.dropna(), lag=lag_plot_lag, ax=ax)
         xtitle = xtitle or "y(t)"
         ytitle = ytitle or "y(t+{0})".format(short_freq or 1)
     elif type == "autocorrelation":
         from pandas.plotting import autocorrelation_plot
 
-        autocorrelation_plot(tsd, ax=ax)
+        autocorrelation_plot(tsd.dropna(), ax=ax)
         xtitle = xtitle or "Time Lag {0}".format(short_freq)
     elif type == "bootstrap":
         from pandas.plotting import bootstrap_plot

diff --git a/tstoolbox/functions/read.py b/tstoolbox/functions/read.py
@@ -125,57 +125,36 @@ def read(
     if force_freq is not None:
         dropna = "no"
 
-    # Check for older style where comma delimited list of only files.
-    # If so, rework as space delimited.
-    fcheck = True
-    for fname in tsutils.make_list(filenames):
-        if not os.path.exists(fname):
-            fcheck = False
-            break
-    if fcheck is False:
-        # All filenames are real files.  Therefore old style and just make a simple
-        # list.
-        filenames = tsutils.make_list(filenames)
-
-    result = pd.DataFrame()
-    result_list = []
-    zones = set()
-    for i in filenames:
-        tsd = tsutils.common_kwds(
-            i,
-            skiprows=skiprows,
-            names=names,
-            index_type=index_type,
-            start_date=start_date,
-            end_date=end_date,
-            round_index=round_index,
-            dropna=dropna,
-            force_freq=force_freq,
-            clean=clean,
-            source_units=source_units,
-            target_units=target_units,
-        )
-        result_list.append(tsd)
-        zones.add(tsd.index.tzinfo)
-
-    for res in result_list:
-        if len(zones) != 1:
-            try:
-                res.index = res.index.tz_convert(None)
-            except TypeError:
-                pass
-
-        if append == "combine":
-            result = result.combine_first(res)
-
-    if append != "combine":
-        result = pd.concat(result_list, axis=append)
-
-    result = tsutils._pick(result, columns)
-
-    result.sort_index(inplace=True)
+    # # Check for older style where comma delimited list of only files.
+    # # If so, rework as space delimited.
+    # fcheck = True
+    # for fname in tsutils.make_list(filenames):
+    #     if not os.path.exists(fname):
+    #         fcheck = False
+    #         break
+    # if fcheck is False:
+    #     # All filenames are real files.  Therefore old style and just make a simple
+    #     # list.
+    #     filenames = tsutils.make_list(filenames)
+    filenames = tsutils.make_list(filenames, sep=" ")
+
+    tsd = tsutils.common_kwds(
+        input_tsd=filenames,
+        skiprows=skiprows,
+        names=names,
+        index_type=index_type,
+        start_date=start_date,
+        end_date=end_date,
+        round_index=round_index,
+        dropna=dropna,
+        force_freq=force_freq,
+        clean=clean,
+        source_units=source_units,
+        target_units=target_units,
+        usecols=columns,
+    )
 
-    return result
+    return tsd
 
 
 read.__doc__ = read_cli.__doc__