From cd67ab788167a6518feeb474677226cf987dc614 Mon Sep 17 00:00:00 2001 From: Larry White Date: Sat, 23 Oct 2021 14:29:54 -0400 Subject: [PATCH] Java doc2 (#1018) * Added asSet() to Column API, with documentation * Some tests for asSet() * documentation and cleanup for numeric columns * Completes JavaDoc for NumericColumn * Completed JavaDoc for NumberColumn * Completes JavaDoc for Stats class * Complete JavaDoc for DoubleColumnType * Complete JavaDoc for NumberInterpolator * Update JavaDoc for ColumnType * Complete JavaDoc for AbstractColumnType * Complete JavaDoc for DoubleColumnType * Complete JavaDoc for FloatColumnType and IntColumnType * additional javadoc for Int and Long columns --- .../java/tech/tablesaw/api/ColumnType.java | 2 + .../java/tech/tablesaw/api/DoubleColumn.java | 8 ++ .../java/tech/tablesaw/api/FloatColumn.java | 40 ++++++++++ .../java/tech/tablesaw/api/InstantColumn.java | 2 + .../java/tech/tablesaw/api/IntColumn.java | 40 ++++++++++ .../java/tech/tablesaw/api/LongColumn.java | 8 ++ .../java/tech/tablesaw/api/NumberColumn.java | 31 ++++--- .../java/tech/tablesaw/api/NumericColumn.java | 80 +++++++++++++++---- .../java/tech/tablesaw/api/ShortColumn.java | 8 ++ .../java/tech/tablesaw/api/TextColumn.java | 2 + .../java/tech/tablesaw/api/TimeColumn.java | 10 ++- .../tablesaw/columns/AbstractColumnType.java | 7 ++ .../java/tech/tablesaw/columns/Column.java | 8 +- .../columns/numbers/DoubleColumnType.java | 10 +++ .../columns/numbers/FloatColumnType.java | 7 ++ .../columns/numbers/IntColumnType.java | 7 ++ .../columns/numbers/LongColumnType.java | 8 ++ .../columns/numbers/NumberInterpolator.java | 6 +- .../tech/tablesaw/columns/numbers/Stats.java | 22 +++++ .../tech/tablesaw/api/DoubleColumnTest.java | 12 ++- .../tech/tablesaw/api/FloatColumnTest.java | 8 ++ 21 files changed, 288 insertions(+), 38 deletions(-) diff --git a/core/src/main/java/tech/tablesaw/api/ColumnType.java b/core/src/main/java/tech/tablesaw/api/ColumnType.java index 
94a60b948..befb1ccb4 100644 --- a/core/src/main/java/tech/tablesaw/api/ColumnType.java +++ b/core/src/main/java/tech/tablesaw/api/ColumnType.java @@ -81,8 +81,10 @@ static ColumnType valueOf(String name) { /** Returns a printer-friendly version of this ColumnType's name */ String getPrinterFriendlyName(); + /** TODO: Research this method to provide a good comment */ AbstractColumnParser customParser(ReadOptions options); + /** TODO: Research this method to provide a good comment */ default boolean compare(int rowNumber, Column temp, Column original) { Object o1 = original.get(rowNumber); Object o2 = temp.get(temp.size() - 1); diff --git a/core/src/main/java/tech/tablesaw/api/DoubleColumn.java b/core/src/main/java/tech/tablesaw/api/DoubleColumn.java index ce2baab14..0c0edcf29 100644 --- a/core/src/main/java/tech/tablesaw/api/DoubleColumn.java +++ b/core/src/main/java/tech/tablesaw/api/DoubleColumn.java @@ -5,7 +5,9 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; import java.util.function.DoubleConsumer; import java.util.function.DoublePredicate; import java.util.function.DoubleSupplier; @@ -382,6 +384,12 @@ public byte[] asBytes(int rowNumber) { .array(); } + /** {@inheritDoc} */ + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } + @Override public int countUnique() { DoubleSet uniqueElements = new DoubleOpenHashSet(); diff --git a/core/src/main/java/tech/tablesaw/api/FloatColumn.java b/core/src/main/java/tech/tablesaw/api/FloatColumn.java index 57f9d662b..fd07a729f 100644 --- a/core/src/main/java/tech/tablesaw/api/FloatColumn.java +++ b/core/src/main/java/tech/tablesaw/api/FloatColumn.java @@ -3,7 +3,9 @@ import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.floats.*; import java.nio.ByteBuffer; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; import 
java.util.stream.Stream; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; @@ -63,11 +65,13 @@ public FloatColumn createCol(String name, int initialSize) { return create(name, initialSize); } + /** {@inheritDoc} */ @Override public FloatColumn createCol(String name) { return create(name); } + /** {@inheritDoc} */ @Override public Float get(int index) { float result = getFloat(index); @@ -78,6 +82,7 @@ public static boolean valueIsMissing(float value) { return FloatColumnType.valueIsMissing(value); } + /** {@inheritDoc} */ @Override public FloatColumn subset(int[] rows) { final FloatColumn c = this.emptyCopy(); @@ -105,16 +110,19 @@ public Selection isIn(final float... numbers) { return results; } + /** {@inheritDoc} */ @Override public int size() { return data.size(); } + /** {@inheritDoc} */ @Override public void clear() { data.clear(); } + /** {@inheritDoc} */ @Override public FloatColumn unique() { final FloatSet values = new FloatOpenHashSet(); @@ -128,6 +136,7 @@ public FloatColumn unique() { return column; } + /** {@inheritDoc} */ @Override public FloatColumn top(int n) { FloatArrayList top = new FloatArrayList(); @@ -139,6 +148,7 @@ public FloatColumn top(int n) { return new FloatColumn(name() + "[Top " + n + "]", top); } + /** {@inheritDoc} */ @Override public FloatColumn bottom(final int n) { FloatArrayList bottom = new FloatArrayList(); @@ -150,6 +160,7 @@ public FloatColumn bottom(final int n) { return new FloatColumn(name() + "[Bottoms " + n + "]", bottom); } + /** {@inheritDoc} */ @Override public FloatColumn lag(int n) { final int srcPos = n >= 0 ? 
0 : 0 - n; @@ -167,6 +178,7 @@ public FloatColumn lag(int n) { return new FloatColumn(name() + " lag(" + n + ")", new FloatArrayList(dest)); } + /** {@inheritDoc} */ @Override public FloatColumn removeMissing() { FloatColumn result = copy(); @@ -186,6 +198,7 @@ public FloatColumn append(float i) { return this; } + /** {@inheritDoc} */ @Override public FloatColumn append(Float val) { if (val == null) { @@ -196,11 +209,13 @@ public FloatColumn append(Float val) { return this; } + /** {@inheritDoc} */ @Override public FloatColumn copy() { return new FloatColumn(name(), data.clone()); } + /** {@inheritDoc} */ @Override public Iterator iterator() { return data.iterator(); @@ -210,6 +225,7 @@ public float[] asFloatArray() { return data.toFloatArray(); } + /** {@inheritDoc} */ @Override public Float[] asObjectArray() { final Float[] output = new Float[size()]; @@ -223,11 +239,13 @@ public Float[] asObjectArray() { return output; } + /** {@inheritDoc} */ @Override public int compare(Float o1, Float o2) { return Float.compare(o1, o2); } + /** {@inheritDoc} */ @Override public FloatColumn set(int i, Float val) { return val == null ? 
setMissing(i) : set(i, (float) val); @@ -238,11 +256,13 @@ public FloatColumn set(int i, float val) { return this; } + /** {@inheritDoc} */ @Override public Column set(int row, String stringValue, AbstractColumnParser parser) { return set(row, parser.parseFloat(stringValue)); } + /** {@inheritDoc} */ @Override public FloatColumn append(final Column column) { Preconditions.checkArgument(column.type() == this.type()); @@ -254,18 +274,21 @@ public FloatColumn append(final Column column) { return this; } + /** {@inheritDoc} */ @Override public FloatColumn append(Column column, int row) { Preconditions.checkArgument(column.type() == this.type()); return append(((FloatColumn) column).getFloat(row)); } + /** {@inheritDoc} */ @Override public FloatColumn set(int row, Column column, int sourceRow) { Preconditions.checkArgument(column.type() == this.type()); return set(row, ((FloatColumn) column).getFloat(sourceRow)); } + /** {@inheritDoc} */ @Override public byte[] asBytes(int rowNumber) { return ByteBuffer.allocate(FloatColumnType.instance().byteSize()) @@ -273,6 +296,7 @@ public byte[] asBytes(int rowNumber) { .array(); } + /** {@inheritDoc} */ @Override public int countUnique() { FloatSet uniqueElements = new FloatOpenHashSet(); @@ -282,6 +306,7 @@ public int countUnique() { return uniqueElements.size(); } + /** {@inheritDoc} */ @Override public double getDouble(int row) { float value = data.getFloat(row); @@ -305,31 +330,37 @@ public boolean isMissingValue(float value) { return FloatColumnType.valueIsMissing(value); } + /** {@inheritDoc} */ @Override public boolean isMissing(int rowNumber) { return isMissingValue(getFloat(rowNumber)); } + /** {@inheritDoc} */ @Override public FloatColumn setMissing(int i) { return set(i, FloatColumnType.missingValueIndicator()); } + /** {@inheritDoc} */ @Override public void sortAscending() { data.sort(FloatComparators.NATURAL_COMPARATOR); } + /** {@inheritDoc} */ @Override public void sortDescending() { 
data.sort(FloatComparators.OPPOSITE_COMPARATOR); } + /** {@inheritDoc} */ @Override public FloatColumn appendMissing() { return append(FloatColumnType.missingValueIndicator()); } + /** {@inheritDoc} */ @Override public FloatColumn appendObj(Object obj) { if (obj == null) { @@ -341,6 +372,7 @@ public FloatColumn appendObj(Object obj) { throw new IllegalArgumentException("Could not append " + obj.getClass()); } + /** {@inheritDoc} */ @Override public FloatColumn appendCell(final String value) { try { @@ -351,6 +383,7 @@ public FloatColumn appendCell(final String value) { } } + /** {@inheritDoc} */ @Override public FloatColumn appendCell(final String value, AbstractColumnParser parser) { try { @@ -361,6 +394,7 @@ public FloatColumn appendCell(final String value, AbstractColumnParser parser } } + /** {@inheritDoc} */ @Override public String getUnformattedString(final int row) { final float value = getFloat(row); @@ -479,4 +513,10 @@ public DoubleColumn asDoubleColumn() { } return result; } + + /** {@inheritDoc} */ + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } } diff --git a/core/src/main/java/tech/tablesaw/api/InstantColumn.java b/core/src/main/java/tech/tablesaw/api/InstantColumn.java index 1010e1558..e5235687f 100644 --- a/core/src/main/java/tech/tablesaw/api/InstantColumn.java +++ b/core/src/main/java/tech/tablesaw/api/InstantColumn.java @@ -639,6 +639,8 @@ public LongIterator longIterator() { return data.iterator(); } + /** {@inheritDoc} */ + @Override public Set asSet() { Set times = new HashSet<>(); InstantColumn unique = unique(); diff --git a/core/src/main/java/tech/tablesaw/api/IntColumn.java b/core/src/main/java/tech/tablesaw/api/IntColumn.java index f3c269ab0..e3ff110d4 100644 --- a/core/src/main/java/tech/tablesaw/api/IntColumn.java +++ b/core/src/main/java/tech/tablesaw/api/IntColumn.java @@ -3,7 +3,9 @@ import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.ints.*; import java.nio.ByteBuffer; +import 
java.util.HashSet; import java.util.Iterator; +import java.util.Set; import java.util.stream.IntStream; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; @@ -79,11 +81,13 @@ public static IntColumn indexColumn( return indexColumn; } + /** {@inheritDoc} */ @Override public int size() { return data.size(); } + /** {@inheritDoc} */ @Override public void clear() { data.clear(); @@ -93,6 +97,7 @@ public static boolean valueIsMissing(int value) { return IntColumnType.valueIsMissing(value); } + /** {@inheritDoc} */ @Override public Integer get(int index) { int result = getInt(index); @@ -103,6 +108,7 @@ public int[] asIntArray() { return data.toIntArray(); } + /** {@inheritDoc} */ @Override public IntColumn subset(final int[] rows) { final IntColumn c = this.emptyCopy(); @@ -112,6 +118,7 @@ public IntColumn subset(final int[] rows) { return c; } + /** {@inheritDoc} */ @Override public IntColumn unique() { final IntSet values = new IntOpenHashSet(); @@ -125,6 +132,7 @@ public IntColumn unique() { return column; } + /** {@inheritDoc} */ @Override public IntColumn top(int n) { final IntArrayList top = new IntArrayList(); @@ -136,6 +144,7 @@ public IntColumn top(int n) { return new IntColumn(name() + "[Top " + n + "]", top); } + /** {@inheritDoc} */ @Override public IntColumn bottom(final int n) { final IntArrayList bottom = new IntArrayList(); @@ -147,6 +156,7 @@ public IntColumn bottom(final int n) { return new IntColumn(name() + "[Bottoms " + n + "]", bottom); } + /** {@inheritDoc} */ @Override public IntColumn lag(int n) { final int srcPos = n >= 0 ? 
0 : 0 - n; @@ -164,6 +174,7 @@ public IntColumn lag(int n) { return new IntColumn(name() + " lag(" + n + ")", new IntArrayList(dest)); } + /** {@inheritDoc} */ @Override public IntColumn removeMissing() { IntColumn result = copy(); @@ -183,6 +194,7 @@ public IntColumn append(int i) { return this; } + /** {@inheritDoc} */ @Override public IntColumn append(Integer val) { if (val == null) { @@ -193,11 +205,13 @@ public IntColumn append(Integer val) { return this; } + /** {@inheritDoc} */ @Override public IntColumn copy() { return new IntColumn(name(), data.clone()); } + /** {@inheritDoc} */ @Override public Iterator iterator() { return data.iterator(); @@ -207,6 +221,7 @@ public IntIterator intIterator() { return data.iterator(); } + /** {@inheritDoc} */ @Override public Integer[] asObjectArray() { final Integer[] output = new Integer[size()]; @@ -220,11 +235,13 @@ public Integer[] asObjectArray() { return output; } + /** {@inheritDoc} */ @Override public int compare(Integer o1, Integer o2) { return Integer.compare(o1, o2); } + /** {@inheritDoc} */ @Override public IntColumn set(int i, Integer val) { return val == null ? 
setMissing(i) : set(i, (int) val); @@ -235,6 +252,7 @@ public IntColumn set(int i, int val) { return this; } + /** {@inheritDoc} */ @Override public IntColumn append(final Column column) { Preconditions.checkArgument(column.type() == this.type()); @@ -246,28 +264,33 @@ public IntColumn append(final Column column) { return this; } + /** {@inheritDoc} */ @Override public IntColumn append(Column column, int row) { Preconditions.checkArgument(column.type() == this.type()); return append(((IntColumn) column).getInt(row)); } + /** {@inheritDoc} */ @Override public IntColumn set(int row, Column column, int sourceRow) { Preconditions.checkArgument(column.type() == this.type()); return set(row, ((IntColumn) column).getInt(sourceRow)); } + /** {@inheritDoc} */ @Override public Column set(int row, String stringValue, AbstractColumnParser parser) { return set(row, parser.parseInt(stringValue)); } + /** {@inheritDoc} */ @Override public IntColumn appendMissing() { return append(IntColumnType.missingValueIndicator()); } + /** {@inheritDoc} */ @Override public byte[] asBytes(int rowNumber) { return ByteBuffer.allocate(IntColumnType.instance().byteSize()) @@ -275,12 +298,14 @@ public byte[] asBytes(int rowNumber) { .array(); } + /** {@inheritDoc} */ @Override public String getString(final int row) { final int value = getInt(row); return String.valueOf(getPrintFormatter().format(value)); } + /** {@inheritDoc} */ @Override public int countUnique() { IntSet uniqueElements = new IntOpenHashSet(); @@ -313,6 +338,7 @@ public int getInt(int row) { return data.getInt(row); } + /** {@inheritDoc} */ @Override public double getDouble(int row) { int value = data.getInt(row); @@ -326,21 +352,25 @@ public boolean isMissingValue(int value) { return IntColumnType.valueIsMissing(value); } + /** {@inheritDoc} */ @Override public boolean isMissing(int rowNumber) { return isMissingValue(getInt(rowNumber)); } + /** {@inheritDoc} */ @Override public void sortAscending() { 
data.sort(IntComparators.NATURAL_COMPARATOR); } + /** {@inheritDoc} */ @Override public void sortDescending() { data.sort(IntComparators.OPPOSITE_COMPARATOR); } + /** {@inheritDoc} */ @Override public IntColumn appendObj(Object obj) { if (obj == null) { @@ -352,6 +382,7 @@ public IntColumn appendObj(Object obj) { throw new IllegalArgumentException("Could not append " + obj.getClass()); } + /** {@inheritDoc} */ @Override public IntColumn appendCell(final String value) { try { @@ -362,6 +393,7 @@ public IntColumn appendCell(final String value) { } } + /** {@inheritDoc} */ @Override public IntColumn appendCell(final String value, AbstractColumnParser parser) { try { @@ -372,6 +404,7 @@ public IntColumn appendCell(final String value, AbstractColumnParser parser) } } + /** {@inheritDoc} */ @Override public String getUnformattedString(final int row) { final int value = getInt(row); @@ -512,9 +545,16 @@ public ShortColumn asShortColumn() { return result; } + /** {@inheritDoc} */ @Override public IntColumn setMissing(int r) { set(r, IntColumnType.missingValueIndicator()); return this; } + + /** {@inheritDoc} */ + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } } diff --git a/core/src/main/java/tech/tablesaw/api/LongColumn.java b/core/src/main/java/tech/tablesaw/api/LongColumn.java index bacced4c7..c8acf450a 100644 --- a/core/src/main/java/tech/tablesaw/api/LongColumn.java +++ b/core/src/main/java/tech/tablesaw/api/LongColumn.java @@ -5,7 +5,9 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.ZoneOffset; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; import java.util.stream.LongStream; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; @@ -315,6 +317,12 @@ public byte[] asBytes(int rowNumber) { .array(); } + /** {@inheritDoc} */ + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } + @Override public int countUnique() { 
LongSet uniqueElements = new LongOpenHashSet(); diff --git a/core/src/main/java/tech/tablesaw/api/NumberColumn.java b/core/src/main/java/tech/tablesaw/api/NumberColumn.java index 0fa5ed5b9..514966c5f 100644 --- a/core/src/main/java/tech/tablesaw/api/NumberColumn.java +++ b/core/src/main/java/tech/tablesaw/api/NumberColumn.java @@ -9,6 +9,12 @@ import tech.tablesaw.columns.numbers.DoubleColumnType; import tech.tablesaw.columns.numbers.NumberColumnFormatter; +/** + * An abstract class that provides a partial implementation for columns of numeric data + * + * @param The column type + * @param The (boxed) type of data in the column + */ public abstract class NumberColumn, T extends Number> extends AbstractColumn implements NumericColumn { @@ -44,6 +50,10 @@ public NumberColumn set(DoublePredicate condition, NumberColumn othe return this; } + /** + * Sets the value of all elements in this column matching condition to be equal to newValue and + * returns this column + */ public NumberColumn set(DoublePredicate condition, T newValue) { for (int row = 0; row < size(); row++) { if (condition.test(getDouble(row))) { @@ -53,17 +63,20 @@ public NumberColumn set(DoublePredicate condition, T newValue) { return this; } + /** {@inheritDoc} */ @Override - public void setPrintFormatter(final NumberFormat format, final String missingValueString) { - this.printFormatter = new NumberColumnFormatter(format, missingValueString); + public void setPrintFormatter(final NumberFormat format, final String missingValueIndicator) { + this.printFormatter = new NumberColumnFormatter(format, missingValueIndicator); } + /** {@inheritDoc} */ @Override public void setPrintFormatter(final NumberColumnFormatter formatter) { this.printFormatter = formatter; formatter.setColumnType(type()); } + /** Returns the NumberColumnFormatter for this column, or null */ protected NumberColumnFormatter getPrintFormatter() { return printFormatter; } @@ -88,6 +101,7 @@ protected NumberColumnFormatter getPrintFormatter() 
{ */ public abstract NumericColumn bottom(final int n); + /** {@inheritDoc} */ @Override public String getString(final int row) { final double value = getDouble(row); @@ -97,6 +111,7 @@ public String getString(final int row) { return String.valueOf(printFormatter.format(value)); } + /** {@inheritDoc} */ @Override public C emptyCopy() { final C column = createCol(name()); @@ -105,6 +120,7 @@ public C emptyCopy() { return column; } + /** {@inheritDoc} */ @Override public C emptyCopy(final int rowSize) { final C column = createCol(name(), rowSize); @@ -113,9 +129,6 @@ public C emptyCopy(final int rowSize) { return column; } - @Override - public abstract C copy(); - /** * Compares the given ints, which refer to the indexes of the doubles in this column, according to * the values of the doubles themselves @@ -125,18 +138,12 @@ public IntComparator rowComparator() { return comparator; } + /** {@inheritDoc} */ @Override public int byteSize() { return type().byteSize(); } - /** Returns the contents of the cell at rowNumber as a byte[] */ - @Override - public abstract byte[] asBytes(final int rowNumber); - - @Override - public abstract C appendMissing(); - /** Returns the count of missing values in this column */ @Override public int countMissing() { diff --git a/core/src/main/java/tech/tablesaw/api/NumericColumn.java b/core/src/main/java/tech/tablesaw/api/NumericColumn.java index bd527c2a4..fd4834220 100644 --- a/core/src/main/java/tech/tablesaw/api/NumericColumn.java +++ b/core/src/main/java/tech/tablesaw/api/NumericColumn.java @@ -31,11 +31,13 @@ public interface NumericColumn extends Column, NumberMapFunctions, NumberFilters { + /** {@inheritDoc} */ @Override default boolean isEmpty() { return size() == 0; } + /** {@inheritDoc} */ @Override default double[] asDoubleArray() { final double[] output = new double[size()]; @@ -45,6 +47,7 @@ default double[] asDoubleArray() { return output; } + /** {@inheritDoc} */ @Override default Selection eval(final DoublePredicate 
predicate) { final Selection bitmap = new BitmapBackedSelection(); @@ -57,6 +60,7 @@ default Selection eval(final DoublePredicate predicate) { return bitmap; } + /** {@inheritDoc} */ @Override default Selection eval(final BiPredicate predicate, final Number number) { final double value = number.doubleValue(); @@ -70,6 +74,7 @@ default Selection eval(final BiPredicate predicate, final Number return bitmap; } + /** {@inheritDoc} */ @Override default Selection isIn(Collection numbers) { final Selection results = new BitmapBackedSelection(); @@ -81,6 +86,7 @@ default Selection isIn(Collection numbers) { return results; } + /** {@inheritDoc} */ @Override default Selection isNotIn(Collection numbers) { final Selection results = new BitmapBackedSelection(); @@ -89,11 +95,13 @@ default Selection isNotIn(Collection numbers) { return results; } + /** {@inheritDoc} */ @Override default Selection isMissing() { return eval(isMissing); } + /** {@inheritDoc} */ @Override default Selection isNotMissing() { return eval(isNotMissing); @@ -159,12 +167,7 @@ default boolean noneMatch(DoublePredicate test) { return count(test, 1) == 0; } - /** - * Returns the maximum row according to the provided Comparator - * - * @param comp - * @return the maximum row - */ + /** Returns the maximum row according to the provided Comparator */ default Optional max(DoubleComparator comp) { boolean first = true; double d1 = 0.0; @@ -180,12 +183,7 @@ default Optional max(DoubleComparator comp) { return (first ? Optional.empty() : Optional.of(d1)); } - /** - * Returns the minimum row according to the provided Comparator - * - * @param comp - * @return the minimum row - */ + /** Returns the minimum row according to the provided Comparator */ default Optional min(DoubleComparator comp) { boolean first = true; double d1 = 0.0; @@ -201,8 +199,13 @@ default Optional min(DoubleComparator comp) { return (first ? 
Optional.empty() : Optional.of(d1)); } - void setPrintFormatter(final NumberFormat format, final String missingValueString); + /** + * Sets the print formatter to a new {@link tech.tablesaw.columns.ColumnFormatter} constructed + * from the given number format and missing value indicator TODO: make these return the column? + */ + void setPrintFormatter(final NumberFormat format, final String missingValueIndicator); + /** Sets the print formatter to the argument */ void setPrintFormatter(final NumberColumnFormatter formatter); /** @@ -259,11 +262,16 @@ default , RT> R mapInto(DoubleFunction fun, R return into; } + /** Returns the subset of data in this column included in the given {@link Selection} */ @Override default NumericColumn where(final Selection selection) { return (NumericColumn) subset(selection.toArray()); } + /** + * Returns a {@link NumberInterpolator} object that can be used to interpolate values for elements + * missing in the column + */ @Override default NumberInterpolator interpolate() { return new NumberInterpolator<>(this); @@ -280,67 +288,82 @@ default Double summarize(Selection selection, NumericAggregateFunction function) return function.summarize(column); } - // Reduce functions applied to the whole column + /** Returns the sum of the values in this column */ default double sum() { return sum.summarize(this); } + /** Returns the product of values in this column */ default double product() { return product.summarize(this); } + /** Returns the mean of the data in this column */ default double mean() { return mean.summarize(this); } + /** Returns the median or 50th percentile of the data in this column */ default double median() { return median.summarize(this); } + /** Returns the 1st quartile of the data in this column */ default double quartile1() { return quartile1.summarize(this); } + /** Returns the 3rd quartile of the data in this column */ default double quartile3() { return quartile3.summarize(this); } + /** Returns the given percentile 
of the data in this column */ default double percentile(double percentile) { return AggregateFunctions.percentile(this, percentile); } + /** Returns the range of the data in this column */ default double range() { return range.summarize(this); } + /** Returns the largest value in this column */ default double max() { return max.summarize(this); } + /** Returns the smallest value in this column */ default double min() { return min.summarize(this); } + /** Returns the sample variance of the data in this column */ default double variance() { return variance.summarize(this); } + /** Returns the population variance of the data in this column */ default double populationVariance() { return populationVariance.summarize(this); } + /** Returns the standard deviation of the data in this column */ default double standardDeviation() { return stdDev.summarize(this); } + /** Returns the sum of logs of the data in this column */ default double sumOfLogs() { return sumOfLogs.summarize(this); } + /** Returns the sum of squares of the data in this column */ default double sumOfSquares() { return sumOfSquares.summarize(this); } + /** Returns the geometric mean of the data in this column */ default double geometricMean() { return geometricMean.summarize(this); } @@ -350,10 +373,12 @@ default double quadraticMean() { return quadraticMean.summarize(this); } + /** Returns the kurtosis of the data in this column */ default double kurtosis() { return kurtosis.summarize(this); } + /** Returns the skewness of the data in this column */ default double skewness() { return skewness.summarize(this); } @@ -365,11 +390,16 @@ default double pearsons(NumericColumn otherColumn) { return new PearsonsCorrelation().correlation(x, y); } + /** Returns the auto-correlation (correlation between each element and the next) */ default double autoCorrelation() { int defaultLag = 1; return autoCorrelation(defaultLag); } + /** + * Returns the auto-correlation between elements separated by {@code lag}. 
If lag is 2, the + * correlation is computed between pairs of elements 0 and 2, 1 and 3; 2 and 4, etc. + */ default double autoCorrelation(int lag) { int slice = this.size() - lag; if (slice <= 1) { @@ -399,19 +429,28 @@ default double kendalls(NumericColumn otherColumn) { return new KendallsCorrelation().correlation(x, y); } + /** Returns a table of common statistical values that together describe the data in this column */ default Table summary() { return stats().asTable(); } + /** + * Returns a {@link Stats} object that collects common statistical measures of the data in this + * column + */ default Stats stats() { return Stats.create(this); } + /** {@inheritDoc} */ @Override default NumberRollingColumn rolling(final int windowSize) { return new NumberRollingColumn(this, windowSize); } + /** + * Returns a column containing the percentage change between values that are {@code periods} apart + */ default DoubleColumn pctChange(int periods) { return (DoubleColumn) rolling(periods + 1) @@ -420,6 +459,7 @@ default DoubleColumn pctChange(int periods) { name() + " " + periods + "-period " + AggregateFunctions.pctChange.functionName()); } + /** {@inheritDoc} */ @Override default NumericColumn lead(final int n) { final NumericColumn numberColumn = lag(-n); @@ -427,8 +467,11 @@ default NumericColumn lead(final int n) { return numberColumn; } + /** {@inheritDoc} */ + @Override NumericColumn lag(final int n); + /** Returns a double representation of the number at {@code index} */ double getDouble(int index); /** @@ -479,10 +522,19 @@ default DoubleColumn asDoubleColumn() { return (DoubleColumn) this.copy(); } + /** + * Returns a new ShortColumn containing a value for each value in this column + * + *

<p>The exact behavior when overridden depends on the type of the receiver (LongColumn, + * FloatColumn, etc.) + * + *
<p>
In this version, the result is a copy of the original + */ default ShortColumn asShortColumn() { return (ShortColumn) this.copy(); } + /** {@inheritDoc} */ @Override NumericColumn copy(); diff --git a/core/src/main/java/tech/tablesaw/api/ShortColumn.java b/core/src/main/java/tech/tablesaw/api/ShortColumn.java index df4d3daa0..cd8deab35 100644 --- a/core/src/main/java/tech/tablesaw/api/ShortColumn.java +++ b/core/src/main/java/tech/tablesaw/api/ShortColumn.java @@ -11,7 +11,9 @@ import it.unimi.dsi.fastutil.shorts.ShortSet; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; import java.util.stream.Stream; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; @@ -494,4 +496,10 @@ public IntColumn asIntColumn() { } return result; } + + /** {@inheritDoc} */ + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } } diff --git a/core/src/main/java/tech/tablesaw/api/TextColumn.java b/core/src/main/java/tech/tablesaw/api/TextColumn.java index f986f7544..001cbd514 100644 --- a/core/src/main/java/tech/tablesaw/api/TextColumn.java +++ b/core/src/main/java/tech/tablesaw/api/TextColumn.java @@ -397,6 +397,8 @@ public Iterator iterator() { return values.iterator(); } + /** {@inheritDoc} */ + @Override public Set asSet() { return new HashSet<>(values); } diff --git a/core/src/main/java/tech/tablesaw/api/TimeColumn.java b/core/src/main/java/tech/tablesaw/api/TimeColumn.java index 944358b1b..47e18f8cc 100644 --- a/core/src/main/java/tech/tablesaw/api/TimeColumn.java +++ b/core/src/main/java/tech/tablesaw/api/TimeColumn.java @@ -29,10 +29,7 @@ import java.sql.Time; import java.time.LocalTime; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Supplier; import 
java.util.stream.Stream; @@ -555,6 +552,11 @@ public boolean contains(LocalTime time) { return data.contains(t); } + @Override + public Set asSet() { + return new HashSet<>(unique().asList()); + } + /** {@inheritDoc} */ @Override public TimeColumn setMissing(int i) { diff --git a/core/src/main/java/tech/tablesaw/columns/AbstractColumnType.java b/core/src/main/java/tech/tablesaw/columns/AbstractColumnType.java index 82be3d517..fa892c973 100644 --- a/core/src/main/java/tech/tablesaw/columns/AbstractColumnType.java +++ b/core/src/main/java/tech/tablesaw/columns/AbstractColumnType.java @@ -33,24 +33,30 @@ protected AbstractColumnType(int byteSize, String name, String printerFriendlyNa ColumnType.register(this); } + /** {@inheritDoc} */ @Override public String toString() { return name; } + /** {@inheritDoc} */ @Override public String name() { return name; } + /** {@inheritDoc} */ public int byteSize() { return byteSize; } + /** {@inheritDoc} */ + @Override public String getPrinterFriendlyName() { return printerFriendlyName; } + /** {@inheritDoc} */ @Override public boolean equals(Object o) { if (this == o) return true; @@ -61,6 +67,7 @@ public boolean equals(Object o) { && Objects.equal(printerFriendlyName, that.printerFriendlyName); } + /** {@inheritDoc} */ @Override public int hashCode() { return Objects.hashCode(byteSize, name, printerFriendlyName); diff --git a/core/src/main/java/tech/tablesaw/columns/Column.java b/core/src/main/java/tech/tablesaw/columns/Column.java index 90c24f1e6..821d20c38 100644 --- a/core/src/main/java/tech/tablesaw/columns/Column.java +++ b/core/src/main/java/tech/tablesaw/columns/Column.java @@ -18,10 +18,7 @@ import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.ints.IntComparator; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Optional; +import java.util.*; import java.util.function.BinaryOperator; import java.util.function.Function; import 
java.util.function.Predicate; @@ -177,6 +174,9 @@ default String title() { */ byte[] asBytes(int rowNumber); + /** Returns a Set containing all the unique values in this column */ + Set asSet(); + /** * Returns a {@link RollingColumn} with the given windowSize, which can be used for performing * calculations on rolling subsets of my data diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java b/core/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java index 562179c04..e9fe2e844 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java +++ b/core/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java @@ -5,14 +5,17 @@ import tech.tablesaw.columns.AbstractColumnType; import tech.tablesaw.io.ReadOptions; +/** The {@link ColumnType} for {@link DoubleColumn} */ public class DoubleColumnType extends AbstractColumnType { private static final int BYTE_SIZE = 8; + /** Returns the default parser for DoubleColumn */ public static final DoubleParser DEFAULT_PARSER = new DoubleParser(ColumnType.DOUBLE); private static DoubleColumnType INSTANCE = new DoubleColumnType(BYTE_SIZE, "DOUBLE", "Double"); + /** Returns the singleton instance of DoubleColumnType */ public static DoubleColumnType instance() { if (INSTANCE == null) { INSTANCE = new DoubleColumnType(BYTE_SIZE, "DOUBLE", "Double"); @@ -24,16 +27,19 @@ private DoubleColumnType(int byteSize, String name, String printerFriendlyName) super(byteSize, name, printerFriendlyName); } + /** {@inheritDoc} */ @Override public DoubleColumn create(String name) { return DoubleColumn.create(name); } + /** {@inheritDoc} */ @Override public DoubleParser customParser(ReadOptions options) { return new DoubleParser(this, options); } + /** Returns true if the given value is the missing value indicator for this column type */ public static boolean valueIsMissing(double value) { return Double.isNaN(value); } @@ -44,6 +50,10 @@ public static boolean isMissingValue(double 
value) { return Double.isNaN(value); } + /** + * Returns the missing value indicator for this column type. NOTE: Clients should use {@link + * DoubleColumnType#valueIsMissing(double)} to test for missing value indicators + */ public static double missingValueIndicator() { return Double.NaN; } diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/FloatColumnType.java b/core/src/main/java/tech/tablesaw/columns/numbers/FloatColumnType.java index 7de0d09c4..471e97cb6 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/FloatColumnType.java +++ b/core/src/main/java/tech/tablesaw/columns/numbers/FloatColumnType.java @@ -5,10 +5,12 @@ import tech.tablesaw.columns.AbstractColumnType; import tech.tablesaw.io.ReadOptions; +/** The {@link ColumnType} for {@link FloatColumn} */ public class FloatColumnType extends AbstractColumnType { public static final int BYTE_SIZE = 4; + /** Returns the default parser for {@link FloatColumn} */ public static final FloatParser DEFAULT_PARSER = new FloatParser(ColumnType.FLOAT); private static FloatColumnType INSTANCE; @@ -17,6 +19,7 @@ private FloatColumnType(int byteSize, String name, String printerFriendlyName) { super(byteSize, name, printerFriendlyName); } + /** Returns the singleton instance of FloatColumnType */ public static FloatColumnType instance() { if (INSTANCE == null) { INSTANCE = new FloatColumnType(BYTE_SIZE, "FLOAT", "float"); @@ -24,20 +27,24 @@ public static FloatColumnType instance() { return INSTANCE; } + /** {@inheritDoc} */ @Override public FloatColumn create(String name) { return FloatColumn.create(name); } + /** {@inheritDoc} */ @Override public FloatParser customParser(ReadOptions options) { return new FloatParser(this, options); } + /** Returns true if the given value is the missing value indicator for this column type */ public static boolean valueIsMissing(float value) { return Float.isNaN(value); } + /** Returns the missing value indicator for this column type */ public static float
missingValueIndicator() { return Float.NaN; } diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/IntColumnType.java b/core/src/main/java/tech/tablesaw/columns/numbers/IntColumnType.java index 85cb475d9..cd91a3e56 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/IntColumnType.java +++ b/core/src/main/java/tech/tablesaw/columns/numbers/IntColumnType.java @@ -5,8 +5,10 @@ import tech.tablesaw.columns.AbstractColumnType; import tech.tablesaw.io.ReadOptions; +/** The {@link ColumnType} for {@link IntColumn} */ public class IntColumnType extends AbstractColumnType { + /** The default parser for IntColumn */ public static final IntParser DEFAULT_PARSER = new IntParser(ColumnType.INTEGER); private static final int BYTE_SIZE = 4; @@ -17,6 +19,7 @@ private IntColumnType(int byteSize, String name, String printerFriendlyName) { super(byteSize, name, printerFriendlyName); } + /** Returns the singleton instance of IntColumnType */ public static IntColumnType instance() { if (INSTANCE == null) { INSTANCE = new IntColumnType(BYTE_SIZE, "INTEGER", "Integer"); @@ -24,20 +27,24 @@ public static IntColumnType instance() { return INSTANCE; } + /** {@inheritDoc} */ @Override public IntColumn create(String name) { return IntColumn.create(name); } + /** {@inheritDoc} */ @Override public IntParser customParser(ReadOptions options) { return new IntParser(this, options); } + /** Returns true if the given value is the missing value indicator for this column type */ public static boolean valueIsMissing(int value) { return value == missingValueIndicator(); } + /** Returns the missing value indicator for this column type */ public static int missingValueIndicator() { return Integer.MIN_VALUE; } diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/LongColumnType.java b/core/src/main/java/tech/tablesaw/columns/numbers/LongColumnType.java index abab51830..6ba3c79fc 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/LongColumnType.java +++
b/core/src/main/java/tech/tablesaw/columns/numbers/LongColumnType.java @@ -5,8 +5,10 @@ import tech.tablesaw.columns.AbstractColumnType; import tech.tablesaw.io.ReadOptions; +/** The {@link ColumnType} for {@link LongColumn} */ public class LongColumnType extends AbstractColumnType { + /** The default parser for LongColumn */ public static final LongParser DEFAULT_PARSER = new LongParser(ColumnType.LONG); private static final int BYTE_SIZE = 8; @@ -17,6 +19,7 @@ private LongColumnType(int byteSize, String name, String printerFriendlyName) { super(byteSize, name, printerFriendlyName); } + /** Returns the singleton instance of LongColumnType */ public static LongColumnType instance() { if (INSTANCE == null) { INSTANCE = new LongColumnType(BYTE_SIZE, "LONG", "Long"); @@ -24,24 +27,29 @@ public static LongColumnType instance() { return INSTANCE; } + /** {@inheritDoc} */ @Override public LongColumn create(String name) { return LongColumn.create(name); } + /** {@inheritDoc} */ public LongParser defaultParser() { return DEFAULT_PARSER; } + /** {@inheritDoc} */ @Override public LongParser customParser(ReadOptions options) { return new LongParser(this, options); } + /** Returns true if the given value is the missing value indicator for this column type */ public static boolean valueIsMissing(long value) { return value == missingValueIndicator(); } + /** Returns the missing value indicator for this column type */ public static long missingValueIndicator() { return Long.MIN_VALUE; } diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/NumberInterpolator.java b/core/src/main/java/tech/tablesaw/columns/numbers/NumberInterpolator.java index 69fc02533..8335de1e3 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/NumberInterpolator.java +++ b/core/src/main/java/tech/tablesaw/columns/numbers/NumberInterpolator.java @@ -4,11 +4,15 @@ import tech.tablesaw.api.NumericColumn; import tech.tablesaw.interpolation.Interpolator; -/** Creates a new column with
missing cells filled based off the value of nearby cells. */ +/** + * Creates a new column with missing cells filled based off the value of nearby cells.
+ * Subclass to provide alternate interpolation strategies + */ public class NumberInterpolator extends Interpolator { protected final NumericColumn col; + /** Constructs an interpolator for the given column */ public NumberInterpolator(NumericColumn col) { super(col); this.col = col; diff --git a/core/src/main/java/tech/tablesaw/columns/numbers/Stats.java b/core/src/main/java/tech/tablesaw/columns/numbers/Stats.java index 8e6cfce4e..d0fdfbeba 100644 --- a/core/src/main/java/tech/tablesaw/columns/numbers/Stats.java +++ b/core/src/main/java/tech/tablesaw/columns/numbers/Stats.java @@ -20,6 +20,10 @@ import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; +/** + * An object that calculates in one pass a variety of common statistical values that describe a + * column + */ public class Stats { private long n; @@ -37,10 +41,12 @@ public class Stats { private double sumOfSquares; private final String name; + /** Constructs a Stats object with the given name */ private Stats(String name) { this.name = name; } + /** Constructs a Stats object from the given column */ public static Stats create(final NumericColumn values) { SummaryStatistics summaryStatistics = new SummaryStatistics(); for (int i = 0; i < values.size(); i++) { @@ -67,62 +73,77 @@ private static Stats getStats(NumericColumn values, SummaryStatistics summary return stats; } + /** Returns the range of values in the data */ public double range() { return (max - min); } + /** Returns the standard deviation of values in the data */ public double standardDeviation() { return standardDeviation; } + /** Returns the number of values in the data */ public long n() { return n; } + /** Returns the mean of values in the data */ public double mean() { return mean; } + /** Returns the smallest value */ public double min() { return min; } + /** Returns the largest value */ public double max() { return max; } + /** Returns the sum of the values */ public double sum() { return sum; } + /** Returns the sample 
variance of the values */ public double variance() { return variance; } + /** Returns the sum of squares of the values */ public double sumOfSquares() { return sumOfSquares; } + /** Returns the population variance of the values */ public double populationVariance() { return populationVariance; } + /** Returns the sum of the logs of the values */ public double sumOfLogs() { return sumOfLogs; } + /** Returns the geometric mean of the values */ public double geometricMean() { return geometricMean; } + /** Returns the quadratic mean of the values */ public double quadraticMean() { return quadraticMean; } + /** Returns the second moment of the values */ public double secondMoment() { return secondMoment; } + /** Returns the most common calculated statistics in tabular form */ public Table asTable() { Table t = Table.create(name); StringColumn measure = StringColumn.create("Measure"); @@ -157,6 +178,7 @@ public Table asTable() { return t; } + /** Returns all the calculated statistics in tabular form */ public Table asTableComplete() { Table t = asTable(); diff --git a/core/src/test/java/tech/tablesaw/api/DoubleColumnTest.java b/core/src/test/java/tech/tablesaw/api/DoubleColumnTest.java index 58ab8acf5..e9792e947 100644 --- a/core/src/test/java/tech/tablesaw/api/DoubleColumnTest.java +++ b/core/src/test/java/tech/tablesaw/api/DoubleColumnTest.java @@ -14,9 +14,7 @@ package tech.tablesaw.api; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.util.ArrayList; import java.util.Arrays; @@ -84,4 +82,12 @@ public void testCustomParser() { col.appendCell("5.0"); assertFalse(col.isMissing(col.size() - 1)); } + + @Test + public void asSet() { + final double[] values = {4, 5, 9.3, 5, 9.3}; + final DoubleColumn c = DoubleColumn.create("fc", values); + assertEquals(3, c.asSet().size()); 
+ assertTrue(c.asSet().contains(4.0)); + } } diff --git a/core/src/test/java/tech/tablesaw/api/FloatColumnTest.java b/core/src/test/java/tech/tablesaw/api/FloatColumnTest.java index 5dd1a639e..c6433daf7 100644 --- a/core/src/test/java/tech/tablesaw/api/FloatColumnTest.java +++ b/core/src/test/java/tech/tablesaw/api/FloatColumnTest.java @@ -79,4 +79,12 @@ public void testCustomParser() { floatColumn.appendCell("5.0"); assertFalse(floatColumn.isMissing(floatColumn.size() - 1)); } + + @Test + public void asSet() { + final float[] floatColumnValues = {4, 5, 9.3f, 5, 9.3f}; + final FloatColumn floatColumn = FloatColumn.create("fc", floatColumnValues); + assertEquals(3, floatColumn.asSet().size()); + assertTrue(floatColumn.asSet().contains(4f)); + } }