@@ -0,0 +1,228 @@

/*
* Copyright (c) 2018.
*/

package com.framex.core

import com.framex.utils.FrameErrorMessages

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.reflect.ClassTag

class FrameX2(val data: Vector[Vector[ElemX]], val columnMap: Map[String, Int] = Map()) {

// used in FrameStats!
// Aggregation metadata: outer String key -> (inner String key -> Int).
// It is assigned from outside (e.g. by the agg methods in Stats), copied as-is by applyMap,
// and compared by sameElements.
// NOTE(review): presumably column name -> (operation name -> column index) — confirm against Stats.
var aggMap = Map[String, Map[String, Int]]()

/**
 * Column names ordered by the positional index stored in `columnMap`.
 *
 * @return the names sorted ascending by their column index
 */
def columnNames: List[String] =
  // Explicit result type: this value is passed to the overloaded FrameX2.apply factories.
  this.columnMap.toList.sortBy(_._2).map(_._1)

/**
 * Dimensions of the frame.
 *
 * @return `(rows, columns)`, where rows is the length of the first column and columns is
 *         the number of column vectors; a frame without any column reports 0 rows
 *         instead of throwing
 */
def shape(): (Int, Int) = {
  // headOption guards against a frame constructed with no columns at all.
  val rowCount = data.headOption.map(_.size).getOrElse(0)
  (rowCount, data.size)
}

/** Number of dimensions of a frame; always 2. */
def ndim: Int = 2

/**
 * Returns a new frame holding the first `n` rows of every column, keeping the column names.
 *
 * @param n number of leading rows to keep (defaults to 5)
 */
def head(n: Int = 5): FrameX2 = {
  val leadingRows = data.map(column => column.take(n))
  FrameX2(leadingRows, this.columnNames)
}


/**
 * Returns a new frame holding the last `n` rows of every column, keeping the column names.
 *
 * @param n number of trailing rows to keep (defaults to 5)
 */
def tail(n: Int = 5): FrameX2 = {
  val trailingRows = data.map { column =>
    val total = column.size
    column.slice(total - n, total)
  }
  FrameX2(trailingRows, this.columnNames)
}

/**
 * "All rows" range, for use as the `index` argument of `loc`.
 *
 * The range is `0 to <row count>` (inclusive), so its end is one past the last valid row
 * index; `loc` slices with `index.end + 1` and `Vector.slice` clamps to the column length.
 * A frame without any column yields `0 to 0` instead of throwing.
 */
def :: = 0 to data.headOption.map(_.size).getOrElse(0)

/**
 * Selects a row range from the named columns.
 *
 * @param index   row range; only `index.start` and `index.end` are read
 * @param columns names of the columns to keep, in the order they should appear
 * @return a new frame with the selected rows of the requested columns
 * @throws Exception (message `FrameErrorMessages.INDEX_OUT_OF_SIZE`) if a name is not in `columnMap`
 */
def loc(index: Range, columns: List[String]): FrameX2 = {
  val selected: Vector[Vector[ElemX]] = columns.toVector.map { name =>
    val columnIdx = columnMap.getOrElse(name, throw new Exception(FrameErrorMessages.INDEX_OUT_OF_SIZE))
    // Contrary to usual Python slices, both the start and the stop are included.
    data(columnIdx).slice(index.start, index.end + 1)
  }
  FrameX2(selected, columns)
}

/**
 * Selects a row range from a single named column.
 *
 * @param index      row range; only `index.start` and `index.end` are read
 * @param columnName name of the column to keep
 * @return a one-column frame with the selected rows
 * @throws Exception (message `FrameErrorMessages.INDEX_OUT_OF_SIZE`) if the name is not in `columnMap`
 */
def loc(index: Range, columnName: String): FrameX2 = {
  val columnIdx = columnMap.getOrElse(columnName, throw new Exception(FrameErrorMessages.INDEX_OUT_OF_SIZE))
  // Contrary to usual Python slices, both the start and the stop are included.
  val series: Vector[ElemX] = data(columnIdx).slice(index.start, index.end + 1)
  FrameX2(Vector(series), columnName :: Nil)
}


/**
 * Extracts a single row as a one-row frame.
 *
 * The result is built through the single-argument factory, so its columns are named
 * by position ("0", "1", ...) rather than by this frame's column names.
 *
 * @param rowIdx zero-based row index
 */
def apply(rowIdx: Int): FrameX2 = {
  val singleRow: Vector[Vector[ElemX]] = data.map(column => Vector(column(rowIdx)))
  FrameX2(singleRow)
}

/**
 * Extracts the rows in `[rowFrom, rowTo)` as a new frame.
 *
 * The result is built through the single-argument factory, so its columns are named
 * by position ("0", "1", ...) rather than by this frame's column names.
 *
 * @param rowFrom first row index (inclusive)
 * @param rowTo   end row index (exclusive)
 */
def apply(rowFrom: Int, rowTo: Int): FrameX2 = {
  val rowSlice: Vector[Vector[ElemX]] = data.map(column => column.slice(rowFrom, rowTo))
  FrameX2(rowSlice)
}

// def apply(columnName: String) : FrameX = {
// columnMap.get(columnName) match {
// case None => throw new Exception("")
// case Some(columnIndex) => {
// FrameX(this.data(columnIndex), columnNames=List(columnName))
// }
// }
// }


/**
 * Stacks the rows of `that` below the rows of this frame, column by column.
 *
 * @param that frame whose `columnMap` must equal this frame's `columnMap`
 * @return a new frame with this frame's column names
 * @throws Exception (message `FrameErrorMessages.COLUMN_NAMES_MISMATCH`) when the column maps differ
 */
def append(that: FrameX2): FrameX2 = {
  if (this.columnMap == that.columnMap) {
    val stacked = this.data.zip(that.data).map { case (mine, theirs) => mine ++ theirs }
    FrameX2(stacked, this.columnNames)
  } else {
    throw new Exception(FrameErrorMessages.COLUMN_NAMES_MISMATCH)
  }
}


/**
 * Groups the rows of this frame by the values found in the given columns.
 *
 * Each row is keyed by the `toString` of the list of its values in the key columns; rows
 * sharing a key are collected into their own frame carrying this frame's column names.
 * Names that are not present in `columnMap` are silently ignored.
 *
 * @param columnNames names of the key columns
 * @return always `Some`, wrapping a `GroupByFrameX` over the key -> sub-frame map
 */
def groupBy(columnNames: List[String]): Option[GroupByFrameX] = {
  val columnIndexs: List[Int] = columnNames.flatMap(this.columnMap.get)
  val dataMap = mutable.Map[String, FrameX2]()

  // data is stored column-wise: transpose to rows, group, then transpose each group back.
  this.data.transpose
    .groupBy((record: Vector[ElemX]) => columnIndexs.map(record(_)).toString())
    .foreach { case (key, rows) =>
      // `this.columnNames` on purpose: the parameter `columnNames` shadows the member.
      dataMap += (key -> FrameX2(rows.transpose, this.columnNames))
    }
  Some(new GroupByFrameX(dataMap))
}

/** Single-column convenience overload: groups by the one named column. */
def groupBy(columnName: String): Option[GroupByFrameX] =
  this.groupBy(columnName :: Nil)

/**
 * Applies `fn` to every element of every column.
 *
 * @param fn element-wise transformation
 * @return a new frame with the same column names and the same `aggMap` as this one
 */
def applyMap(fn : ElemX => ElemX) : FrameX2 = {
  val transformed = for (column <- this.data) yield column.map(fn)
  val result = FrameX2(transformed, this.columnNames)
  // Carry the aggregation metadata over unchanged.
  result.aggMap = this.aggMap
  result
}

/**
 * Prints the frame to standard output as a table: a header line with the column names,
 * a dashed separator, then the rows returned by `head()`.
 *
 * Each column is padded to the width of its widest rendered cell (computed over all rows,
 * not only the printed ones) or its name, whichever is longer. A frame whose columns
 * contain no rows prints only the header and separator.
 */
def prettyPrint(): Unit = {
  // Vector gives constant-time indexed access; columnNames is a List.
  val names: Vector[String] = columnNames.toVector

  // foldLeft from 0 instead of `.max`, which throws on an empty column.
  val columnWidths: Vector[Int] = this.data.zipWithIndex.map { case (col, colIdx) =>
    val widestCell = col.foldLeft(0)((widest, x) => math.max(widest, x.elem.toString.length))
    math.max(widestCell, names(colIdx).length)
  }

  // Right-pads `text` with spaces up to `width`.
  def pad(text: String, width: Int): String = text + " " * (width - text.length)

  val sb: StringBuilder = new StringBuilder()
  // print header
  for (i <- this.data.indices) {
    sb.append(" | ")
    sb.append(pad(names(i), columnWidths(i)))
  }
  val allLength = sb.length
  sb.append("\n")
  sb.append("-" * allLength + "\n")

  val dfHead = this.head()
  val rowCount = dfHead.data.headOption.map(_.length).getOrElse(0)
  for (rowNum <- 0 until rowCount) {
    for ((col, idx) <- dfHead.data.zipWithIndex) {
      sb.append(" | ")
      sb.append(pad(col(rowNum).elem.toString, columnWidths(idx)))
    }
    sb.append("\n")
  }
  sb.toString().split("\n").foreach(println)
}

/**
 * Structural comparison with another frame.
 *
 * Two frames are the same when their `columnMap` and `aggMap` are equal, they have the
 * same number of columns, every pair of corresponding columns has the same length, and
 * the wrapped `elem` values are pairwise equal.
 *
 * @param that the frame to compare against
 * @return true when both frames hold the same metadata and the same elements
 */
def sameElements(that: FrameX2): Boolean = {
  // Compares two columns; the explicit size check rejects a column that is only a
  // prefix of the other one.
  def sameColumn(mine: Vector[ElemX], theirs: Vector[ElemX]): Boolean =
    mine.size == theirs.size &&
      mine.iterator.zip(theirs.iterator).forall { case (a, b) => a.elem.equals(b.elem) }

  this.columnMap == that.columnMap &&
    this.aggMap == that.aggMap &&
    this.data.size == that.data.size &&
    this.data.iterator.zip(that.data.iterator).forall { case (mine, theirs) => sameColumn(mine, theirs) }
}

/** Frames are equal when `sameElements` holds; anything that is not a FrameX2 is unequal. */
override def equals(obj: Any): Boolean = obj match {
  case that: FrameX2 => this sameElements that
  case _ => false
}

/**
 * Hash consistent with `equals`: frames that compare equal always share the same
 * `columnMap` and the same number of columns, so hashing those two guarantees equal
 * frames hash alike (the identity hash used before did not).
 * The mutable `aggMap` is deliberately left out so the hash stays stable over time.
 */
override def hashCode(): Int = (columnMap, data.size).hashCode()
}

object FrameX2 {

/**
 * Builds a frame from column vectors, naming the columns by position ("0", "1", ...).
 *
 * @param data column-major values; every column must have the same length
 * @throws Exception (message `FrameErrorMessages.COLUMN_SIZE_MISMATCH`) unless all columns
 *                   share exactly one length
 */
def apply(data: Vector[Vector[_]])(implicit ct: ClassTag[ElemX]): FrameX2 = {
  val distinctLengths = data.map(_.size).distinct
  if (distinctLengths.size == 1) {
    val columnNames : List[String] = data.indices.map(_.toString).toList
    FrameX2(data.map(_.map(ElemX.wrapper)), columnNames)
  } else {
    throw new Exception(FrameErrorMessages.COLUMN_SIZE_MISMATCH)
  }
}

/**
 * Builds a frame from column vectors and explicit column names.
 *
 * Every value is passed through `ElemX.wrapper`; each name is mapped to its position
 * in `columns`.
 *
 * @param data    column-major values; every column must have the same length
 * @param columns column names, in column order
 * @throws Exception (message `FrameErrorMessages.COLUMN_SIZE_MISMATCH`) unless all columns
 *                   share exactly one length
 */
def apply(data: Vector[Vector[_]], columns: List[String])(implicit ct: ClassTag[ElemX]): FrameX2 = {
  val distinctLengths = data.map(_.size).distinct
  if (distinctLengths.size == 1) {
    val nameToIndex = columns.zipWithIndex.toMap
    new FrameX2(data.map(_.map(ElemX.wrapper)), nameToIndex)
  } else {
    throw new Exception(FrameErrorMessages.COLUMN_SIZE_MISMATCH)
  }
}

/** Builds a one-column frame, named "0", from a single column of elements. */
def apply(data_ : Vector[ElemX]): FrameX2 = {
  val singleColumn = Vector(data_)
  FrameX2(singleColumn, "0" :: Nil)
}

/**
 * Builds a frame from a list of columns, naming the columns by position ("0", "1", ...).
 *
 * @param ll column-major values; every inner list must have the same length
 * @throws Exception unless all inner lists share exactly one length
 */
def apply(ll: List[List[_]])(implicit ct: ClassTag[ElemX]): FrameX2 = {
  val distinctLengths = ll.map(_.size).distinct
  if (distinctLengths.size == 1) {
    val columnNames : List[String] = ll.indices.map(_.toString).toList
    FrameX2(ll.map(_.toVector).toVector, columnNames)
  } else {
    throw new Exception("COLUMNS' SIZE MUST SAME!")
  }
}

/**
 * Builds a frame from a list of columns and explicit column names.
 *
 * @param ll      column-major values; every inner list must have the same length
 * @param columns one name per column, in column order
 * @throws Exception unless all inner lists share exactly one length, or when the number of
 *                   names differs from the number of columns
 */
def apply(ll: List[List[_]], columns: List[String]): FrameX2 = {
  val distinctLengths = ll.map(_.size).distinct
  if (distinctLengths.size != 1) {
    throw new Exception("COLUMNS' SIZE MUST SAME!")
  } else if (ll.size != columns.size) {
    throw new Exception("column_names' size is not equal to real data size")
  } else {
    val asVectors = ll.map(_.toVector).toVector
    FrameX2(asVectors, columns)
  }
}
}
@@ -6,7 +6,7 @@ package com.framex.core

import scala.collection.mutable

class GroupByFrameX(var dataMap: mutable.Map[String, FrameX]) {
class GroupByFrameX(var dataMap: mutable.Map[String, FrameX2]) {

import com.framex.stats.Stats._
def agg(opName: String) : GroupByFrameX = {
@@ -3,27 +3,108 @@
*/
package com.framex.core

import scalaz.{Coyoneda, _}
import com.framex.core.Expr.BottomType
import scalaz.{:+:, Applicative, Functor, Monad}
import shapeless.HList

//object Foo {
// type ISB = Int :+: String :+: Boolean :+: CNil
//
//}

// Typed field wrapper, parameterised by the Scala type of the value it carries.
// Sealed: every subtype must be declared in this file.
sealed trait TField[A]
// NOTE(review): TFNone does not extend TField, so it is not part of the sealed hierarchy — confirm this is intended.
case object TFNone
// Field carrying an Int.
case class IntField(x: Int) extends TField[Int]
// Field carrying a Double.
case class DoubleField(x: Double) extends TField[Double]
// Field carrying a String.
case class StringField(x: String) extends TField[String]

object TField {
type CoyoTField[A] = Coyoneda[TField, A]
/** A free monad over the free functor generated by `S` */
// type FreeC[S[_], A] = Free[({type f[x] = Coyoneda[S, x]})#f, A]
// type FreeTField[A] = FreeC[TField, A]


// Single-value element, parameterised by its value type. Not sealed.
trait Elem[A]
// Leaf node: holds a concrete value.
case class ElemData[A](x: A) extends Elem[A]
// Node pairing a function with the element it is meant to be applied to; it is an Elem of the function's result type.
case class ElemMap[A, B](func: A => B, elem: Elem[A]) extends Elem[B]
// NOTE(review): ElemGroup has no fields and does not extend Elem — looks like a placeholder; confirm.
case class ElemGroup[A, B]()

object Elem {

  /**
   * Functor instance for [[Elem]].
   *
   * Mapping over an `ElemData` applies `f` to the held value right away. Any other element
   * is wrapped in an `ElemMap` node that records `f` together with the source element, so
   * `map` is total instead of throwing for non-leaf elements.
   */
  implicit val elemFunctor : Functor[Elem] = new Functor[Elem] {
    override def map[A, B](fa: Elem[A])(f: A => B): Elem[B] = fa match {
      case ElemData(x) => ElemData(f(x))
      case deferred => ElemMap(f, deferred)
    }
  }

}


// Sequence (column) of elements, parameterised by the element type. Not sealed.
trait ElemSeq[A]
// Leaf node: a named vector of concrete values.
case class ElemSeqData[A](name: String, data: Vector[A]) extends ElemSeq[A]
// Node pairing a function with the sequence it is meant to be applied to; it is an ElemSeq of the function's result type.
case class ElemSeqMap[A, B](func: A => B, elem: ElemSeq[A]) extends ElemSeq[B]
// Sequences partitioned under HList keys.
// NOTE(review): presumably the HList holds the group-by key values — confirm.
case class ElemGroupBy[A](map: Map[HList, ElemSeq[A]]) extends ElemSeq[A]


object ElemSeq {

  /**
   * Functor instance for [[ElemSeq]].
   *
   * Mapping over an `ElemSeqData` applies `f` to every value and keeps the name. Any other
   * sequence is wrapped in an `ElemSeqMap` node that records `f` together with the source
   * sequence, so `map` is total instead of throwing for non-leaf sequences.
   */
  implicit val elemSeqFunctor : Functor[ElemSeq] = new Functor[ElemSeq] {
    override def map[A, B](fa: ElemSeq[A])(f: A => B): ElemSeq[B] = fa match {
      case ElemSeqData(name, data) => ElemSeqData(name, data.map(f))
      case deferred => ElemSeqMap(f, deferred)
    }
  }

  /**
   * Applicative instance for [[ElemSeq]]; `point` and `ap` are not implemented yet.
   *
   * `map` is overridden to delegate to `elemSeqFunctor`: the default `Applicative.map` is
   * derived from `ap` and `point`, so it would fail with NotImplementedError whenever this
   * instance is the one used for mapping.
   */
  implicit val elemSepApplicative : Applicative[ElemSeq] = new Applicative[ElemSeq] {
    override def point[A](a: => A): ElemSeq[A] = ???

    override def ap[A, B](fa: => ElemSeq[A])(f: => ElemSeq[A => B]): ElemSeq[B] = ???

    override def map[A, B](fa: ElemSeq[A])(f: A => B): ElemSeq[B] = elemSeqFunctor.map(fa)(f)
  }

  // implicit val elemSeqMonad : Monad[ElemSeq] = new Monad[ElemSeq] {
  // override def bind[A, B](fa: ElemSeq[A])(f: A => ElemSeq[B]): ElemSeq[B] = {
  //
  // fa match {
  // case ElemSeqData(name, vec: Vector[A]) => {
  // ElemSeqData(name, vec.map(f).)
  // }
  // case _ => throw new Exception("")
  // }
  //
  // def liftFC[S[_], A](s: S[A]): FreeC[S, A] =
  // Free.liftF(Coyoneda lift s)
  // }
  // override def point[A](a: => A): ElemSeq[A] = ElemSeqData("", Vector())
  // }
}

// Root of the frame representation. Not sealed.
trait Frame
// Frame backed by a vector of columns; each column may have a different element type.
case class FrameData(data: Vector[ElemSeq[_]]) extends Frame {}
// Columns partitioned under HList keys.
// NOTE(review): presumably the HList holds the group-by key values — confirm.
case class FrameGroupBy(data: Map[HList, Vector[ElemSeq[_]]]) extends Frame
// Node pairing a function with the frame it is meant to be applied to.
case class FrameMap[A, B](func: A => B, data: Frame) extends Frame


object Frame {

  import ElemSeq.elemSeqFunctor
  import scalaz.Scalaz._
  import scalaz.Functor

  /** Functor used to map over the individual columns of a frame. */
  val SeqFuntor = ElemSeq.elemSeqFunctor

  /**
   * Applies `f` to every value of every column of a `FrameData`.
   *
   * @param fa frame to transform; only `FrameData` whose columns are all `ElemSeqData` is supported
   * @param f  element-wise transformation
   * @tparam A element type the columns are assumed to hold; this is NOT checked at runtime
   *           because of type erasure, so a wrong `A` surfaces as a ClassCastException
   *           when `f` is applied
   * @tparam B element type of the resulting columns
   * @return a new `FrameData` with the mapped columns, in the original order
   * @throws Exception for any other kind of frame or column
   */
  def applyMap[A, B](fa: Frame)(f: A => B) : Frame = fa match {
    case FrameData(data) =>
      val mappedColumns: Vector[ElemSeq[_]] = data.map {
        case ElemSeqData(name, seqData: Vector[A @unchecked]) =>
          val mapped: ElemSeq[_] = SeqFuntor.map(ElemSeqData[A](name, seqData))(f)
          mapped
        case other =>
          throw new Exception(s"Frame.applyMap only supports ElemSeqData columns, got: ${other.getClass.getName}")
      }
      FrameData(mappedColumns)
    case other =>
      throw new Exception(s"Frame.applyMap only supports FrameData, got: ${other.getClass.getName}")
  }
}

This file was deleted.

@@ -5,7 +5,7 @@
package com.framex.io

import com.framex.core.Expr.{ExDouble, ExString}
import com.framex.core.{ElemX, FrameX}
import com.framex.core.{ElemX, FrameX2}
import com.framex.utils.{CSVHandler, Constants}
import org.nd4j.linalg.api.ndarray.INDArray

@@ -16,7 +16,7 @@ object FrameXIO {


def readCSV(fileName: String,
sep: String): FrameX = {
sep: String): FrameX2 = {

val bufferedSource = CSVHandler.getSourceFromCSV(fileName)
val headerNames = CSVHandler.getHeaderFromSource(bufferedSource)
@@ -32,19 +32,19 @@ object FrameXIO {
dataBuffer += columnData.map(s => ElemX(ExString(s)))
}
}
FrameX(dataBuffer.toList, headerNames)
FrameX2(dataBuffer.toList, headerNames)
}

def toCSV(df: FrameX, fileName: String): Unit = {
def toCSV(df: FrameX2, fileName: String): Unit = {
???
}

def readND4J(nd4jData: INDArray): FrameX = {
def readND4J(nd4jData: INDArray): FrameX2 = {
val data = nd4jData.transpose().toDoubleMatrix().map(f => f.map(ElemX.wrapper).toVector).toVector
FrameX(data)
FrameX2(data)
}

def toND4J(df: FrameX): Any = {
def toND4J(df: FrameX2): Any = {
???
}

@@ -4,14 +4,14 @@

package com.framex.stats

import com.framex.core.{ElemX, FrameX}
import com.framex.core.{ElemX, FrameX2}
import com.framex.utils.FrameErrorMessages

object Stats {

implicit class FrameStats(var df: FrameX) {
implicit class FrameStats(var df: FrameX2) {

def agg(opName: String): FrameX = {
def agg(opName: String): FrameX2 = {
val data = df.data.map(columnData => {
Vector(getBasicStatsOp(opName).apply(columnData))
})
@@ -22,12 +22,12 @@ object Stats {
kv._1 -> (opName -> kv._2)
}
}.mapValues(Map(_))
val dfAgg = FrameX(data)
val dfAgg = FrameX2(data)
dfAgg.aggMap = aggMap
dfAgg
}

def agg(opNames: List[String]): FrameX = {
def agg(opNames: List[String]): FrameX2 = {
val data = df.data.flatMap(columnData => {
opNames.map(op => Vector(getBasicStatsOp(op).apply(columnData))).toVector
})
@@ -45,12 +45,12 @@ object Stats {
}
}.toMap
}
val dfAgg = FrameX(data)
val dfAgg = FrameX2(data)
dfAgg.aggMap = aggMap
dfAgg
}

def agg(opMap: Map[String, List[String]]): FrameX = {
def agg(opMap: Map[String, List[String]]): FrameX2 = {
var columnIndexAcc = 0
val iterable = {
for {
@@ -79,7 +79,7 @@ object Stats {
case (col, list) =>
col -> list.map(_._2).toMap
}
val dfAgg = FrameX(data)
val dfAgg = FrameX2(data)
dfAgg.aggMap = aggMap
dfAgg
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -5,7 +5,7 @@
package com.framex

import com.framex.core.Expr.ExDouble
import com.framex.core.{ElemX, FrameX}
import com.framex.core.{ElemX, FrameX2}
import com.framex.io.FrameXIO
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.factory.Nd4j
@@ -16,7 +16,7 @@ class TestIO extends FlatSpec with Matchers {
it should "read from ND4J" in {
val arr1: INDArray = Nd4j.create(Array[Float](1, 2, 3, 4), Array[Int](2, 2))
val df = FrameXIO.readND4J(arr1)
df.equals(FrameX(
df.equals(FrameX2(
Vector(
Vector(ElemX(ExDouble(1)), ElemX(ExDouble(3))),
Vector(ElemX(ExDouble(2)), ElemX(ExDouble(4)))
@@ -4,29 +4,61 @@

package com.framex

import com.framex.core.{IntField, TField}
import com.framex.core.TField.CoyoTField
import com.framex.core._
import org.scalatest.{FlatSpec, Matchers}
import scalaz.Coyoneda

class TestTField extends FlatSpec with Matchers {


it should "test coyoneda" in {
it should "test applyMap" in {


import Elem.elemFunctor
import scalaz.Scalaz._



val fn = (x: Int) => x * 2
val e1 : Elem[Int] = ElemData(100)
val e2 = e1 map fn
e2 match {
case ElemData(x) => print(x)
case _ => print("axiba")
}
print("hhhh")

val elemSeq1 = ElemSeqData("title", Vector(1,2,3,4,5))
val ff1 = FrameData(Vector(elemSeq1))

val ff2 = Frame.applyMap(ff1)(fn)
print(ff2)


import scalaz._

val fn = (x :Int) => x + 2
val t1 : Coyoneda[TField, Int] = Coyoneda.lift(IntField(2))
print(t1.fi)
val t2 = t1.map(fn)
print(t2.fi)
print("foobar")


//
// val ct2 = TField.fmap(n)(fn)
// print("axibabababab")
// print(ct2.unlift)
}
// it should "test coyoneda" in {
//
// type ISB = Int :+: Double :+: String :+: CNil
//
// object size extends Poly1 {
// implicit def caseInt = at[Int](i => (i, i))
// implicit def caseString = at[String](s => (s, s.length))
// implicit def caseBoolean = at[Boolean](b => (b, 1))
// }
//
// object reverse2 extends Poly1 {
// implicit def caseString = at[String](x => x.reverse)
// }
//
// val isb = Coproduct[ISB]("foo")
// val isbr = isb map reverse2
// print(isbr.select[String])
//
// print("123")
//
//
//
// }
}