In [None]:
# setup-- 
import os
import pyspark
from splicemachine.spark.context import PySpliceContext
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession

# Make sure PySpark tells its workers to use python3 (not python2) when both
# are installed.
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
# Host part of the JDBC URL; a missing JDBC_HOST variable raises KeyError here.
jdbc_host = os.environ['JDBC_HOST']

conf = pyspark.SparkConf()
# getOrCreate() keeps this cell re-runnable: constructing SparkContext directly
# raises "Cannot run multiple SparkContexts at once" on a second execution.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

spark = SparkSession.builder.config(conf=conf).getOrCreate()

splicejdbc=f"jdbc:splice://{jdbc_host}:1527/splicedb;user=splice;password=admin"

# Splice context built from the Spark session and the JDBC URL above.
splice = PySpliceContext(spark, splicejdbc)


In [None]:
%%sql
%defaultDatasource jdbc:splice://jrtest01-splice-hregion:1527/splicedb;user=splice;password=admin

<link rel="stylesheet" href="https://doc.splicemachine.com/zeppelin/css/zepstyles.css" />

# Crystal Ball
The Crystal Ball application is a supply chain reference application relevant to manufacturers, distributors, retailers, and e-commerce companies. With the Crystal Ball you can:

1. Perform Available-to-Promise (ATP) inquiries in seconds on real-time inventory changes due to purchases, manufacturing, sales, and shipments
2. Learn when shipments are likely to be late
3. Anticipate stock outs due to predicted late orders
4. Determine what customers or downstream orders are affected by anticipated stockouts.

When supply chain managers have the Crystal Ball, they can:
- plan around stockouts
- warn downstream consumers so they can re-plan.



In [None]:
%%scala 
  // Remind the user where the cluster's JDBC URL can be found.
  println("Please copy and paste your JDBC URL. You can find it at the bottom right of your cluster dashboard")
  // Interactive variant (disabled): prompt for the URL through a Zeppelin input form.
//   val defaultJDBCURL = z.input("JDBCurl","""jdbc:splice://{FRAMEWORKNAME}-proxy.marathon.mesos:1527/splicedb;user=splice;password=admin;useSpark=true""").toString
  // URL for a database listening on localhost.
  val localJDBCURL: String = "jdbc:splice://localhost:1527/splicedb;user=splice;password=admin"
  // URL used by the rest of the notebook.
  val defaultJDBCURL: String = "jdbc:splice://jrtest01-splice-hregion:1527/splicedb;user=splice;password=admin"


In [None]:
%%scala 
import java.sql.{Connection,Timestamp}
import java.util.Date
import com.splicemachine.si.api.txn.WriteConflict
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
import com.splicemachine.spark.splicemachine._
    
  /* Name of the internal timeline table and its schema. */
  val table = "TimeLine_Int"
  val schema = "TimeLine"
  val internalTN = schema + "." + table

  /* Bounds of the single interval that initialize() writes for a new timeline. */
  val startOfTimeStr = "1678-01-01 00:00:00"
  val endOfTimeStr = "2261-12-31 00:00:00"
  val startOfTime = java.sql.Timestamp.valueOf(startOfTimeStr)
  val endOfTime = java.sql.Timestamp.valueOf(endOfTimeStr)

  /* Upper bound on retries after a write conflict. */
  val MAX_RETRIES: Integer = 2

  /* One-based parameter positions for PreparedStatement setters. */
  val SQL_ID = 1
  val SQL_ST = 2
  val SQL_ET = 3
  val SQL_VAL = 4

  /* Zero-based column positions for Row getters on timeline DataFrames. */
  val DF_ID = 0
  val DF_ST = 1
  val DF_ET = 2
  val DF_VAL = 3

  /* Column clauses appended to "create table <name>", with and without the primary key. */
  val columnsWithPrimaryKey: String  = "(Timeline_Id bigint, " + "ST timestamp, " + "ET timestamp, " + "Val bigint, " + "primary key (Timeline_ID, ST)" +")"
  val columnsWithoutPrimaryKey = "(" + "Timeline_Id bigint, " + "ST timestamp, " + "ET timestamp, " + "Val bigint " + ")"
  /* NOTE(review): this is a one-element Seq holding "Timeline_ID, ST"; confirm that
     consumers do not expect one element per key column. */
  val primaryKeys = Seq("Timeline_ID, ST")
  val columnsInsertString = "(" + "Timeline_Id, " + "ST, " + "ET, " + "Val" + ") "
  /* The value column is named Val in every table definition, so the select list must use Val. */
  val columnsSelectString = "Timeline_Id, " + "ST, " + "ET, " + "Val"
  val columnsInsertStringValues = "values (?,?,?,?)"


  /* (t1<=ST and t2>ST) or (t1>ST and t1<ET)
     Parameters after the id are bound in the order t1, t2, t1, t1. */
  val overlapCondition = "where Timeline_Id = ? and ((ST >=? and ST <?) or ((ST < ?) and (ET > ?)))"


  /* Spark JDBC options that point at the internal timeline table. */
  val internalOptions = Map(
    JDBCOptions.JDBC_TABLE_NAME -> internalTN,
    JDBCOptions.JDBC_URL -> defaultJDBCURL
  )

  /* The same options wrapped for JdbcUtils, plus the Splice context used for table checks and writes. */
  val internalJDBCOptions = new JDBCOptions(internalOptions)
  val splicemachineContext = new SplicemachineContext(defaultJDBCURL)



  /**
    *
    * createTimeline (table)
    *
    * Drops the table when splicemachineContext.tableExists reports it, then
    * creates it again from the supplied column definition. The JDBC statement
    * and connection are always closed, even when a statement fails.
    *
    * @param table table name of timeline
    * @param columnsWithPrimaryKey parenthesised column clause appended to "create table <table>"
    * @param internalJDBCOptions JDBC options used to open the connection
    * @return
    */
  def createTimeline(table: String, columnsWithPrimaryKey: String , internalJDBCOptions: JDBCOptions = internalJDBCOptions ): Unit = {
    val conn = JdbcUtils.createConnectionFactory(internalJDBCOptions)()
    try {
      val stmt = conn.createStatement()
      try {
        if (splicemachineContext.tableExists(table)){
          stmt.execute("drop table " + table)
        }
        stmt.execute("create table " + table + columnsWithPrimaryKey)
      } finally {
        stmt.close()
      }
    } finally {
      /* The original never closed the connection, leaking one per call. */
      conn.close()
    }
  }
  
  
  
  /**
    *
    * initialize (id startOfTime endOfTime value)
    *
    * Removes every row of the given timeline and inserts a single interval
    * [startOfTime, endOfTime] carrying the initial value. Both prepared
    * statements and the connection are closed on every path.
    *
    * @param table table name of timeline
    * @param id id of timeline
    * @param value initial value of timeline
    * @param columnsInsertString column list appended after "insert into <table>"
    * @param columnsInsertStringValues "values (...)" clause with four placeholders
    * @param internalJDBCOptions JDBC options used to open the connection
    * @return
    */
 def initialize(table: String, id: Integer, value: Integer, columnsInsertString : String = columnsInsertString ,columnsInsertStringValues :String = columnsInsertStringValues  , internalJDBCOptions :JDBCOptions = internalJDBCOptions ): Unit = {
    val conn = JdbcUtils.createConnectionFactory(internalJDBCOptions)()
    val start: Timestamp = startOfTime
    val end: Timestamp = endOfTime
    try {
      /* Bind the id instead of concatenating it, matching the insert below. */
      val deletePs = conn.prepareStatement("delete from " + table + " where timeline_id = ?")
      try {
        deletePs.setInt(SQL_ID, id)
        deletePs.execute()
      } finally {
        deletePs.close()
      }

      val insertPs = conn.prepareStatement("insert into " + table + columnsInsertString + columnsInsertStringValues)
      try {
        insertPs.setInt(SQL_ID, id)
        insertPs.setTimestamp(SQL_ST, start)
        insertPs.setTimestamp(SQL_ET, end)
        insertPs.setInt(SQL_VAL, value)
        insertPs.execute()
      } finally {
        insertPs.close()
      }
    } finally {
      conn.close()
    }
  }

  /* Persistence modes accepted by the split/update functions. */
  val CHANGE_AT_ST: Int = 0          // delta persists from t1 onwards
  val CHANGE_AT_ET: Int = 1          // delta persists from t2 onwards
  val CHANGE_BETWEEN_ST_ET: Int = 2  // delta applies during [t1 t2] only
  

  /**
    * splitMiddle - The new delta interval is subsumed by one interval.
    *
    *  ST------------ET
    *      t1---t2         ==>   ST---t1 t1----t2 t2----ET
    *
    * Change the original interval to end at the start of the new delta interval
    * Create a new record for the delta and apply the delta value
    * Create a new record for the interval from the delta to the end of the original interval
    *
    * The containing interval is fetched once; nothing happens when no
    * interval strictly contains [t1 t2].
    *
    * @param id - the id of the timeline to update
    * @param t1 - the start of new delta
    * @param t2 - the end of the new delta
    * @param delta - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                      CHANGE_AT_ET persists delta from t2 onwards
    *                      CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    * @param internalTN - name of the timeline table that is written
    * @param internalOptions - Spark JDBC options used to read the timeline table
    *
    */
  def splitMiddle(id: Integer,
                  t1: java.sql.Timestamp, t2: java.sql.Timestamp,
                  delta: Long,
                  persistence: Int,
                  internalTN : String =internalTN ,
                  internalOptions : Map[String,String] = internalOptions): Unit = {
    val df = sqlContext.read.options(internalOptions).splicemachine.where(s"TIMELINE_ID = $id AND ST < to_utc_timestamp('$t1','GMT') AND ET > to_utc_timestamp('$t2','GMT')")

    /* One fetch replaces the original count() plus two first() calls, each of
       which ran a separate Spark job over the same predicate. */
    df.take(1).headOption.foreach { containing =>

      /* Save old values before the row is rewritten */
      val oldVal = containing.getLong(DF_VAL)
      val oldET = containing.getTimestamp(DF_ET)

      /* Update containing interval to be the begin split.
         df already carries the predicate, so no second filter is needed. */
      val updatedDF = df
        .select("TIMELINE_ID", "ST", "ET", "VAL")
        .withColumn("ET", lit(t1))
      splicemachineContext.update(updatedDF, internalTN)

      /* calculate persistence */
      val firstValue: Long = persistence match {
        case CHANGE_AT_ST          => oldVal + delta
        case CHANGE_AT_ET          => oldVal
        case CHANGE_BETWEEN_ST_ET  => oldVal + delta
        case _                     => 0
      }

      /* Insert the two new splits */
      /* Note - the second new split will have delta added
         in the persistAfter method
       */
      val newDF = sqlContext.createDataFrame(Seq(
        (id, t1, t2, firstValue),
        (id, t2, oldET, oldVal)))
        .toDF("TIMELINE_ID", "ST", "ET", "VAL")
      splicemachineContext.insert(newDF, internalTN)
    }
  }


  /***
    * 	splitAtEnd - Delta overlaps beginning of interval.
    *
    *         ST------ET
    *      t1---t2         ==>  ST---t2 t2----ET
    *
    * Change the interval to end at the end of the delta then add a split from end of delta to the end of interval
    *
    * The overlapping interval is fetched once; nothing happens when no
    * interval matches.
    *
    * @param id - the id of the timeline to update
    * @param t1 - the start of new delta
    * @param t2 - the end of the new delta
    * @param delta - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                      CHANGE_AT_ET persists delta from t2 onwards
    *                      CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    * @param internalTN - name of the timeline table that is written
    * @param internalOptions - Spark JDBC options used to read the timeline table
    */
  def splitAtEnd(id: Integer,
                 t1: java.sql.Timestamp, t2: java.sql.Timestamp,
                 delta: Long,
                 persistence: Int,
                  internalTN : String =internalTN ,
                  internalOptions : Map[String,String] = internalOptions): Unit = {
    val df = sqlContext.read.options(internalOptions).splicemachine
      .where(s"""TIMELINE_ID = $id AND ST >= to_utc_timestamp('$t1','GMT') AND ET > to_utc_timestamp('$t2','GMT') AND ST < to_utc_timestamp('$t2','GMT')""")

    /* One fetch replaces the original count() plus three first() calls. */
    df.take(1).headOption.foreach { overlapping =>

      /* Save old values before the row is rewritten */
      val oldVal = overlapping.getLong(DF_VAL)
      val oldET = overlapping.getTimestamp(DF_ET)

      /* calculate persistence */
      /* Note - the second new split will have delta added
         in the persistAfter method if required
       */
      val firstValue: Long = persistence match {
        case CHANGE_AT_ST          => oldVal + delta
        case CHANGE_AT_ET          => oldVal
        case CHANGE_BETWEEN_ST_ET  => oldVal + delta
        case _                     => 0
      }

      /* Update overlapping interval to be the begin split.
         df already carries the predicate, so no second filter is needed. */
      val updatedDF = df
        .select("TIMELINE_ID", "ST", "ET", "VAL")
        .withColumn("ET", lit(t2))
        .withColumn("VAL", lit(firstValue))
      splicemachineContext.update(updatedDF, internalTN)

      /* Insert a new split after the delta */
      val newDF = sqlContext.createDataFrame(Seq((id, t2, oldET, oldVal))).toDF("TIMELINE_ID", "ST", "ET", "VAL")
      splicemachineContext.insert(newDF, internalTN)
    }
  }

  /**
    * 	splitAtStart - Delta overlaps end of interval.
    *
    *         ST-----ET
    *            t1------t2         ==>    ST---t1 t1---ET
    *
    * Change the interval to end at the start of the delta then add a split from beginning of delta to the end of interval
    *
    * The piece before t1 keeps its old value; the new piece [t1 ET] lies inside
    * the delta and therefore receives it (unless the delta only persists from
    * t2 onwards). An interval that ends exactly at t2 is handled here as well,
    * since no other split function matches it.
    *
    * @param id - the id of the timeline to update
    * @param t1 - the start of new delta
    * @param t2 - the end of the new delta
    * @param delta - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                      CHANGE_AT_ET persists delta from t2 onwards
    *                      CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    * @param internalTN - name of the timeline table that is written
    * @param internalOptions - Spark JDBC options used to read the timeline table
    */
  def splitAtStart(id: Integer,
                   t1: java.sql.Timestamp, t2: java.sql.Timestamp,
                   delta: Long, persistence: Int,
                  internalTN : String =internalTN ,
                  internalOptions : Map[String,String] = internalOptions): Unit = {
    /* ET <= t2 (was ET < t2): with ST < t1 and ET == t2 the interval matched
       neither this function, splitMiddle (ET > t2) nor changeNoSplit (ST >= t1),
       so the delta was silently dropped for [t1 t2]. */
    val df = sqlContext.read.options(internalOptions).splicemachine.where(s"TIMELINE_ID = $id AND ST < to_utc_timestamp('$t1','GMT') AND " +
                s"ET <= to_utc_timestamp('$t2','GMT') AND ET > to_utc_timestamp('$t1','GMT')")

    /* One fetch replaces the original count() plus three first() calls. */
    df.take(1).headOption.foreach { overlapping =>

      /* Save old values before the row is rewritten */
      val oldVal = overlapping.getLong(DF_VAL)
      val oldET = overlapping.getTimestamp(DF_ET)

      /* calculate persistence for the new piece [t1 ET], which lies inside the delta.
         changeNoSplit has already run when update() reaches this function and
         persistAfter only touches ST >= t2, so the delta must be applied here. */
      val newValue: Long = persistence match {
        case CHANGE_AT_ST          => oldVal + delta
        case CHANGE_AT_ET          => oldVal
        case CHANGE_BETWEEN_ST_ET  => oldVal + delta
        case _                     => 0
      }

      /* Update overlapping interval to be the begin split. It ends before the
         delta starts, so its value is left untouched (the original wrote the
         delta here and left the new piece at the old value). */
      val updatedDF = df
        .select("TIMELINE_ID", "ST", "ET", "VAL")
        .withColumn("ET", lit(t1))
      splicemachineContext.update(updatedDF, internalTN)

      /* Insert a new split carrying the delta */
      val newDF = sqlContext.createDataFrame(Seq(
        (id, t1, oldET, newValue)
      )).toDF("TIMELINE_ID", "ST", "ET", "VAL")
      splicemachineContext.insert(newDF, internalTN)
    }
 }
                   
    
  /***
    *   changeNoSplit - Handles all intervals contained by delta
    *
    *           ST-----ET
    *     t1---------------t2
    *
    *  Only values change here; no interval is split.
    *
    * @param id - the id of the timeline to update
    * @param t1 - the start of new delta
    * @param t2 - the end of the new delta
    * @param delta - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                      CHANGE_AT_ET persists delta from t2 onwards
    *                      CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    * @param internalTN - name of the timeline table that is written
    * @param internalOptions - Spark JDBC options used to read the timeline table
    */
  def changeNoSplit(id: Integer,
                    t1: java.sql.Timestamp, t2: java.sql.Timestamp,
                    delta: Long,
                    persistence: Int,
                  internalTN : String =internalTN ,
                  internalOptions : Map[String,String] = internalOptions): Unit = {

    /* Contained intervals receive the delta unless it only persists from t2 onwards */
    val increment: Long =
      if (persistence == CHANGE_AT_ST || persistence == CHANGE_BETWEEN_ST_ET) delta
      else 0L

    val containedPredicate =
      s"TIMELINE_ID = $id AND ST >= to_utc_timestamp('$t1','GMT') AND ET <= to_utc_timestamp('$t2','GMT')"

    val contained = sqlContext.read.options(internalOptions).splicemachine
      .where(containedPredicate)

    val shifted = contained
      .filter(containedPredicate)
      .select("TIMELINE_ID", "ST", "ET", "VAL")
      .withColumn("VAL", col("VAL") + increment)

    splicemachineContext.update(shifted, internalTN)
  }

  /***
    *   persistAfter - changes the values for all intervals after delta
    *
    *     t1---------------t2  ST-----ET
    *
    *  Only values change here; no interval is split.
    *
    * @param id - the id of the timeline to update
    * @param t1 - the start of new delta
    * @param t2 - the end of the new delta
    * @param delta - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                      CHANGE_AT_ET persists delta from t2 onwards
    *                      CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    * @param internalTN - name of the timeline table that is written
    * @param internalOptions - Spark JDBC options used to read the timeline table
    */
  def persistAfter(id: Integer,
                   t1: java.sql.Timestamp, t2: java.sql.Timestamp,
                   delta: Long,
                   persistence: Int,
                  internalTN : String =internalTN ,
                  internalOptions : Map[String,String] = internalOptions): Unit = {

    /* A delta limited to [t1 t2] leaves later intervals alone */
    val persistsBeyondDelta = persistence != CHANGE_BETWEEN_ST_ET

    if (persistsBeyondDelta) {
      /* Every interval that starts at or after t2 */
      val laterIntervals = sqlContext.read.options(internalOptions).splicemachine
        .filter(s"TIMELINE_ID = $id AND ST >= to_utc_timestamp('$t2','GMT')")

      val persistDF = laterIntervals
        .select("TIMELINE_ID", "ST", "ET", "VAL")
        .withColumn("VAL", col("VAL") + delta)

      splicemachineContext.update(persistDF, internalTN)
    }
  }

  /** *
    * update - increases/decreases the value for the interval
    * from the start, end or during the interval
    *
    * Runs, in this order: changeNoSplit, splitAtStart, splitMiddle,
    * splitAtEnd, persistAfter, all against the given table.
    *
    * @param table       - the name of the timeline table
    * @param id          - the id of the timeline to update
    * @param t1          - the start of new delta
    * @param t2          - the end of the new delta
    * @param delta       - an integer increment to the timeline
    * @param persistence - CHANGE_AT_ST persists delta from t1 onwards
    *                    CHANGE_AT_ET persists delta from t2 onwards
    *                    CHANGE_BETWEEN_ST_ET persists delta during [t1 t2]
    */

  def update(table: String,
             id: Integer,
             t1: Timestamp, t2: Timestamp,
             delta: Long,
             persistence: Int): Unit = {

    /* Read options that target the caller's table rather than the default one */
    val tableOptions: Map[String, String] = Map(
      JDBCOptions.JDBC_TABLE_NAME -> table,
      JDBCOptions.JDBC_URL -> defaultJDBCURL
    )

    type Step = (Integer, Timestamp, Timestamp, Long, Int, String, Map[String, String]) => Unit

    /* Order matters: later steps rely on the rows written by earlier ones */
    val steps = Seq[Step](
      changeNoSplit _,
      splitAtStart _,
      splitMiddle _,
      splitAtEnd _,
      persistAfter _
    )

    steps.foreach(step => step(id, t1, t2, delta, persistence, table, tableOptions))
  }








### Application Requirements

* Streaming inventory movements - ASN - EDI856
* Streaming ancillary data like real-time weather
* Needle-in-Haystack OLTP queries and range scans for ATP
* ACID Transactions for concurrent inventory changes
* OLAP for analysis and feature engineering
* OLAP for inventory projections
* Machine Learning
* UI for Collaboration

Splice Machine has all these components pre-integrated and optimized

### Ingest Advanced Shipping Notices - Transfer Orders

Here you can copy and paste these data ingestion calls.

The data was generated by a supply-chain simulator http://localhost:8080/#/notebook/2CKG62TQU.

The simulator ticks through time randomly inserting Transfer Orders and also randomly inserts changes to Transfer Orders delivery dates. 

The generator randomly selects features for the transfer orders. 

The files below reflect the state of the database after the simulation runs.

The orders and change orders are independent files that can be loaded separately.

The simulation runs are cumulative, meaning the demo data set has the inventory timelines for both the test data and the train data.

To use the demo data, load the train and test files for orders and change orders, and then load the demo inventory timeline file.


In [None]:
%%scala 

// Run the prerequisite paragraphs one after another, in this order.
Seq(
  "20170622-063514_1166002275", // JDBC URL
  "20170622-231413_1135446195", // Timeline Code
  "20170622-222153_977899468",  // DDL
  "20170623-174025_718327456"   // Load Data
).foreach(paragraphId => z.run(paragraphId))




In [None]:
%%sql 
-- Schema that holds every table of this notebook.
CREATE SCHEMA TIMELINE;


In [None]:
%%sql 

-- Start from a clean slate so the cell can be re-run.
drop table IF EXISTS  TIMELINE.TRANSFERORDERS;
drop table IF EXISTS  TIMELINE.TO_DELIVERY_CHG_EVENT;
drop table IF EXISTS  TIMELINE.TIMELINE_INT;
drop table IF EXISTS TIMELINE.STOCKOUTS;



-- One row per transfer order.
create table TIMELINE.TRANSFERORDERS(
    TO_ID   BIGINT,
    PO_ID   BIGINT,
    SHIPFROM BIGINT,
    SHIPTO  BIGINT,
    SHIPDATE TIMESTAMP,
    DELIVERYDATE TIMESTAMP,
    MODDELIVERYDATE TIMESTAMP,
    SOURCEINVENTORY BIGINT,
    DESTINATIONINVENTORY BIGINT,
    QTY BIGINT,
    SUPPLIER BIGINT,
    ASN VARCHAR(100),
    CONTAINER VARCHAR(100),
    TRANSPORTMODE SMALLINT,
    CARRIER BIGINT,
    FROMWEATHER SMALLINT,
    TOWEATHER SMALLINT,
    LATITUDE  DOUBLE,
    LONGITUDE DOUBLE,
    primary key (TO_ID)
    );

-- The table name is schema-qualified so the indexes resolve against
-- TIMELINE.TRANSFERORDERS regardless of the session's current schema.
create index TIMELINE.TOSTIDX on TIMELINE.TRANSFERORDERS (
    ShipDate,
    TO_Id
 );
 
 create index TIMELINE.TOETIDX on TIMELINE.TRANSFERORDERS (
    Deliverydate,
    TO_Id
 );

-- One row per change to a transfer order's delivery date.
-- NOTE(review): Supplier is varchar(100) here but BIGINT in TRANSFERORDERS - confirm which is intended.
create table TIMELINE.TO_DELIVERY_CHG_EVENT(
    TO_event_Id bigint,
    TO_Id bigint ,
    ShipFrom bigint,
    ShipTo bigint,
    OrgDeliveryDate timestamp,
    newDeliveryDate timestamp,
    Supplier varchar(100) ,
    TransportMode smallint ,
    Carrier bigint ,
    Fromweather smallint,
    ToWeather smallint,
    primary key (TO_event_Id)
    );
    
-- Timeline rows: Timeline_Id has value VAL over the interval from ST to ET.
create table TIMELINE.TIMELINE_INT(
    Timeline_Id BIGINT,
    ST          TIMESTAMP,
    ET          TIMESTAMP,
    VAL         BIGINT,
    primary key (Timeline_Id, ST)
    );
    
create table TIMELINE.STOCKOUTS(
    TO_ID   BIGINT,
    Timeline_Id BIGINT,
    ST          TIMESTAMP,
    primary key (TO_ID,ST)
    );
    


In [None]:
%%sql 

-- Load the training data set: transfer orders, delivery-change events and
-- inventory timelines, each imported from its own CSV file on S3.
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TRANSFERORDERS',null, 's3a://splice-demo/supplychain/data_0623/train_orders.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);

call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TO_DELIVERY_CHG_EVENT', null, 's3a://splice-demo/supplychain/data_0623/train_events.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);

call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TIMELINE_INT', null, 's3a://splice-demo/supplychain/data_0623/train_inv.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);



In [None]:
%%sql 

-- Load the test data set: transfer orders, delivery-change events and
-- inventory timelines, each imported from its own CSV file on S3.
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TRANSFERORDERS',null, 's3a://splice-demo/supplychain/data_0623/test_orders.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TO_DELIVERY_CHG_EVENT', null, 's3a://splice-demo/supplychain/data_0623/test_events.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TIMELINE_INT', null, 's3a://splice-demo/supplychain/data_0623/test_inv.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);




In [None]:
%%sql 


-- Load the demo data set: transfer orders, delivery-change events and
-- inventory timelines, each imported from its own CSV file on S3.
-- NOTE(review): the events file is read from supplychain/demo_0623/ while every
-- other file in this notebook lives under supplychain/data_0623/ - confirm the
-- path is not a typo.
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TRANSFERORDERS',null, 's3a://splice-demo/supplychain/data_0623/demo_orders.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TO_DELIVERY_CHG_EVENT', null, 's3a://splice-demo/supplychain/demo_0623/demo_events.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);
call SYSCS_UTIL.IMPORT_DATA('TIMELINE','TIMELINE_INT', null, 's3a://splice-demo/supplychain/data_0623/demo_inv.csv', null, null, 'yyyy-MM-dd HH:mm:ss.S', null, null, -1, '/tmp', true, null);



In [None]:
%%sql 
SELECT *
FROM timeline.transferorders



### Timelines

Timelines are a relational representation of temporal data for AI applications 

Timelines record historical, present and future values.

A timeline table contains a collection of timelines, each with a unique id.
Every row represents: `TIMELINE_ID = VAL @ [ST ET]` meaning the variable denoted by the id has the value over that time interval

Timelines require indexed row-based storage and an OLTP compute engine to quickly look up values associated at times.

Timelines require ACID properties because they serve concurrent users changing timelines plus all the timeline updates require atomic changes to timelines.

For example, you have to make sure that when you move an order that the decrement to the source inventory changes atomically with the change to the destination inventory.

In [None]:
%%sql 
SELECT *
FROM timeline.timeline_int
WHERE TIMELINE_ID = ${inv=200}
ORDER BY TIMELINE.TIMELINE_INT.ST;

In [None]:
%%sql 
SELECT *
FROM timeline.timeline_int
WHERE timeline_id = ${inv= 100}
  AND val < 0
ORDER BY timeline.timeline_int.st;

In [None]:
%%sql 
SELECT VAL AS Available
FROM timeline.timeline_int
WHERE timeline_id = ${inv= 100}
  AND ST <= TIMESTAMP('${Time=2017-05-05 00:00:00.0}')
  AND ET > TIMESTAMP('${Time=2017-05-05 00:00:00.0}')

In [None]:
%%sql 


### Apply the Crystal Ball
#### Perform a What-If on any predicted late shipment

By moving the shipment to the predicted new delivery date, you can see the new inventory levels and plan around stockouts.

Below we create a prediction table that materializes predictions on some set of orders as to whether they are late. Then we initialize the table randomly, for demonstration purposes only.



In [None]:
%%sql 
-- Per-order lateness scores, one column per lateness bin.
DROP TABLE IF EXISTS TIMELINE.PREDICTIONS;
CREATE TABLE TIMELINE.PREDICTIONS(
    TO_ID         BIGINT,
    LatenessBin1  DOUBLE,
    LatenessBin2  DOUBLE,
    LatenessBin3  DOUBLE,
    LatenessBin4  DOUBLE,
    LatenessBin5  DOUBLE,
    LatenessBin6  DOUBLE,
    LatenessBin7  DOUBLE,
    LatenessBin8  DOUBLE,
    LatenessBin9  DOUBLE,
    LatenessBin10 DOUBLE,
    PRIMARY KEY (TO_ID)
    );


In [None]:
%%sql 
-- Fill the first four lateness bins with random values for every transfer order.
INSERT INTO TIMELINE.PREDICTIONS (
    TO_ID,
    LatenessBin1,
    LatenessBin2,
    LatenessBin3,
    LatenessBin4
    )
    SELECT TO_ID, RANDOM(), RANDOM(), RANDOM(), RANDOM()
    FROM TIMELINE.TRANSFERORDERS


In [None]:
%%sql 
-- Orders whose LatenessBin3 score reaches the threshold, joined to their order details.
-- NOTE(review): both date predicates bound deliverydate from above
-- (begin >= deliverydate and end > deliverydate); if a [begin, end) window
-- is intended, the first comparison should probably be <= - confirm.
SELECT TP.TO_ID,
       TP.LatenessBin1 AS ZERO_DAYLATE,
       TP.LatenessBin2 AS ONE_DAYLATE,
       TP.LatenessBin3 AS FIVE_DAYLATE,
       TP.LatenessBin4 AS TEN_DAYLATE,
       timeline.transferorders.*
FROM timeline.predictions TP
LEFT OUTER JOIN timeline.transferorders ON TP.to_id = timeline.transferorders.to_id
WHERE TIMESTAMP('${begin =2017-05-05 00:00:00.0}') >= timeline.transferorders.deliverydate
  AND TIMESTAMP('${end =2017-05-05 00:00:00.0}') > timeline.transferorders.deliverydate
  AND TP.LatenessBin3 >= ${threshold = .75}

### What-If Simulation 
#### Perform a What-If for the specified Order with specified delay in shipment

Pick an order that may be delayed by number of days to see what orders may result in stock out situation because of the delay.

First, all the orders that are sourced from the destination of the order under consideration are checked, and those that have a stockout are listed before the delay in delivery date is applied, for comparison.
Next, the delay is simulated, inventory calculations are made, and the orders that are sourced from the destination are checked again for stockouts.

The what-if calculations are done on a temporary table, so they do not impact the actual data.




In [None]:
%angular

<!-- Collects an order id and a delay in days, binds both to paragraph
     20170625-202310_651912174 and runs that paragraph. -->
<form class="form-inline">
  <div class="form-group">
   <h5>Simulate Late Order </h5>
    <label for="orderFieldId"> Order ID : </label>
    <!-- placeholder was missing its opening quote, which truncated the text and produced stray attributes -->
    <input type="text" class="form-control" id="orderFieldId" placeholder="Order id ..." ng-model="orderId">
    <label for="delayFieldId">Delay in Days: </label>
    <input type="text" class="form-control" id="delayFieldId" placeholder="Delay Days ..." ng-model="delayDays">
      <button type="submit" class="btn btn-primary" ng-click="z.angularBind('orderId',orderId,'20170625-202310_651912174');z.angularBind('delayDays',delayDays,'20170625-202310_651912174'); z.runParagraph('20170625-202310_651912174')"> Run What-If</button>
  </div>

</form>


In [None]:
%%scala 

// Splice context and table names used by the what-if simulation.
val splicemachineContext: SplicemachineContext = new SplicemachineContext(defaultJDBCURL)
val InventoryTable: String = "timeline.timeline_int"
val TOTable: String = "timeline.transferorders"
val stockoutTable: String = "timeline.STOCKOUTS"

// Persistence modes passed to update().
val CHANGE_AT_ST: Int = 0
val CHANGE_AT_ET: Int = 1

// Column clause appended to "create table <name>" for temporary inventory tables.
val tempTableColsWithPKey: String =
  "(Timeline_Id bigint, ST timestamp, ET timestamp, Val bigint, primary key (Timeline_ID, ST))"

/**
  * createTempInvTable - creates a private copy of two inventory timelines.
  *
  * Picks a table name "Timeline.TEMP_INV_" plus six random upper-case letters
  * that does not exist yet, creates it with tempTableColsWithPKey, and copies
  * the rows of timeline.timeline_int whose Timeline_Id is source or dest into it.
  *
  * @param smContext Splice context used for the existence check, the read and the insert
  * @param source    timeline id of the source inventory
  * @param dest      timeline id of the destination inventory
  * @return the qualified name of the new table
  */
def createTempInvTable (smContext :SplicemachineContext, source : Long, dest : Long): String = {

    /* Candidate name with a fresh random suffix */
    def candidateName(): String =
        "Timeline."+  "TEMP_INV_" + org.apache.commons.lang3.RandomStringUtils.randomAlphabetic(6).toUpperCase()

    /* Draw names until one is free */
    var tempTable = candidateName()
    while (smContext.tableExists(tempTable))
        tempTable = candidateName()

    val tempOptions = Map(
        JDBCOptions.JDBC_TABLE_NAME -> tempTable,
        JDBCOptions.JDBC_URL -> defaultJDBCURL
    )
    val tempJDBCOptions = new JDBCOptions(tempOptions)

    val conn = JdbcUtils.createConnectionFactory(tempJDBCOptions)()
    try {
        conn.createStatement().execute("create table " + tempTable + tempTableColsWithPKey)
    } finally {
        conn.close()
    }

    /* Copy the two timelines. The read goes through smContext, like the
       existence check and the insert; the original used the global context here. */
    val stmt = "select *  FROM timeline.timeline_int  WHERE Timeline_Id  in ( " + source + ", "+ dest +")"
    val timesDf = smContext.df(stmt)
    smContext.insert(timesDf, tempTable)
    tempTable
}

/** Applies a "delivery moved" what-if to a scratch inventory table inside one transaction.
  *
  * Issues four `update(...)` calls against `tempInvTable`: two over
  * (`shippingDate`, `deliveryDate`) with `qty` / `-qty`, then two over
  * (`shippingDate`, `newDeliveryDate`) with the signs swapped, and commits.
  * NOTE(review): presumably the first pair backs out the original transfer and the second
  * re-applies it with the new delivery date -- confirm against update().
  * NOTE(review): update(...) is not handed `conn`; its writes only take part in this
  * transaction if it uses the same connection that getConnection() returns -- confirm.
  *
  * On a `WriteConflict` the transaction is rolled back and the whole operation is retried
  * while `retryCount < MAX_RETRIES`. Any other exception is rolled back and printed, not
  * rethrown.
  *
  * @param smContext       context that supplies the JDBC connection
  * @param tempInvTable    schema-qualified scratch table to modify
  * @param source          timeline id of the shipping inventory
  * @param destination     timeline id of the receiving inventory
  * @param shippingDate    ship date of the transfer order
  * @param deliveryDate    current delivery date
  * @param newDeliveryDate delayed delivery date to simulate
  * @param qty             quantity being transferred
  * @param retryCount      number of attempts already made (0 on the first call)
  * @param revertFlag      accepted and passed along on retry, but not read here
  */
def whatif(smContext :SplicemachineContext,
            tempInvTable: String,
           source: Integer,
           destination: Integer,
           shippingDate: Timestamp,
           deliveryDate: Timestamp,
           newDeliveryDate : Timestamp,
           qty: Long,
           retryCount: Integer = 0,
           revertFlag: Boolean): Unit = {

        val conn: Connection = smContext.getConnection()
         try {
          conn.setAutoCommit(false) //TBD - Need to set to false when DBAAS-570 is resolved
          update(tempInvTable, source, shippingDate, deliveryDate,  qty, CHANGE_AT_ST)
          update(tempInvTable, destination, shippingDate, deliveryDate,  -qty, CHANGE_AT_ET)
          update(tempInvTable, source, shippingDate, newDeliveryDate,  -qty, CHANGE_AT_ST)
          update(tempInvTable, destination, shippingDate, newDeliveryDate,  qty, CHANGE_AT_ET)
          conn.commit()
        }
        catch {
          case exp: WriteConflict => {
            conn.rollback()
            conn.setAutoCommit(true)
            if (retryCount < MAX_RETRIES) {
              // Parenthesised so the attempt number is added, not string-concatenated ("0" + 1 => "01").
              println("Retrying create TO" + source + " " + destination + " " + shippingDate + " " + deliveryDate + " " + qty + " " + (retryCount + 1))
              whatif(smContext, tempInvTable,source, destination, shippingDate, deliveryDate, newDeliveryDate, qty, retryCount + 1, revertFlag)
            }
            else {
              // Retries exhausted: say so instead of returning as if the what-if had been applied.
              println("Giving up on what-if after " + retryCount + " retries: " + source + " " + destination)
            }
          }
          case e: Throwable => {
            // Roll back first: the finally block switches auto-commit back on, and JDBC commits
            // any open transaction when the mode changes, which would persist a partial what-if.
            try {
              conn.rollback()
            } catch {
              case rb: Throwable => println(s"Rollback failed: $rb")
            }
            println(s"Got some other kind of exception: $e")
          }
        }
        finally {
          conn.setAutoCommit(true)
        }
      }
      
      
  // Will need to copy the inventory table so that what-if is not visible to others

  // Data-source options for the transfer orders table (not referenced again in this cell).
  val transferOrdersTable = Map(
    JDBCOptions.JDBC_TABLE_NAME -> "Timeline.TransferOrders",
    JDBCOptions.JDBC_URL -> defaultJDBCURL
    ) 

  // Inputs bound by the Angular form: the transfer order to delay and the delay in days.
  val orderid = z.angular("orderId").toString.toLong
  val q = s"select *  FROM timeline.transferorders WHERE to_id = $orderid"
  val order = splicemachineContext.df(q)
  
  val days = z.angular("delayDays").toString.toInt
 
  // Fetch the order row once; every field below is read from this single Row instead of
  // re-running the query for each column.
  order.take(1).headOption match {
    case Some(row) if days > 0 =>
      // getAs[T] states the column type directly rather than casting an uninferred result.
      val source = row.getAs[Long]("SOURCEINVENTORY")
      val dest = row.getAs[Long]("DESTINATIONINVENTORY")
      val ship = row.getAs[Timestamp]("SHIPDATE")
      val delivery = row.getAs[Timestamp]("DELIVERYDATE")
      val qty = row.getAs[Long]("QTY")
      val newDelivery = new Timestamp(new org.joda.time.DateTime(delivery).plusDays(days).getMillis())
      
      // Populate stockouts before What If: orders shipping out of the destination inventory
      // during an interval whose value is negative, read from the live inventory table.
      val queryBefore = s"""SELECT t.to_id, i.ST, i.timeline_id FROM $InventoryTable i  , $TOTable t
        WHERE i.timeline_id = $dest
        AND t.sourceinventory = i.timeline_id
        AND val < 0
        AND i.ST <=  t.shipdate
        AND  t.shipdate < i.ET
        ORDER BY i.ST"""
    
      println(s"q=$queryBefore")
      val stockOutsBefore = splicemachineContext.df(queryBefore)
  
      splicemachineContext.insert(stockOutsBefore, stockoutTable)
      z.run("20170621-055239_1661420434")
      z.run("20170628-152508_1831563439")
      
      // Run the what-if on a private copy of the two timelines, not on the live inventory table.
      val tempInventoryTable = createTempInvTable(splicemachineContext,source,dest)
      
      // Named arguments: the slot after qty is the retry counter, which must start at 0.
      // The delay in days is already carried by newDelivery.
      whatif(splicemachineContext, tempInventoryTable, source.toInt, dest.toInt, ship, delivery, newDelivery, qty, retryCount = 0, revertFlag = false)
      
      // Qualified column names for the query against the scratch table (trailing space intended).
      val destInvCol = TOTable + ".destinationinventory "
      val timelineIdCol = tempInventoryTable + ".timeline_id "
      val toIdCol = TOTable + ".to_id "
      val stCol = tempInventoryTable + ".ST "
      val etCol = tempInventoryTable + ".ET "
      val delDateCol = TOTable + ".deliverydate "
      val shipDateCol =  TOTable + ".shipdate "
      val sourceInvCol = TOTable + ".sourceinventory "
      /*
      Earlier variant, keyed on destination inventory and delivery date:
      val query = s"""SELECT $toIdCol, $stCol, $timelineIdCol FROM $tempInventoryTable , $TOTable
                        WHERE $timelineIdCol = $dest
                        AND $destInvCol = $timelineIdCol
                        AND val < 0
                        AND $stCol >= $delDateCol
                        ORDER BY $stCol"""
                        println(s"q=$query")
                        */
   
      // Same stockout test as queryBefore, evaluated against the what-if copy.
      val query = s"""SELECT $toIdCol, $stCol, $timelineIdCol FROM $tempInventoryTable , $TOTable
        WHERE $timelineIdCol = $dest
        AND $sourceInvCol = $timelineIdCol
        AND val < 0
        AND $stCol <=  $shipDateCol
        AND  $shipDateCol < $etCol
        ORDER BY $stCol"""
    
      println(s"q=$query")
      val stockOuts = splicemachineContext.df(query)
  
      // whatif(source.toInt, dest.toInt, ship, delivery, newDelivery, qty, 5, true) // undo what-if (stale signature)
      splicemachineContext.insert(stockOuts, stockoutTable)
      z.run("20170716-165322_7991113")
      z.run("20170628-152508_1831563439")
      // The scratch table is left in place; the drop stays disabled as in the original.
      // splicemachineContext.dropTable(tempInventoryTable)

    case _ =>
      // Also reached when delayDays is not positive, even though the message only mentions the order.
      println(" NO ORDERS FOUND FOR ORDER ID " + orderid)
  }
  


In [None]:
%%sql 
-- Stockout report: each recorded stockout with its inventory interval and the
-- details of the transfer order that ships from that inventory.
SELECT
    so.st,
    inv.et,
    val,
    so.to_id,
    shipfrom,
    shipto,
    qty,
    DESTINATIONINVENTORY,
    SOURCEINVENTORY,
    LATITUDE,
    LONGITUDE,
    FROMWEATHER,
    TOWEATHER,
    SUPPLIER,
    CARRIER,
    TRANSPORTMODE
FROM Timeline.Stockouts so
JOIN Timeline.TransferOrders tord
    ON so.to_id = tord.to_id
JOIN timeline.Timeline_int inv
    ON so.timeline_id = inv.timeline_id
   AND inv.timeline_id = tord.sourceinventory
   AND so.ST = inv.ST
ORDER BY ST;






In [None]:
%%sql 
-- Stockout report (same query as the previous cell): each recorded stockout with its
-- inventory interval and the details of the transfer order that ships from that inventory.
SELECT
    so.st,
    inv.et,
    val,
    so.to_id,
    shipfrom,
    shipto,
    qty,
    DESTINATIONINVENTORY,
    SOURCEINVENTORY,
    LATITUDE,
    LONGITUDE,
    FROMWEATHER,
    TOWEATHER,
    SUPPLIER,
    CARRIER,
    TRANSPORTMODE
FROM Timeline.Stockouts so
JOIN Timeline.TransferOrders tord
    ON so.to_id = tord.to_id
JOIN timeline.Timeline_int inv
    ON so.timeline_id = inv.timeline_id
   AND inv.timeline_id = tord.sourceinventory
   AND so.ST = inv.ST
ORDER BY ST;






In [None]:
%%sql 
-- Clear recorded stockouts: removes every row whose to_id is positive.
DELETE FROM timeline.stockouts
WHERE to_id > 0;

In [None]:
%%sql 
SELECT *
FROM timeline.TO_DELIVERY_CHG_EVENT


In [None]:
%%sql 
SELECT *
FROM timeline.TO_DELIVERY_CHG_EVENT



In [None]:
%%sql 
SELECT *
FROM timeline.TO_DELIVERY_CHG_EVENT ev
JOIN timeline.transferorders tord
    ON tord.to_id = ev.to_id

## Machine Learning 
### Learning to Predict Late Shipments

Here we use a logistic regression ML model to classify late orders. Our inventory system tracks events such as the delivery date on an order changing or the qty delivered being different than expected.

The model considers attributes of the orders such as:
- Mode of Transport
- Carrier
- Latitude 
- Longitude
- Source City
- Destination City
- Part.

The model also considers exogenous data to enrich the inventory data such as weather:
- weather at source
- weather at destination.

The machine learning algorithm outputs a model that can predict whether a shipment is late. 




### Ease of ML - NO ETL 
#### Just Transformations

Splice Machine is HTAP so you can perform both transactional and analytical queries.

Therefore we do not need to extract and load data - we only transform it.

Below we transform the transfer orders and the changes to transfer orders into a feature table, computing how late each order is and binning the lateness into four classes: not late, 1-5 days late, 6-10 days late, and more than 10 days late.

The first step is a classic transformation: merge a master table of data with a table of changes, label each row with its class, and enrich the rows with outside data (weather, in this case).




In [None]:
%%sql 

-- Rebuild the training table from transfer orders left-joined to their delivery-change events.
DROP TABLE IF EXISTS TIMELINE.FEATURES;

-- Lateness is the change in delivery date (0 when the order has no change event).
-- Label bins it: 0 = not late, 1 = above 0, 2 = above 5, 3 = above 10.
-- NOTE(review): both form values act as upper bounds on deliverydate
-- (begin >= deliverydate, end > deliverydate); confirm whether begin was meant as a lower bound.
CREATE TABLE TIMELINE.FEATURES AS
SELECT
    ev.orgdeliverydate,
    ev.newdeliverydate,
    CASE WHEN ev.TO_EVENT_ID IS NULL
        THEN tord.fromweather
        ELSE ev.fromweather END AS currentweather,
    tord.*,
    CASE WHEN ev.TO_EVENT_ID IS NULL
        THEN 0
        ELSE ev.newdeliverydate - ev.orgdeliverydate END AS Lateness,
    CASE
        WHEN ev.newdeliverydate - ev.orgdeliverydate > 10 THEN 3
        WHEN ev.newdeliverydate - ev.orgdeliverydate > 5 THEN 2
        WHEN ev.newdeliverydate - ev.orgdeliverydate > 0 THEN 1
        ELSE 0
    END AS Label
FROM TimeLine.TransferOrders tord
LEFT OUTER JOIN TimeLine.TO_DELIVERY_CHG_EVENT ev
    ON tord.TO_ID = ev.TO_ID
WHERE TIMESTAMP('${begin = 2017-05-05 00:00:00.0}') >= tord.deliverydate
  AND TIMESTAMP('${end =2017-05-05 00:00:00.0}') > tord.deliverydate;

-- Preview the result. This filter is on orgdeliverydate, which comes from the event side
-- of the left join, so orders without a change event are not shown here.
SELECT *
FROM timeline.features f
WHERE TIMESTAMP('${begin = 2017-05-05 00:00:00.0}') >= f.orgdeliverydate
  AND TIMESTAMP('${end =2017-05-05 00:00:00.0}') > f.orgdeliverydate;
 


#### MLlib

MLlib is a rich repository of Transformers and Models. 

https://spark.apache.org/docs/latest/ml-guide.html

In this use case we will use Logistic Regression to classify orders into four classes: not late, 1-5 days late, 6-10 days late, and more than 10 days late.

The Logistic Regression Model expects a dataframe with two elements: feature Vector and label.

Therefore we have to extract the columns from the above table into this form.

Luckily MLlib has such a transformer called a Vector Assembler.

Below we create a Vector Assembler, extract some columns from our feature table, and then feed the result to the model.

Then we can deploy the model to create the prediction table we used above.


In [None]:
%%scala 
    
    import org.apache.spark.ml.feature.VectorAssembler
    import java.sql.{Connection,Timestamp}
    import com.splicemachine.spark.splicemachine._
    import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
    import org.apache.spark.ml.classification.LogisticRegression
    import spark.implicits._

    
    // Load Timeline.Features through the Splice Machine Spark data source.
    val optionMap = Map(
      JDBCOptions.JDBC_URL -> defaultJDBCURL,
      JDBCOptions.JDBC_TABLE_NAME -> "Timeline.Features"
    )
    val dfUpper = sqlContext.read.options(optionMap).splicemachine

    // Rename every column positionally to a lower-case name.
    val newNames = Seq(
      "orgdeliverydate", "newdeliverydate", "currentweather", "to_id",
      "po_id", "shipfrom", "shipto", "shipdate",
      "deliverydate", "moddeliverydate", "sourceinventory", "destinationinventory",
      "qty", "supplier", "asn", "container",
      "transportmode", "carrier", "fromweather", "toweather",
      "latitude", "longitude", "lateness", "label")
    val df = dfUpper.toDF(newNames: _*)

    // Pack the chosen input columns into the single vector column the classifier reads.
    val featureColumns = Array(
      "shipfrom", "shipto", "sourceinventory", "destinationinventory",
      "supplier", "transportmode", "carrier", "fromweather", "toweather")
    val assembler = new VectorAssembler()
    assembler.setInputCols(featureColumns)
    assembler.setOutputCol("features")

    val output = assembler.transform(df)
    println("Assembled columns ShipFrom, ShipTo, SourceInventory, DestinationInventory, Supplier, TransportMode, Carrier, FromWeather, ToWeather to vector column 'features'")
    output.select("features", "label").show(true)

    // Logistic regression capped at 10 iterations.
    val lr = new LogisticRegression()
    lr.setMaxIter(10)

    // Train on the assembled frame.
    val model = lr.fit(output)

    // Class count found in the label column; the next cell sizes its output from it.
    val numClasses = model.numClasses

    // Score the same rows the model was trained on.
    val newdf = model.transform(output)

    // Report the fitted multinomial coefficients and intercepts.
    println(s"Coefficients: \n${model.coefficientMatrix}")
    println(s"Intercepts: ${model.interceptVector}")
    
    

In [None]:
%%scala 
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql.types.{StructType,StructField,DoubleType, LongType}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.encoders.RowEncoder

// Table that receives one row of class probabilities per transfer order.
val predictionTable = "timeline.predictions"

// One probability column per class, but never more than 10.
var labelCnt = math.min(numClasses, 10)

newdf.printSchema()

// Output layout: TO_ID followed by LATENESSBIN1 .. LATENESSBIN<labelCnt>, all non-nullable.
var schema = (1 to labelCnt).foldLeft(StructType(StructField("TO_ID", LongType, false) :: Nil)) {
  (fields, bin) => fields.add(StructField("LATENESSBIN" + bin, DoubleType, false))
}

// Not read anywhere in this cell; retained as a top-level notebook variable.
var i = 0

val encoder = RowEncoder(schema)

// Flatten each scored row into (to_id, p(class 0), p(class 1), ...).
val pred = newdf
  .select("features", "label", "probability", "prediction", "to_id")
  .map { case Row(features: Vector, label: Integer, prob: Vector, prediction: Double, idd: Long) =>
    val cells: Seq[Any] = idd :: List.tabulate(labelCnt)(bin => prob(bin))
    println(cells)
    Row.fromSeq(cells)
  }(encoder)

splicemachineContext.update(pred, predictionTable)
pred.show()


In [None]:
%%scala
%lsmagic


In [None]:
%lsmagic