In [321]:
try:    importlib.reload(Jupytils)
except: import Jupytils
import itertools as it
import cvxopt
from cvxopt import matrix, solvers
from fractions import Fraction

showTopbar("Markov Chains", menu=[("Help", "#"), ("Home", "#")])
np.set_printoptions(precision=4, suppress=True)

<IPython.core.display.Javascript object>

In [628]:
import random

'''These classes are only meant to work with integer strings '''
class NaiveMarkov:
    """An nth-Order Markov Chain class with some lexical processing elements."""
    def __init__(self, delim, order, n=10000):
        """Initialized with a delimiting character (usually a space) and the order of the Markov chain."""
        self.states = {}
        self.delim = delim
        self.max = n
        if order > 0:
            self.order = order
        else:
            raise Exception('Markov Chain order cannot be negative or zero.')

    # Must provide classes encoded by 0, 1, 2 etc
    def Freq(s1, s2 = None, numClasses=None):
        if len(s1) <= 0:
            return None;

        if (s2 is None):
            s2 = s1[1:]

        numClasses = numClasses if numClasses else max(max(s1), max(s2))+1
        F=np.zeros((numClasses, numClasses))
        for z in zip(s2,s1):
            F[(z)] += 1

        F=np.matrix(F)
        P=F.copy();
        P = P/P.sum(axis=0)
        return F, P
    
    def GetTokens(self, sample):
        if (type(sample) == str ):
            tokens = sample.split(self.delim)
        else:
            tokens = sample;
            
        return tokens;

    def Train(self, sample):
        prev = tuple(['' for i in range(self.order)])
        tokens = self.GetTokens(sample)
        self.tokens = tokens;
        
        self.max = len(tokens)
        for t in tokens:
            if not prev in self.states:
                self.states[prev] = []
            curr = prev[1:] + (t,)
            self.states[prev].append(curr)
            prev = curr
   
    def NextState(self, Xt=None, n=None):
        if Xt is None or not Xt in self.states.keys():
            Xt = tuple(['' for i in range(self.order)])
        ri = random.randint(0, len(self.states[Xt])-1)
        #print("{}:{}:{} ".format(Xt, ri, len(self.states[Xt])-1), end='')
        t = self.states[Xt][ri]
        Xt1 = t; #t[len(t)-1]
        
        return Xt1;
    
    def Predict(self, Xt=None, n=None):
        ret= [] if Xt is None else [Xt[-1]]
        #print ("HHH=>", ret)
        for i in range(self.max):
            c = self.NextState(Xt);
            ret.append(c[-1])
        #print ("HHH=>", ret)
            
        return ret

    def PredictO(self, orig, n=None):
        ret= [orig[j] for j in range(self.order)]
        start = tuple(ret)
        for i in range(len(orig)-self.order):
            start = tuple([orig[j+i] for j in range(self.order)])
            #print(start, end='', sep= ';')
            c = self.NextState(start);
            ret.append(c[-1])
        return ret;


def Metrics(a, b, printOut=True, msg=None):
    n=0;
    t=0;
    z = zip(a,b)
    correctClass=defaultdict(int)
    totalClass=defaultdict(int)
    
    for c in z:
        totalClass[c[0]] += 1;

        if(c[0] == c[1]):
            correctClass[c[0]] += 1;
            t+= 1
        n += 1

    if (printOut):
        print("=======================Metrics : ", msg)
        print("orig=>{}\npred=>{}".format(a[0:80], b[0:80]))
        print("Total %d, correct %d, acc: %3.2f"%(n,t,t/n))
        for i,c in totalClass.items():
            acc = correctClass[i]/c
            print("class: {} total: {}, correct: {}, acc: {}".format(i, c, correctClass[i], acc))

    return n, t, totalClass


In [629]:
s1=[int(_) for _ in '0 0 1 1 0 2 1 0 1 2 0 1 2 0 1 2 0 1 0 1'.split()]
numClasses = len(unique(s1))
print("# Try the Naive Markov Chain Approach ............For Paper Example")

order = 1
m=NaiveMarkov(' ', order)
m.Train(s1)
start = tuple([s1[i] for i in range(order)])
r = m.Predict(start )
Metrics(s1, r, True, msg="For MC order = {}".format(order))
r = m.PredictO(s1 )
Metrics(s1, r, msg="For MC: with seeds taken from original trained tuples");


order=3
m=NaiveMarkov(' ', order)
m.Train(s1)
start = tuple([s1[i] for i in range(order)])
r = m.Predict(start )
Metrics(s1, r, True, msg="For MC order = {}".format(order))

r = m.PredictO(s1 )
Metrics(s1, r, msg="For MC: with seeds taken from original trained tuples");
#Metrics(s1, r)
#F,p = NaiveMarkov.Freq(s1)


# Try the Naive Markov Chain Approach ............For Paper Example
orig=>[0, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1]
pred=>[0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 2, 1, 1, 2]
Total 20, correct 7, acc: 0.35
class: 0 total: 8, correct: 2, acc: 0.25
class: 1 total: 8, correct: 5, acc: 0.625
class: 2 total: 4, correct: 0, acc: 0.0
orig=>[0, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1]
pred=>[0, 1, 1, 0, 2, 1, 1, 2, 1, 2, 0, 1, 2, 0, 1, 0, 1, 2, 2, 1]
Total 20, correct 11, acc: 0.55
class: 0 total: 8, correct: 3, acc: 0.375
class: 1 total: 8, correct: 6, acc: 0.75
class: 2 total: 4, correct: 2, acc: 0.5
orig=>[0, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1]
pred=>[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Total 20, correct 8, acc: 0.40
class: 0 total: 8, correct: 0, acc: 0.0
class: 1 total: 8, correct: 8, acc: 1.0
class: 2 total: 4, correct: 0, acc: 0.0
orig=>[0, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 

In [630]:
pefs='''
2 2 1 2 2 2 2 1 2 2 2 2 3 2 2 2 2 2 1 2 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 2 1 3 2 2 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 3 3 3 2 3 3 2 2 2 2
3 2 1 3 3 3 3 3 3 2 2 3 1 3 3 2 3 3 3 2 3 3 3 3 2 2 3 3 1 2 3 2 3 1 3 3 3 3 2 2 3 1 3 2 1 2 1 1 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 3 3 3 2 3 3 3 3 3 3
3 3 3 3 3 3 3 3 2 3 1 3 2 1 3 2 3 3 2 2 3 3 1 1 2 1 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 2 2 3 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 2
2 2 2 3 2 2 3 2 2 3 3 3 2 3 3 3 2 3 3 3 3 3 2 3 3 2 2 2 3 3 3 3 3 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 3 3 2 3 3 2 3 2 3 3 2 2 3 3 3 3 2 3 3
3 3 3 3 3 3 2 3 3 3 1 2 3 3 2 1 2 2 2 2 1 2 1 2 2 3 2 1 3 2 2 2 1 1 2 2 2 2 1 2 2 2 3 2 3 2 2 1 2 3 3 3 2 3 3 2 2 2 3 3 3 3 3 3 3 3 3 2 2 3 2 2 2
3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 2 2 2 2 2 3 2 2 3 3 2 3 3 2 2 2 3 3 3 2 3 3 3 3 3 3 1 3 2 3 2 2 3 3 2 1 3 2 2 3 1 1 2 2 2 2 1 1 1 2 2 2 1 2 3
2 2 3 3 3 2 2 3 2 1 1 3 3 2 1 1 2 1 2 3 2 2 2 3 3 3 3 3 3 2 3 3 2 3 3 2 3 2 3 2 3 2 2 3 1 2 3 3 3 3 2 1 3 2 1 2 2 3 3 2 3 1 2 2 3 2 3 3 2 2 1 2 1
2 2 3 2 2 3 2 3 2 2 2 2 1 3 3 2 2 1 2 3 2 1 2 2 2 1 3 3 2 3 3 3 1 3 3 2 3 1 1 3 3 3 2 3 3 2 2 2 2 3 3 3 3 1 3 3 2 2 2 3 1 2 2 2 1 2 1 2 1 3 1 2 1
3 1 2 2 3 3 1 3 3 1 3 2 3 2 3 2 3 2 3 1 3 3 3 3 3 2 2 2 3 3 3 2 2 2 3 2 2 3 3 2 2 1 2 1 2 2 3 1 2 2 2 1 2 2 2 2 3 2 2 3 2 2 3 3 3 2 2 1 3 2 3 3 2
2 3 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 1 2 1 1 3 3 2 2 2 3 3 2 1 2 2 1 2 1 2 2 1 1 2 1 1 2 2 1 1 1 2 1 2 1 1 1 2 2
1 1 2 1 2 1 1 1 1 1 2 2 1 1 2 1 1 1 2 2 1 1 2 1 1 2 1 1 1 2 2 1 2 2 3 2 2 1 2 2 1 1 2 1 1 1 3 1 1 1 2 2 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1
1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 2 1 2 1 1 1 2 2 2 2 3 1 1 1 1 1 1 1 2 3 3 1 2 2 2 1 3 2 2 1 2 2 1 3 3 2 2 1 2 2 1 1 1 1 2 1 2 2
2 1 1 2 1 2 1 3 1 2 1 1 2 1 1 2 1 1 2 2 2 1 1 1 2 2 3 2 2 2 2 1 2 2 1 3 3 1 1 1 2 2 2 1 2 1 1 1 1 1 3 3 3 2 2 2 2 2 2 1 2 3 1 1 1 1 1 1 3 1 2 1 1
2 3 2 2 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1 2 1 1 1 1 2 2 1 1 1 1 1 2 2 3 2 1 2 2 2 2 2 2 2 1 2 2 2 2 1 3 2 3 3 3 2 1 3 2 2 2 2 3 1 1 1
2 1 1 2 3 1 3 1 2 1 2 1 2 2 2 1 1 1 1 1 1 1 1 2 2 1 2 2 2 2 1 1 3 2 1 1 2 3 3 2 1 2 2 2 1 1 2 1 3 2 3 1 1 1 2 1 1 1 2 1 2 1 2 1 2 1 2 1 3 1 3 2 2
2 2 2 2 1 2 2 1 1 1 2 3 2 1 1 1 3 1 3 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 1 3 1 1 1 1 2 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2
1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 3 1 1 1 2 1 1 2 2 3 3 2 3 3 3 2 2 2 3 3 3 2 2 2 2 3 1 2 2 2 1 2 3 2 2 1 1 3 1 1 1 2 1 2 1 2 2 2 2 3 1 2 2 2
1 1 2 2 2 2 1 1 3 1 3 1 2 2 3 2 2 2 3 2 2 1 2 1 1 2 1 2 2 2 1 2 1 1 2 1 1 1 2 2 1 1 1 2 1 1 1 2 1 1 2 2 2 3 2 3 2 3 2 3 3 3 3 3 3 2 3 3 2 3 2 3 
'''

pef=[int(_)-1 for _ in pefs.replace('\n', ' ').split()]
s1= deepcopy(pef)

numClasses = len(unique(s1))
print("# Try the Naive Markov Chain Approach ............For SCHAS PEF (I think Hand)")

order = 7
m=NaiveMarkov(' ', order)
m.Train(s1)
start = tuple([s1[i] for i in range(order)])
r = m.Predict(start )
Metrics(s1, r, True, msg="For MC order = {}".format(order))
r = m.PredictO(s1 )
Metrics(s1, r, msg="For MC: with seeds taken from original trained tuples");


order=11
m=NaiveMarkov(' ', order)
m.Train(s1)
start = tuple([s1[i] for i in range(order)])
r = m.Predict(start )
Metrics(s1, r, True, msg="For MC order = {}".format(order))

r = m.PredictO(s1 )
Metrics(s1, r, msg="For MC: with seeds taken from original trained tuples");
#Metrics(s1, r)
#F,p = NaiveMarkov.Freq(s1)


# Try the Naive Markov Chain Approach ............For SCHAS PEF (I think Hand)
orig=>[1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 0, 2, 2, 2, 2]
pred=>[1, 2, 1, 0, 2, 1, 0, 2, 2, 0, 0, 1, 0, 0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 1, 0, 0, 2, 2, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 1, 1, 2, 1]
Total 1313, correct 434, acc: 0.33
class: 0 total: 404, correct: 245, acc: 0.6064356435643564
class: 1 total: 515, correct: 70, acc: 0.13592233009708737
class: 2 total: 394, correct: 119, acc: 0.3020304568527919
orig=>[1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1

In [631]:
class WeightedMarkov:
    """An higher order multi variate Markov Chain"""
    def __init__(self, X= None, nStates=None, order=1):
        assert order > 0, "order cannot be negative"
        self.nStates = nStates
        self.order = order
        self.X = X;

    def Compute(self, X=None, order=1, numClasses=None):
        fS={}
        pS={}
        xHats=[]
        numClasses = numClasses if numClasses is not None else len(unique(X[0]))

        if ( X is None):
            X = self.X;
            numClasses = self.nStates
            order = self.order
        
        for x in X:
            xHats.append(self.computeXHat(x, numClasses) )

        for i in range(order):
            a1 = X[0]
            a2 = a1[i+1:]
            F=self.Freq(a1, a2, numClasses)
            fS[i] = F[0]
            pS[i] = F[1]
        
        self.fS = fS; self.pS = pS; self. xHats = xHats;
        
        return fS, pS, xHats;
    
    #---- Private stuff
    def Encode(self, y):
        l = preprocessing.LabelEncoder()
        y = l.fit_transform(y);
        return y, l.classes_

    #--- 
    # Must provide classes encoded by 0, 1, 2 etc
    def Freq(self, s1, s2 = None, numClasses=None):
        if len(s1) <= 0:
            return None;

        if (s2 is None):
            s2 = s1[1:]

        numClasses = numClasses if numClasses else max(max(s1), max(s2))+1
        F=np.zeros((numClasses, numClasses))
        for z in zip(s2,s1):
            F[(z)] += 1

        F=np.matrix(F)
        P=F.copy();
        P = P/P.sum(axis=0)
        return F, P
    

    def computeXHat(self, s, numClasses =None):
        numClasses = numClasses if numClasses else max(s)+1
        xHat=np.zeros(numClasses)
        t=pd.Series(s).value_counts() 
        for i,j in t.items():    
            xHat[i] =j
        ret = xHat/xHat.sum()

        ret = np.array([ret]).T

        return ret;

    #Display thr matrix
    def Matdisplay(*M, names=None, useFractions=False, display=False):
        s = ""
        if (names is None):
            names = ["" for i in range(len(M)) ]
        for i, m in enumerate(M):
            if str(type(np.array(m).flat[0])).find('str') < 0:
                useFrac = useFractions
            else:
                useFrac = False
            
            s+= LA.M(m, name=names[i], useFrac=useFrac, call_display=False, showdim=False);

        if (display):
            display(Math(s))
        return (s)
    

    def Mdisplay(fS, pS, xHats):
        g = ["\hat{{F}}^{{{}}}".format(_) for _ in sorted(fS.keys())]
        v = [_[1] for _ in sorted(fS.items())]
        s = WeightedMarkov.Matdisplay(*v, names=g)

        g = ["\hat{{P}}^{{{}}}".format(_) for _ in sorted(pS.keys())]
        v = [_[1] for _ in sorted(pS.items())]
        s += WeightedMarkov.Matdisplay(*v, names=g, useFractions=True)

        g = ["\hat{{x}}_{}".format(i+1) for i in range(len(xHats))]
        s+= WeightedMarkov.Matdisplay(*xHats, names=g, useFractions=True)
        
        display(Math(s))
        return (s)

    def Dump(self):
        WeightedMarkov.Mdisplay(self.fS, self.pS, self.xHats)
        
        
    def DisplayCAb(self):
        t=''
        for i in range(self.order):
            t += '\lambda_{}+'.format(i)
        t = t[0:-1] + " <= +1"
        t += "\n-"+ t.replace('+', '-')
        for _ in range(order):
            t += '\n\lambda_{} >= 0'.format(_)

        for _ in range(numClasses):
            t += '\n-'
        for _ in range(numClasses):
            t += '\n+'
        t += '\nw >= 0'    

        dd = np.array([_ for _ in t.split('\n')])
        for tt in t.split('\n'):
            pass
            #print (tt, end='')
            #display(Math(tt))

        dd =np.matrix([dd]).T

        cAb = [np.matrix(c),np.matrix(A),np.matrix([b]).T, dd]
        s = WeightedMarkov.Matdisplay(*cAb, names="c A b description".split(), useFractions=True)
        display(Math(s))
    
    def PrepareMatrices(self):
        s=len(self.X)
        n= self.nStates
        ls = list(it.product(range(s), repeat=2))
        numParams = self.order

        numRows = (2*s + numParams + 2*s*n)
        # numRows= 
        # 1. 2*s for sum of lambdas equal to 1 - we need two (1) for >= 1 (2) <= 1 plus
        # 2. lambda)_jk >=0, therefore  -1 * lamda)_jk <= 0 -> 2 for each lambda
        # 3. 2 for each s * numClasses (n) , the set of   lambdas

        #-1. ---- Prepare A
        A=[]
        b=[]

        for i in range(s):
            b += [1,-1]

            c2=[1 for _ in range(numParams+1)]
            c2[-1]= 0;  

            A += c2;
            A += [_ * -1 for _ in c2];

        A = np.array(A).reshape(int(len(A)/(numParams+1)),numParams+1, order='C')

        #-2. ---- Prepare A -> lambda's must be non negative
        for i in range(numParams):
            b += [0]

            c2=[0 for _ in range(numParams+1)]
            c2[i]= -1;  
            A = vstack((A, c2))

        #-3. ---- Prepare A
        i=0

        x=xHats[i].flatten().tolist()
        x1 = [-1 * _ for _ in x]
        b += x1
        b += x

        j=np.zeros((n*(numParams+1)))
        j=j.reshape(n,numParams+1)
        j[:,-1]=1
        for k in range(numParams):        
            pd1 = pS.get(k)
            xh1 = xHats[0]
            bb1 = pd1 * xh1
            j[:,k] = bb1.flatten()

        A = vstack((A, j*-1))
        j[:,-1]=-1
        A = vstack((A, j))

        # =>> Compute C

        c= [0. for _ in range(numParams+1)]
        c[-1] = 1

        A = vstack((A, [_*-1 for _ in c]))
        b.append(0)

        self.c = c;
        self.A = A;
        self.b = b;
        return c,A,b;

    def PrepareMatricesNN(self): #Do not include non negative contraints
        s=len(self.X)
        n= self.nStates
        ls = list(it.product(range(s), repeat=2))
        numParams = self.order

        numRows = (2*s + numParams + 2*s*n)
        # numRows= 
        A=[0 for _ in range(numParams + 1)]
        b=[0]

        #-3. ---- Prepare A
        for i in range(s):
            b += [1,-1]

            c2=[1 for _ in range(numParams+1)]
            c2[-1]= 0;  

            A += c2;
            A += [_ * -1 for _ in c2];

        A = np.array(A).reshape(int(len(A)/(numParams+1)),numParams+1, order='C')

        i=0
        x=xHats[i].flatten().tolist()
        x1 = [-1 * _ for _ in x]
        b += x1
        b += x

        j=np.zeros((n*(numParams+1)))
        j=j.reshape(n,numParams+1)
        j[:,-1]=1
        for k in range(numParams):        
            pd1 = pS.get(k)
            xh1 = xHats[0]
            bb1 = pd1 * xh1
            j[:,k] = bb1.flatten()

        A = vstack((A, j*-1))
        j[:,-1]=-1
        A = vstack((A, j))

        # =>> Compute C

        c= [0. for _ in range(numParams+1)]
        c[-1] = 1

        A = vstack((A, [_*-1 for _ in c]))
        b.append(0)

        self.c = c;
        self.A = A;
        self.b = b;
        return c,A,b;
    
    def Solve(self, showProgress=True, solver=None):
        self.c1=matrix(self.c)
        self.A1=matrix(self.A)
        self.b1=matrix(self.b)
        
        solvers.options['show_progress'] = showProgress
        
        self.sol=solvers.lp(self.c1, self.A1, self.b1,)
        self.p = self.sol['x']

        return self.sol;
    
    def Predict(self, CtIn, randomized=False):
        sol = np.matrix ([0. for i in xHats[0]]).T
        Ct = deepcopy(CtIn)
        if ( type(Ct[0]) == list):
            for i in range(len(Ct)):
                Ct[i] = np.matrix(Ct[i]).T
            
        for i in range(hm.order):
            Qx =  self.pS[i] * Ct[i]
            Qx1=  Qx * self.p[i]
            sol += Qx1
        self.sol = sol
        
        mm=[i for i in range(m) if self.sol[i] == max(self.sol)]  # Get all the candidates
        if ( randomized):
            predicted = mm[random.randint(0,len(mm)-1)]
        predicted = mm[0]

        
        return sol, predicted;

In [632]:
# Paper Example - lets prove the examples in paper works correctly!

s1=[int(_) for _ in '0 0 1 1 0 2 1 0 1 2 0 1 2 0 1 2 0 1 0 1'.split()]
numClasses = len(unique(s1))
print( '''# Lets try the Weighted  Markov Chain Approach ............
Lets make sure the numbers are as given in the paper''')

X=[s1]

order=2
m= numClasses
hm= WeightedMarkov(X,m, order)
fS,pS, xHats = hm.Compute(X,order)

print("==>Show if we remove non negative constraints - the results are as expected")
c,A,b = hm.PrepareMatricesNN()
sol = hm.Solve(False)
params = np.array([_ for _ in sol['x'].T])
print("LP Solved Solutions with non negative constraint....\n\n", params, sum(params[0:2]), "solution=", soltn)

print("==>Show the results are as expected when setup right")
c,A,b = hm.PrepareMatrices()
sol = hm.Solve(False)

hm.Dump()
print("LP Setup ...")
hm.DisplayCAb()
soltn = sum(np.array(c) * sol['x'])
params = np.array([_ for _ in sol['x'].T])
print("LP Solved Solutions ....\n", params, sum(params[0:2]), "solution=", soltn)



# Lets try the Weighted  Markov Chain Approach ............
Lets make sure the numbers are as given in the paper
==>Show if we remove non negative constraints - the results are as expected
LP Solved Solutions with non negative constraint....

 [ 1.8    -0.8     0.0157] 0.99999999995 solution= 1.02857142962
==>Show the results are as expected when setup right


<IPython.core.display.Math object>

LP Setup ...


<IPython.core.display.Math object>

LP Solved Solutions ....
 [ 1.      0.      0.0286] 1.00000000214 solution= 1.02857142962


In [633]:
# This is to show the rules if Xt=1, Xt+1 = 2; etc 
#
#
from copy import deepcopy

# You can adjust for floating point/round off errors by uncommenting following two lines
#hm.p[0]=1
#hm.p[1]=0

m= len(unique(s1))
XX=[0 for _ in range(m)]
XC=[XX.copy() for _ in range(order)]

for j in it.product(range(m), repeat=order):
    Xt = deepcopy(XC); #.copy()
    for i,c in enumerate(j): 
        Xt[i][c]= 1

    r,p=hm.Predict(Xt)
    
    print(j, p, Xt, r.T, r.flat[0], r.flat[1], r.flat[2])


(0, 0) 1 [[1, 0, 0], [1, 0, 0]] [[ 0.125  0.75   0.125]] 0.125000000702 0.749999993773 0.125000007662
(0, 1) 1 [[1, 0, 0], [0, 1, 0]] [[ 0.125  0.75   0.125]] 0.125000011142 0.749999990293 0.125000000702
(0, 2) 1 [[1, 0, 0], [0, 0, 1]] [[ 0.125  0.75   0.125]] 0.125000003312 0.750000001603 0.124999997222
(1, 0) 2 [[0, 1, 0], [1, 0, 0]] [[ 0.4286  0.1429  0.4286]] 0.428571422527 0.142857150122 0.428571429487
(1, 1) 0 [[0, 1, 0], [0, 1, 0]] [[ 0.4286  0.1429  0.4286]] 0.428571432967 0.142857146642 0.428571422527
(1, 2) 0 [[0, 1, 0], [0, 0, 1]] [[ 0.4286  0.1429  0.4286]] 0.428571425137 0.142857157952 0.428571419047
(2, 0) 0 [[0, 0, 1], [1, 0, 0]] [[ 0.75  0.25  0.  ]] 0.749999986813 0.250000004884 1.04398657807e-08
(2, 1) 0 [[0, 0, 1], [0, 1, 0]] [[ 0.75  0.25  0.  ]] 0.749999997253 0.250000001404 3.47995526025e-09
(2, 2) 0 [[0, 0, 1], [0, 0, 1]] [[ 0.75  0.25  0.  ]] 0.749999989423 0.250000012714 0.0


In [634]:
#This section is to actually do the prediction on the given set: 12/19 Proof
pdt = [s1[order-1]]
print("=>", s1)

for j in range(len(s1) - order):
    Xt = deepcopy(XC); #.copy()
#    for i in range(order):
#        Xt[i][s1[j+i]]=1
    prs=[0 for _ in range(order)]
    for i,c in enumerate(reversed(s1[j:j+order]) ):
        prs[i] = c;
        Xt[i][c]=1
        
    r, predicted = hm.Predict(Xt)

    original  = s1[j+order]
    pdt.append(predicted)
    
    print("{} {}=>{} {} {} {} {}".format(prs, original, predicted,j, Xt, r.T, mm) )
    #if( j==2): break
    
Metrics(s1[order:], pdt[1:])

=> [0, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1]
[0, 0] 1=>1 0 [[1, 0, 0], [1, 0, 0]] [[ 0.125  0.75   0.125]] [1]
[1, 0] 1=>2 1 [[0, 1, 0], [1, 0, 0]] [[ 0.4286  0.1429  0.4286]] [1]
[1, 1] 0=>0 2 [[0, 1, 0], [0, 1, 0]] [[ 0.4286  0.1429  0.4286]] [1]
[0, 1] 2=>1 3 [[1, 0, 0], [0, 1, 0]] [[ 0.125  0.75   0.125]] [1]
[2, 0] 1=>0 4 [[0, 0, 1], [1, 0, 0]] [[ 0.75  0.25  0.  ]] [1]
[1, 2] 0=>0 5 [[0, 1, 0], [0, 0, 1]] [[ 0.4286  0.1429  0.4286]] [1]
[0, 1] 1=>1 6 [[1, 0, 0], [0, 1, 0]] [[ 0.125  0.75   0.125]] [1]
[1, 0] 2=>2 7 [[0, 1, 0], [1, 0, 0]] [[ 0.4286  0.1429  0.4286]] [1]
[2, 1] 0=>0 8 [[0, 0, 1], [0, 1, 0]] [[ 0.75  0.25  0.  ]] [1]
[0, 2] 1=>1 9 [[1, 0, 0], [0, 0, 1]] [[ 0.125  0.75   0.125]] [1]
[1, 0] 2=>2 10 [[0, 1, 0], [1, 0, 0]] [[ 0.4286  0.1429  0.4286]] [1]
[2, 1] 0=>0 11 [[0, 0, 1], [0, 1, 0]] [[ 0.75  0.25  0.  ]] [1]
[0, 2] 1=>1 12 [[1, 0, 0], [0, 0, 1]] [[ 0.125  0.75   0.125]] [1]
[1, 0] 2=>2 13 [[0, 1, 0], [1, 0, 0]] [[ 0.4286  0.1429  0.4286]] [1]

(18, 14, defaultdict(int, {0: 6, 1: 8, 2: 4}))

In [659]:
#Now lets try this on PEF 

numClasses = len(unique(pef))
print( '''# Lets try the Weighted  Markov Chain Approach for PEF............
Lets make sure the numbers are as given in the paper''')

X=[pef]

order=2
m= numClasses
hm= WeightedMarkov(X,m, order)
fS,pS, xHats = hm.Compute(X,order)

print("==>Show the results are as expected when setup right")
c,A,b = hm.PrepareMatrices()
sol = hm.Solve(False)

hm.Dump()
print("LP Setup ...")
hm.DisplayCAb()
soltn = sum(np.array(c) * sol['x'])
params = np.array([_ for _ in sol['x'].T])
print("LP Solved Solutions ....\n", params, sum(params[0:2]), "solution=", soltn)


# Lets try the Weighted  Markov Chain Approach for PEF............
Lets make sure the numbers are as given in the paper
==>Show the results are as expected when setup right


<IPython.core.display.Math object>

LP Setup ...


<IPython.core.display.Math object>

LP Solved Solutions ....
 [ 1.      0.      0.0005] 1.00000000614 solution= 1.0005116202


In [660]:
XX=[0 for _ in range(m)]
XC=[XX.copy() for _ in range(order)]

pdt = [pef[order-1]]
print("=>", pdt[0:60])

for j in range(len(pef) - order):
    Xt = deepcopy(XC); #.copy()
    prs=[0 for _ in range(order)]
    for i,c in enumerate(reversed(pef[j:j+order]) ):
        prs[i] = c;
        Xt[i][c]=1
        
    r, predicted = hm.Predict(Xt)

    original  = pef[j+order]
    pdt.append(predicted)
    
    #print("{} {}=>{} {} {} {} {}".format(prs, original, predicted,j, Xt, r.T, mm) )
    #if( j==2): break
    
Metrics(pef[order:], pdt[1:])

=> [1]
orig=>[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 0, 2, 2, 2, 2, 2, 2]
pred=>[1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 0, 2, 2, 2, 2, 2]
Total 1311, correct 662, acc: 0.50
class: 0 total: 404, correct: 208, acc: 0.5148514851485149
class: 1 total: 513, correct: 234, acc: 0.45614035087719296
class: 2 total: 394, correct: 220, acc: 0.5583756345177665


(1311, 662, defaultdict(int, {0: 404, 1: 513, 2: 394}))

<hr>
## Examples (Other Data Sets)

In [None]:
#Examples from [2] 
Product A: 6 6 6 6 2 6 2 6 2 2 6 2 6 6 2 6 2 4 4 4 5 6 6 1 2 2 6 6 6 2 6 2 6 6 2 6 2 2 6 2 1 2 2 6 6 6 2 1 2 6 2 6 6 2 2 6 2 2 2 6 2 6 2 2 2 2 2 6 2 2 6 6 6 6 1 2 2 6 2 2 2 2 6 2 2 2 2 3 3 2 3266662626626266266223433131216166166262622266162612162622226616622622234446461661666616222666626 6226262226222666632262222226262226226626662223334166166161666616662122222236666626
Product B: 1 6 6 1 6 1 1 1 1 1 1 6 6 6 1 2 1 6 6 1 1 1 6 6 2 1 6 6 1 1 1 6 1 2 1 6 2 2 2 2 2 6 1 6 6 1 2 1 6 6 6 1 1 1 6 6 1 1 1 1 6 1 1 2 1 6 1 6 1 1 6 2 6 2 6 6 6 3 6 6 1 6 6 2 2 2 3 2 2 6 6 6116266262661366111223226222161611621112216111126111161612161661612222332226666211611161616161166 211661126266612616111161611661661616611662222222226666166616616611613335166666666
Product C: 6 6 6 6 6 6 6 2 6 6 6 6 6 6 6 2 6 6 6 6 2 6 6 6 2 2 6 6 6 6 6 6 6 1 6 2 6 6 6 6 6 6 6 6 2 6 6 1 2 6 1 6 6 1 6 2 6 6 6 6 6 6 6 2 6 6 6 2 6 6 1 6 6 6 6 6 6 6 3 3 6 3 2 1 2 2 1 6 6 1 6166666616661666666666662666666662266261266626626626162621266226262262666222662662261212662266122 1626221156361661226162661626266616166222123616161616661166666166616116666666616616
Product D: 6 2 2 2 2 3 3 4 4 4 5 4 3 3 6 2 6 6 6 3 4 4 3 3 3 3 3 2 6 6 3 4 4 4 4 3 4 2 6 2 2 6 2 2 6 6 3 4 5 4 4 6 3 6 6 6 2 6 2 6 6 2 2 6 4 4 5 4 3 4 3 4 4 6 2 6 6 2 2 6 2 6 6 2 6 6 2 6 6 2 6263555444362662626226266264444446366262626266222222222333554533362662262222623223632234444554466 2626222222255445526266262622334454443436262222222222234444544432226222626262222232
Product E: 6 2 2 2 2 3 3 4 4 4 5 4 3 3 6 2 6 6 2 3 4 4 3 4 4 3 3 2 2 6 3 4 4 4 4 3 4 2 3 2 2 6 3 3 6 6 3 4 5 4 5 3 3 2 6 6 2 6 2 6 6 2 2 6 4 4 4 4 4 4 5 4 4 6 2 6 6 2 2 6 2 6 6 2 6 6 2 6 6 2 6 2634444444626626266662622644444463362226262222222222223645555246626622622226232236322344445543362 622263222255444436266262622334454444436262226222222234444544432226662626262222222

In [None]:
#Examples from [1]
price='''
5 5 5 5 4 5 3 5 3 3 4 2 5 5 3 1 1 1 3 3 4 1 5 1 1 3 3 2 5 1 5 1 5 5 5 5 2 1 4 1 1 1 2 4 5 5 1 4 2 4 1 3 4 2 2 5 2 2 5 5 
2 5 4 4 4 2 2 5 2 2 5 5 5 5 3 2 2 5 4 5 2 4 5 5 4 1 1 1 2 2 3 2 4 5 5 5 2 5 2 5 5 2 4 2 5 5 2 5 5 1 2 3 4 3 3 1 3 1 4 3
5 4 5 5 4 5 5 2 5 2 5 2 2 3 5 5 3 5 2 5 4 2 1 5 2 5 2 2 2 2 5 5 4 5 5 2 2 5 2 2 2 3 4 4 4 5 4 5 1 5 5 1 3 5 5 5 1 5 2 2
2 5 5 5 5 2 4 5 2 2 5 2 5 2 2 2 4 2 2 2 4 5 5 5 3 2 2 5 2 5 4 4 4 5 3 3 5 3 1 1 4 2 2 5 5 2 5 5 5 2 5 5 3 5 5 4 1 5 5 1
5 5 1 5 1 5 5 5 5 1 5 5 5 2 1 2 5 2 5 5 2 3 5 5 5 5 5 2 5''' 

sales='''
5 1 1 1 1 2 2 3 3 3 4 3 2 2 5 1 5 5 1 2 3 3 2 3 3 2 2 1 1 5 2 3 3 3 3 2 3 1 2 1 1 5 2 2 5 5 2 3 4 3 4 2 2 1 5 5 1 5 1 5
5 1 1 5 3 3 3 3 3 3 4 3 3 5 1 5 5 1 1 5 1 5 5 1 5 5 1 5 5 1 5 1 5 2 3 3 3 3 3 3 3 5 1 5 5 1 5 1 5 5 5 5 1 5 1 1 5 3 3 3
3 3 3 5 2 2 5 1 1 1 5 1 5 1 1 1 1 1 1 1 1 1 1 1 1 2 5 3 4 4 4 4 1 3 5 5 1 5 5 1 1 5 1 1 1 1 5 1 2 1 1 2 5 2 1 1 2 3 3 3
3 4 4 3 2 2 5 1 5 1 1 1 5 2 1 1 1 1 4 4 3 3 3 3 2 5 1 5 5 1 5 1 5 1 1 2 2 3 3 4 3 3 1 1 1 2 1 1 5 1 1 1 5 1 1 1 1 1 1 1
2 3 3 1 1 4 3 1 3 2 1 1 1 1 1 5 5 1 5 1 5 1 1 1 1 1 1 1'''


p = [int(c)-1 for c in price.replace('\n', ' ').strip().split()]
s = [int(c)-1 for c in sales.replace('\n', ' ').strip().split()]

numClasses = 5
X=[p[:-6]]

#fs, ps, xHats = ComputeMMC(X, numClasses = numClasses)
#Mdisplay(fs, ps, xHats)

## References:

1. <a src="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0ahUKEwjM66WC3JbXAhVil1QKHTj1CakQFggtMAA&url=http%3A%2F%2Fwww.ccsenet.org%2Fjournal%2Findex.php%2Fmas%2Farticle%2FviewFile%2F6040%2F4874_1_1_1&usg=AOvVaw0fZh5XmtYpF14lVZRfr6aZ">  Application of Markov Chains to Analyze and Predict the Time Series </a>

2. A multivariate Markov chain model for categorical data sequences and its applications in demand predictions
   Wai‚ÄêKi Ching  Eric S. Fung  Michael K. Ng
    
3. Higher-order multivariate Markov chains and their applications; Wai-Ki Ching , Michael K.Ng , Eric S.Fungb

4. Higher-Order Markov Chain Models for Categorical Data Sequences* Wai Ki Ching, Eric S. Fung, Michael K. Ng [Implemented here as WeightedMarkov]

    