# Spatial Dynamics: Markov based methods

Markov chain's assumption is that the observations are on the rows of the input and the different points in time on the columns.

In [1]:
import pysal
import numpy as np

# 3 states (a,b,c) and 5 different pixels at three different points in time.
# So the first pixel was in class ‘b’ in period 1, class ‘a’ in period 2, and class ‘c’ in period 3.
c = np.array([['b','a','c'],['c','c','a'],['c','b','c'],['a','a','b'],['a','b','c']])
c.shape, c



((5, 3), array([['b', 'a', 'c'],
        ['c', 'c', 'a'],
        ['c', 'b', 'c'],
        ['a', 'a', 'b'],
        ['a', 'b', 'c']], dtype='<U1'))

In [2]:
m = pysal.Markov(c)
m.classes  # array (k, 1), all different classes (bins) of the matrix.

array(['a', 'b', 'c'], dtype='<U1')

In [3]:
m.transitions  # matrix (k, k), count of transitions between each state i and j.

array([[1., 2., 1.],
       [1., 0., 2.],
       [1., 1., 1.]])

In [4]:
m.p  # matrix (k, k), transition probability matrix.

matrix([[0.25      , 0.5       , 0.25      ],
        [0.33333333, 0.        , 0.66666667],
        [0.33333333, 0.33333333, 0.33333333]])

In [5]:
m.steady_state  # matrix (k, 1), ergodic distribution.

matrix([[0.30769231],
        [0.28846154],
        [0.40384615]])

In [6]:
import geopandas as gpd
import pysal as ps
import matplotlib.pyplot as plt
import pandas as pd
import random
import numpy as np
import datetime as dt
import glob
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pylab import figure, scatter, show
from matplotlib import colors
import csv
import requests
import zipfile, urllib, os
from urllib.request import Request,urlopen, urlretrieve
import urllib

import warnings
warnings.filterwarnings('ignore')

In [7]:
allFiles = glob.glob(r"taxi_data/*.csv")
allFiles

['taxi_data\\yellow_tripdata_2017-03.csv',
 'taxi_data\\yellow_tripdata_2017-06.csv',
 'taxi_data\\yellow_tripdata_2017-11.csv']

In [8]:

frame = pd.DataFrame()
list_ = []

for file_ in allFiles:
    num_lines = sum(1 for l in open(file_))
   

    df = pd.read_csv(file_, header=0 ).sample(frac=0.01)
    list_.append(df)
    
df = pd.concat(list_)

In [9]:
df_clean = df[(df['passenger_count'] < 10) & 
                        (df['passenger_count'] > 0) & 
                        (df['extra'] >= 0) &
                        (df['extra'] <= 1) &
                        (df['RatecodeID'] < 7) &
                        (df['mta_tax'] >= 0) &
                        (df['trip_distance'] > 0) &
                        (df['tip_amount'] >= 0) &
                        (df['tolls_amount'] >= 0) &
                        (df['improvement_surcharge'] > 0) &
                        (df['total_amount'] > 0)]

df_clean = df_clean[(df_clean['payment_type'] == 1)] # only credit card payment

df_clean['tpep_pickup_datetime'] = df_clean.tpep_pickup_datetime.apply(
                                    lambda x:dt.datetime.strptime(x,"%Y-%m-%d %H:%M:%S"))


df_clean['pickup_month'] = df_clean.tpep_pickup_datetime.apply(lambda x: x.month)


In [10]:
taxi_zone_tip_df = pd.pivot_table(df_clean, values='tip_amount', index=['PULocationID'], columns=['pickup_month'], aggfunc=np.mean)
taxi_zone_tip_df['PULocation'] = taxi_zone_tip_df.index
taxi_zone_tip_df.shape

(195, 5)

In [11]:
taxi_zones = gpd.read_file('taxi_zones_tip.shp')

In [12]:
taxi_zone_tip = pd.merge(taxi_zones, taxi_zone_tip_df, on='PULocation', how='left')

In [13]:
taxi_zone_tip.shape, taxi_zones.shape

((227, 17), (227, 13))

In [14]:
taxi_zone_tip.to_csv('taxi_zone_tip.csv', float_format='%.3f', index=False, header=True, sep=",", decimal=".")


## Classic Markov

In [15]:
zone_tip = taxi_zone_tip[[3,6,11]]
#zone_tip = zone_tip.dropna()

zone_tip = np.array(zone_tip)
#zone_tip.index = taxi_zone_tip['zone']

In [16]:
zone_tip_q = np.array([ps.Quantiles(y).yb for y in zone_tip]).transpose()
zone_tip_q.shape

(3, 227)

In [17]:
markov_tip = ps.Markov(zone_tip_q)

In [18]:
markov_tip.classes

array([0, 2, 4])

In [19]:
markov_tip.transitions

array([[270.,  80.,  70.],
       [ 75.,  28.,  26.],
       [ 75.,  21.,  33.]])

In [20]:
markov_tip.p

matrix([[0.64285714, 0.19047619, 0.16666667],
        [0.58139535, 0.21705426, 0.20155039],
        [0.58139535, 0.1627907 , 0.25581395]])

In [21]:
markov_tip.steady_state

matrix([[0.61946903],
        [0.19026549],
        [0.19026549]])

In [22]:
ps.ergodic.fmpt(markov_tip.p)

matrix([[1.61428571, 5.39344262, 5.76393443],
        [1.72      , 5.25581395, 5.55737705],
        [1.72      , 5.55737705, 5.25581395]])

## Spatial Markov

In [23]:
W = ps.queen_from_shapefile("taxi_zones_tip.shp")
W.transform = 'r'
W.sparse



<227x227 sparse matrix of type '<class 'numpy.float64'>'
	with 1118 stored elements in Compressed Sparse Row format>

In [24]:
f = pd.read_csv("taxi_zone_tip.csv",sep=',')
pci = np.array([f[str(y)] for y in [3,6,11]])

pci.shape

(3, 227)

In [25]:
pci = pci.transpose()

In [26]:
pci = np.nan_to_num(pci)
rpci = pci / (pci.mean(axis = 0))
rpci.shape


(227, 3)

In [27]:
sm = ps.Spatial_Markov(rpci, W, k = 5)

The global transition probability matrix for relative tip amount:

In [28]:
sm.p

matrix([[0.78807947, 0.0794702 , 0.0397351 , 0.01986755, 0.07284768],
        [0.32258065, 0.16129032, 0.16129032, 0.09677419, 0.25806452],
        [0.11111111, 0.03333333, 0.58888889, 0.21111111, 0.05555556],
        [0.04444444, 0.03333333, 0.24444444, 0.54444444, 0.13333333],
        [0.13043478, 0.04347826, 0.04347826, 0.17391304, 0.60869565]])

In [29]:
for p in sm.P:
    print(p)

[[0.85074627 0.07462687 0.         0.         0.07462687]
 [0.66666667 0.         0.         0.16666667 0.16666667]
 [0.75       0.         0.         0.25       0.        ]
 [0.5        0.         0.         0.         0.5       ]
 [0.35714286 0.07142857 0.14285714 0.07142857 0.35714286]]
[[0.75       0.06818182 0.06818182 0.02272727 0.09090909]
 [0.4        0.         0.2        0.2        0.2       ]
 [0.14285714 0.04761905 0.61904762 0.0952381  0.0952381 ]
 [0.22222222 0.11111111 0.22222222 0.11111111 0.33333333]
 [0.4        0.         0.1        0.         0.5       ]]
[[0.8        0.         0.         0.         0.2       ]
 [0.33333333 0.66666667 0.         0.         0.        ]
 [0.         0.         0.75555556 0.24444444 0.        ]
 [0.03225806 0.03225806 0.32258065 0.61290323 0.        ]
 [0.         0.         0.         0.33333333 0.66666667]]
[[0.75       0.0625     0.1875     0.         0.        ]
 [0.         0.14285714 0.42857143 0.14285714 0.28571429]
 [0.1      

The different steady state distributions implied by these different transition probabilities:

In [30]:
sm.S

array([[0.76644423, 0.06613441, 0.01787417, 0.02442803, 0.12511916],
       [0.52568354, 0.05020878, 0.19518369, 0.04565029, 0.18327371],
       [0.11      , 0.033     , 0.45      , 0.341     , 0.066     ],
       [0.13106114, 0.06794624, 0.26440618, 0.41641778, 0.12016866],
       [0.16002919, 0.05143634, 0.03342011, 0.21592455, 0.53918981]])

In [31]:
for f in sm.F:
    print(f)

[[ 1.30472637 13.84324324 56.15333333 43.03414634 11.65333333]
 [ 1.75       15.12072072 54.40666667 35.82926829  9.90666667]
 [ 1.54166667 15.14864865 55.94666667 33.27560976 11.44666667]
 [ 2.16666667 15.06486486 51.32666667 40.93658537  6.82666667]
 [ 2.33333333 14.28648649 44.5        36.83902439  7.99238095]]
[[ 1.90228517 18.34511628 11.67336683 24.53775322  8.99898063]
 [ 3.30023456 19.91683721  9.84221106 20.13075506  7.60652396]
 [ 4.73025801 19.18232558  5.1233788  20.79005525  8.56727829]
 [ 3.82486317 18.19906977  9.52562814 21.9056681   6.46738022]
 [ 2.9460516  20.51255814 11.33869347 25.78821363  5.45632008]]
[[ 9.09090909 87.90909091 13.          8.          5.        ]
 [ 3.         30.3030303  16.         11.          8.        ]
 [41.54545455 84.          2.22222222  4.09090909 46.54545455]
 [37.45454545 79.90909091  5.          2.93255132 42.45454545]
 [40.45454545 82.90909091  8.          3.         15.15151515]]
[[ 7.63002681 15.91855204  4.99003322  7.85170341 18

## LISA Markov

In [32]:
lm = ps.LISA_Markov(rpci, W)
lm.classes

array([1, 2, 3, 4])

In [33]:
lm.transitions

array([[ 53.,   3.,   1.,   7.],
       [ 14.,  31.,  13.,   4.],
       [ 14.,  31., 209.,  26.],
       [ 12.,   6.,  15.,  15.]])

The estimated transition probability matrix is:

In [34]:
lm.p

matrix([[0.828125  , 0.046875  , 0.015625  , 0.109375  ],
        [0.22580645, 0.5       , 0.20967742, 0.06451613],
        [0.05      , 0.11071429, 0.74642857, 0.09285714],
        [0.25      , 0.125     , 0.3125    , 0.3125    ]])

The implied long run steady state distribution of the chain is:

In [35]:
lm.steady_state

matrix([[0.44563426],
        [0.13759066],
        [0.2933462 ],
        [0.12342888]])

Finally the first mean passage time for the LISAs is:

In [36]:
ps.ergodic.fmpt(lm.p)

matrix([[ 2.24399264, 14.21740113, 13.35088559,  9.73865557],
        [ 6.91112071,  7.26793512,  9.05600766, 10.84241321],
        [ 9.54235091, 11.00474576,  3.40894139, 10.59797166],
        [ 7.04854509, 11.62666667,  7.95595979,  8.10183169]])