In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import transbigdata as tbd

In [2]:
# Scratch check: a frame built from a plain list is an instance of pd.DataFrame
# (the cell output below shows True).
isinstance(pd.DataFrame([1,2,3]),pd.DataFrame)

True

In [3]:
# Call the cal_entropy_rate shipped with the imported transbigdata package on a
# short sample sequence; the value it returned is shown in the cell output below.
tbd.cal_entropy_rate([1,2,3,1,2,1])

0.5283208335737187

In [110]:
def cal_entropy(sequence):
    '''
    Calculate the Shannon entropy (in bits) of the values in a sequence.

    Each distinct value is treated as one symbol; its probability is its
    relative frequency in the sequence. Missing values (NaN/None) are not
    counted.

    Parameters
    ----------------
    sequence : List,DataFrame,Series
        sequence data. For a DataFrame the column labelled 0 is used when
        present, otherwise the first column.

    Returns
    ----------------
    entropy : Number
        Entropy in bits; 0.0 for an empty sequence
    '''
    frame = pd.DataFrame(sequence)
    # An empty list produces a frame without any column: nothing to count.
    if frame.shape[1] == 0 or len(frame) == 0:
        return 0.0
    # Pick the column explicitly instead of assuming it is labelled 0, so that
    # named Series and arbitrary single-column DataFrames are accepted too.
    column = 0 if 0 in frame.columns else frame.columns[0]
    # normalize=True yields the relative frequencies directly. This avoids
    # reset_index(), whose column naming for value_counts() results differs
    # between pandas 1.x and 2.x.
    probabilities = frame[column].value_counts(normalize=True)
    # Change of base: log2(p) = ln(p) / ln(2).
    entropy = -(probabilities * np.log(probabilities) / np.log(2)).sum()
    return entropy

def cal_entropy_rate(sequence):
    '''
    Calculate entropy rate.
    Reference: Goulet-Langlois, G., Koutsopoulos, H. N., Zhao, Z., & Zhao, J. (2017). Measuring regularity of individual travel patterns. IEEE Transactions on Intelligent Transportation Systems, 19(5), 1583-1592.

    Steps: every distinct item is replaced by an id (the position of its
    first occurrence), the id sequence is Burrows-Wheeler transformed, the
    transformed sequence is cut into consecutive segments of about n**0.5
    symbols, and the mean of the segment entropies is returned.

    Parameters
    ----------------
    sequence : List,DataFrame,Series
        sequence data, in sequence order. For a DataFrame the column named
        'key' is used when present, otherwise the first column. Index labels
        are ignored; only the positional order matters.

    Returns
    ----------------
    entropy_rate : Number
        Mean segment entropy in bits; 0.0 for an empty sequence
    '''
    frame = pd.DataFrame(sequence)
    # An empty input has no symbols to transform.
    if frame.shape[1] == 0 or len(frame) == 0:
        return 0.0

    # Select the data column explicitly. Forcing columns=['key'] in the
    # constructor would silently discard the data of a named Series or of a
    # DataFrame that has no 'key' column.
    column = 'key' if 'key' in frame.columns else frame.columns[0]
    # Drop the caller's index so the order is purely positional.
    symbols = frame[[column]].reset_index(drop=True)
    symbols.columns = ['key']
    symbols['position'] = range(len(symbols))

    # Number the items: each distinct item gets the position of its first
    # occurrence as its id.
    first_seen = symbols.drop_duplicates('key').rename(
        columns={'position': 'Id'})
    encoded = pd.merge(symbols, first_seen, on='key').sort_values('position')

    # The sequence as a list of id tokens.
    tokens = list(encoded['Id'].astype(str))

    # BWT: sort all rotations (compared as comma-joined strings) and keep the
    # last token of each sorted rotation.
    rotations = sorted(
        ','.join(tokens[shift:] + tokens[:shift])
        for shift in range(len(tokens))
    )
    last_column = [rotation.rsplit(',', 1)[-1] for rotation in rotations]

    # Split the transformed sequence into segments of length n**0.5 and
    # average the entropy of the segments.
    n = len(last_column)
    segment_ids = (pd.Series(range(n)) / n ** 0.5).astype(int)
    entropy_rate = pd.Series(last_column).groupby(segment_ids).apply(cal_entropy).mean()
    return entropy_rate