In [34]:
import math
import time
import pandas as pd
import numpy as np

In [35]:
with open(r"C:\Users\SOUMEN\Desktop\ISI,KOLKATA\3rd sem\dissertation\entropy calculation using dgim in response matrix\response_matrix.npy", 'rb') as f:
        response_matrix = np.load(f)
print(response_matrix)

[[ 1  0  0 ... -1 -1 -1]
 [ 0  0  0 ...  1  1 -1]
 [ 0  0  1 ... -1 -1 -1]
 ...
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]]


In [36]:
class DGIM:
    def __init__(self, data_series, N):
        """
        Implementation of DGIM Algorithm

        Args:
            data_series (pd.Series): Input Pandas Series containing binary data (e.g., 0's and 1's).
            N (int): Window size
        """
        self.data_series = data_series
        self.N = N
        self.ts = 0
        self.buckets = []
        self._init_buckets()
        
    def _init_buckets(self):
        """
        Create the buckets according to the window size.
        """
        for i in range(int(math.log(self.N, 2))+1):
            self.buckets.append([])
    
    def _old_bucket(self, k=0):
        """
        Find the oldest bucket according to k.

        Args:
            k (int, optional): Defaults to N.

        Returns:
            tuple: old bucket number size and old bucket end-timestamp 
        """
        k = self.N if k == 0 else k
        obi = 0
        obt = 0
        for i in range(len(self.buckets)):
            for ets in self.buckets[i]:
                if ets >= self.ts - k:
                    obi = i
                    obt = ets
                else:
                    return obi, obt
        return obi, obt
    
    def _update(self):
        """
        Update the buckets based on the algorithm constraints.
        If we have more than 2 buckets of each size, merge them.
        """
        for i in range(len(self.buckets)):
            if len(self.buckets[i]) > 2:
                self.buckets[i].pop()
                tmp = self.buckets[i].pop()
                if i != len(self.buckets) - 1:
                    self.buckets[i+1].insert(0, tmp)
                    
    def count(self, k=0):
        """
        Count the ones in the last k bits.

        Args:
            k (int, optional): Defaults to N.

        Returns:
            int: count
        """
        cnt = 0
        obi, obt = self._old_bucket(k)
        for i in range(len(self.buckets)):
            if i > obi:
                break
            for ets in self.buckets[i]:
                if ets > obt:
                    cnt += 2**i
                elif ets == obt:
                    cnt += int(0.5 * 2**i)
        return cnt+1
    
    def run(self):
        """
        Iterate through the data_series and process.
        """
        for x in self.data_series:
            self.ts += 1
            obi, obt = self._old_bucket()
            if obt is not None and obt == self.ts - self.N:
                if obt in self.buckets[obi]:
                    self.buckets[obi].remove(obt)
            if x == 1:
                self.buckets[0].insert(0, self.ts)
                self._update()
            elif x == -1:
                # Ignore -1 values and do not update the buckets.
                pass

In [37]:
# Example usage:
if __name__ == "__main__":
    lst =[]
    for i in range(response_matrix.shape[1]):
        INPUT_SERIES = pd.Series(response_matrix[ : ,i])
    
    
        # INPUT_SERIES = pd.Series([1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1])
        WINDOW_SIZE = len(INPUT_SERIES)

        start = time.time()

        dgim = DGIM(INPUT_SERIES, WINDOW_SIZE)
        dgim.run()
        result_window = dgim.count(WINDOW_SIZE)
 
        time1 = time.time() - start

        print(f"The number of 1 bits in the col{i} window with size {WINDOW_SIZE} is {result_window} ")
        lst.append(result_window)
#         print("The running time of DGIM with window size", WINDOW_SIZE, ":", time1)
    

The number of 1 bits in the col0 window with size 64 is 5 
The number of 1 bits in the col1 window with size 64 is 2 
The number of 1 bits in the col2 window with size 64 is 1 
The number of 1 bits in the col3 window with size 64 is 6 
The number of 1 bits in the col4 window with size 64 is 6 
The number of 1 bits in the col5 window with size 64 is 5 
The number of 1 bits in the col6 window with size 64 is 2 
The number of 1 bits in the col7 window with size 64 is 3 
The number of 1 bits in the col8 window with size 64 is 6 
The number of 1 bits in the col9 window with size 64 is 4 
The number of 1 bits in the col10 window with size 64 is 8 
The number of 1 bits in the col11 window with size 64 is 4 
The number of 1 bits in the col12 window with size 64 is 7 
The number of 1 bits in the col13 window with size 64 is 6 
The number of 1 bits in the col14 window with size 64 is 4 
The number of 1 bits in the col15 window with size 64 is 1 
The number of 1 bits in the col16 window with size

In [38]:
lst1 = []
for i in range((response_matrix.shape[1])):
    count = 0
    for j in range((response_matrix.shape[0])):
        if response_matrix[j][i] == 1:
            count += 1
    print(f"The number of one in col{i} is {count}")
    lst1.append(count)

The number of one in col0 is 5
The number of one in col1 is 2
The number of one in col2 is 1
The number of one in col3 is 6
The number of one in col4 is 6
The number of one in col5 is 5
The number of one in col6 is 2
The number of one in col7 is 3
The number of one in col8 is 6
The number of one in col9 is 4
The number of one in col10 is 9
The number of one in col11 is 4
The number of one in col12 is 8
The number of one in col13 is 7
The number of one in col14 is 4
The number of one in col15 is 0
The number of one in col16 is 2
The number of one in col17 is 2
The number of one in col18 is 4
The number of one in col19 is 3
The number of one in col20 is 7
The number of one in col21 is 6
The number of one in col22 is 3
The number of one in col23 is 9
The number of one in col24 is 5
The number of one in col25 is 0
The number of one in col26 is 2
The number of one in col27 is 1
The number of one in col28 is 1
The number of one in col29 is 3
The number of one in col30 is 2
The number of one 

In [39]:
cnt = 0
for i in range((response_matrix.shape[1])):
    if lst[i] == lst1[i]:
        cnt += 1

accuracy = (cnt/response_matrix.shape[1])*100
print(accuracy)

83.23185011709602
