In [53]:
import numpy as np
import pandas as pd
import math

# Load the engagement table; index_col=False tells pandas not to use the
# first column as the row index.
df = pd.read_csv("data/App_monthly_enagement.csv", index_col=False)

# One plain Python list of cell values per DataFrame row
samples = [record.values.tolist() for _, record in df.iterrows()]

def calculate_shannon_entropy(data):
    """Return the Shannon entropy, in bits, of the values in *data*.

    Each distinct value is treated as one symbol whose probability is its
    relative frequency in *data*.

    Args:
        data: Iterable of hashable values. Any iterable is accepted,
            including one-shot iterators and generators.

    Returns:
        The entropy as a float. The result is 0.0 when *data* is empty or
        holds only one distinct value.
    """
    # Function-scope import: keeps this block independent of the file's
    # top-level import list.
    from collections import Counter

    # Count the frequency of each distinct value
    frequency = Counter(data)

    # Derive the total from the counts instead of len(data) so that
    # iterables without a length (generators, iterators) also work.
    total_count = sum(frequency.values())

    # Accumulate -p * log2(p) over the distinct values
    entropy = 0.0
    for count in frequency.values():
        probability = count / total_count
        entropy -= probability * math.log2(probability)

    return entropy

# Pool the values of all samples and locate the 33rd/66th percentile cut-offs
def find_percentiles(samples):
    """Return the 33rd and 66th percentiles over every value in *samples*.

    Args:
        samples: Nested sequence of numbers (one inner sequence per sample)
            that NumPy can convert to an array.

    Returns:
        A ``(p33, p66)`` tuple computed over all values pooled together.
    """
    pooled_values = np.array(samples).ravel()
    return tuple(np.percentile(pooled_values, q) for q in (33, 66))

# Global bin boundaries: 33rd and 66th percentiles over the values of all samples
p33, p66 = find_percentiles(samples)

def bin_elements(sample, p33, p66):
    """Map each value of *sample* to a discrete bin label.

    Bins:
        0 -- the value equals 0
        1 -- non-zero and below ``p33``
        2 -- between ``p33`` and ``p66``, both ends inclusive
        3 -- everything else

    Args:
        sample: Iterable of numeric values.
        p33: Lower cut-off.
        p66: Upper cut-off.

    Returns:
        A list of bin labels, one per value, in input order.
    """
    def bin_of(value):
        # The zero check comes first, so 0 always gets its own bin
        # regardless of where the cut-offs lie.
        if value == 0:
            return 0
        if value < p33:
            return 1
        if p33 <= value <= p66:
            return 2
        return 3

    return [bin_of(value) for value in sample]

# Replace every sample's raw values with their bin labels, using the shared cut-offs
binned_samples = [bin_elements(values, p33, p66) for values in samples]

# One Shannon entropy value per binned sample, in sample order
ent_list_binned = list(map(calculate_shannon_entropy, binned_samples))
    
# Hold the per-sample entropies in a NumPy array for the percentile computation
ent_array = np.asarray(ent_list_binned)

# Entropy cut-offs at the 33rd and 66th percentiles
threshold_33, threshold_66 = (np.percentile(ent_array, q) for q in (33, 66))

# Label each entropy value: below threshold_33 is "stable", below
# threshold_66 is "moderate", anything else is "variable". The comparisons
# are strict, so a value equal to a threshold falls into the higher label.
segmented_list = [
    "stable" if score < threshold_33
    else "moderate" if score < threshold_66
    else "variable"
    for score in ent_list_binned
]