In [30]:
import polars as pl
import numpy as np
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns

class EventHMM:
    """
    Hidden Markov Model for analyzing app event sequences.

    States represent hidden user behavior patterns.
    Observations are the actual events (login, click, purchase, etc.)
    """

    def __init__(self, n_states: int = 3, state_names: Optional[List[str]] = None):
        """
        Initialize HMM model.

        Args:
            n_states: Number of hidden states
            state_names: Optional names for states (e.g., ['browsing', 'engaged', 'converting'])
        """
        self.n_states = n_states
        self.state_names = state_names or [f"State_{i}" for i in range(n_states)]

        # Model parameters
        self.transition_matrix = None  # A[i,j] = P(state_j | state_i)
        self.emission_matrix = None    # B[i,j] = P(event_j | state_i)
        self.initial_probs = None      # π[i] = P(state_i at t=0)

        # Mappings
        self.event_to_idx = {}
        self.idx_to_event = {}
        self.n_events = 0

        # Training data storage
        self.sequences = []
        self.user_sequences = {}

    def _prepare_data(self, df: pl.DataFrame) -> Dict[str, List[int]]:
        """
        Convert DataFrame to sequences of event indices grouped by user.
        """
        # Get unique events and create mappings
        unique_events = df.select("event").unique().to_series().to_list()
        self.event_to_idx = {event: idx for idx, event in enumerate(unique_events)}
        self.idx_to_event = {idx: event for event, idx in self.event_to_idx.items()}
        self.n_events = len(unique_events)

        print(f"Found {self.n_events} unique events: {unique_events}")

        # Group by user and create sequences
        user_sequences = {}

        # Sort by user and timestamp
        df_sorted = df.sort(["userid", "timestamp"])

        # Group by user
        for user_data in df_sorted.group_by("userid"):
            userid = user_data[0]
            events = user_data[1].select("event").to_series().to_list()

            # Convert events to indices
            event_sequence = [self.event_to_idx[event] for event in events]

            # Only keep sequences with more than 1 event
            if len(event_sequence) > 1:
                user_sequences[userid] = event_sequence

        print(f"Prepared sequences for {len(user_sequences)} users")
        return user_sequences

    def _initialize_parameters(self):
        """
        Initialize HMM parameters randomly.
        """
        # Random initialization with normalization
        self.transition_matrix = np.random.rand(self.n_states, self.n_states)
        self.transition_matrix = self.transition_matrix / self.transition_matrix.sum(axis=1, keepdims=True)

        self.emission_matrix = np.random.rand(self.n_states, self.n_events)
        self.emission_matrix = self.emission_matrix / self.emission_matrix.sum(axis=1, keepdims=True)

        self.initial_probs = np.random.rand(self.n_states)
        self.initial_probs = self.initial_probs / self.initial_probs.sum()

        print("Initialized random parameters")

    def _forward_algorithm(self, sequence: List[int]) -> Tuple[np.ndarray, float]:
        """
        Forward algorithm for computing forward probabilities.
        """
        T = len(sequence)
        alpha = np.zeros((T, self.n_states))

        # Initialization
        alpha[0] = self.initial_probs * self.emission_matrix[:, sequence[0]]

        # Forward pass
        for t in range(1, T):
            for j in range(self.n_states):
                alpha[t, j] = np.sum(alpha[t-1] * self.transition_matrix[:, j]) * \
                             self.emission_matrix[j, sequence[t]]

        # Total probability
        log_likelihood = np.log(np.sum(alpha[T-1]) + 1e-10)

        return alpha, log_likelihood

    def _backward_algorithm(self, sequence: List[int]) -> np.ndarray:
        """
        Backward algorithm for computing backward probabilities.
        """
        T = len(sequence)
        beta = np.zeros((T, self.n_states))

        # Initialization
        beta[T-1] = 1.0

        # Backward pass
        for t in range(T-2, -1, -1):
            for i in range(self.n_states):
                beta[t, i] = np.sum(self.transition_matrix[i] *
                                  self.emission_matrix[:, sequence[t+1]] *
                                  beta[t+1])

        return beta

    def _baum_welch_step(self, sequences: List[List[int]]) -> float:
        """
        One step of Baum-Welch algorithm (EM step).
        """
        # Initialize accumulators
        gamma_sum = np.zeros(self.n_states)
        xi_sum = np.zeros((self.n_states, self.n_states))
        gamma_obs_sum = np.zeros((self.n_states, self.n_events))

        total_log_likelihood = 0.0

        for sequence in sequences:
            if len(sequence) < 2:
                continue

            T = len(sequence)

            # Forward-backward
            alpha, log_likelihood = self._forward_algorithm(sequence)
            beta = self._backward_algorithm(sequence)

            total_log_likelihood += log_likelihood

            # Compute gamma (state probabilities)
            gamma = alpha * beta
            gamma = gamma / (np.sum(gamma, axis=1, keepdims=True) + 1e-10)

            # Compute xi (transition probabilities)
            xi = np.zeros((T-1, self.n_states, self.n_states))
            for t in range(T-1):
                for i in range(self.n_states):
                    for j in range(self.n_states):
                        xi[t, i, j] = alpha[t, i] * self.transition_matrix[i, j] * \
                                     self.emission_matrix[j, sequence[t+1]] * beta[t+1, j]

                # Normalize
                xi_sum_t = np.sum(xi[t])
                if xi_sum_t > 0:
                    xi[t] /= xi_sum_t

            # Accumulate statistics
            gamma_sum += np.sum(gamma, axis=0)
            xi_sum += np.sum(xi, axis=0)

            # Emission accumulation
            for t in range(T):
                gamma_obs_sum[:, sequence[t]] += gamma[t]

        # M-step: Update parameters
        # Initial probabilities (from first time step gammas)
        initial_gamma_sum = np.zeros(self.n_states)
        for sequence in sequences:
            if len(sequence) >= 1:
                alpha, _ = self._forward_algorithm(sequence)
                beta = self._backward_algorithm(sequence)
                gamma_0 = alpha[0] * beta[0]
                gamma_0 /= (np.sum(gamma_0) + 1e-10)
                initial_gamma_sum += gamma_0

        self.initial_probs = initial_gamma_sum / (np.sum(initial_gamma_sum) + 1e-10)

        # Transition matrix
        for i in range(self.n_states):
            row_sum = np.sum(xi_sum[i])
            if row_sum > 0:
                self.transition_matrix[i] = xi_sum[i] / row_sum
            else:
                self.transition_matrix[i] = 1.0 / self.n_states

        # Emission matrix
        for i in range(self.n_states):
            row_sum = gamma_sum[i]
            if row_sum > 0:
                self.emission_matrix[i] = gamma_obs_sum[i] / row_sum
            else:
                self.emission_matrix[i] = 1.0 / self.n_events

        return total_log_likelihood

    def fit(self, df: pl.DataFrame, max_iterations: int = 100, tolerance: float = 1e-4):
        """
        Train the HMM using Baum-Welch algorithm.
        """
        print("Starting HMM training...")

        # Prepare data
        self.user_sequences = self._prepare_data(df)
        sequences = list(self.user_sequences.values())

        # Initialize parameters
        self._initialize_parameters()

        # Training loop
        prev_log_likelihood = float('-inf')

        for iteration in range(max_iterations):
            log_likelihood = self._baum_welch_step(sequences)

            print(f"Iteration {iteration + 1}: Log-likelihood = {log_likelihood:.4f}")

            # Check convergence
            if abs(log_likelihood - prev_log_likelihood) < tolerance:
                print(f"Converged after {iteration + 1} iterations")
                break

            prev_log_likelihood = log_likelihood

        print("Training completed!")
        self._print_model_summary()

    def predict_sequence(self, sequence: List[str]) -> List[str]:
        """
        Predict most likely state sequence using Viterbi algorithm.
        """
        # Convert events to indices
        obs_sequence = [self.event_to_idx.get(event, 0) for event in sequence]
        T = len(obs_sequence)

        # Viterbi tables
        delta = np.zeros((T, self.n_states))
        psi = np.zeros((T, self.n_states), dtype=int)

        # Initialization
        delta[0] = self.initial_probs * self.emission_matrix[:, obs_sequence[0]]

        # Forward pass
        for t in range(1, T):
            for j in range(self.n_states):
                trans_probs = delta[t-1] * self.transition_matrix[:, j]
                psi[t, j] = np.argmax(trans_probs)
                delta[t, j] = np.max(trans_probs) * self.emission_matrix[j, obs_sequence[t]]

        # Backward pass - find best path
        states = np.zeros(T, dtype=int)
        states[T-1] = np.argmax(delta[T-1])

        for t in range(T-2, -1, -1):
            states[t] = psi[t+1, states[t+1]]

        # Convert to state names
        return [self.state_names[state] for state in states]

    def get_user_behavior_pattern(self, userid: str) -> Optional[List[str]]:
        """
        Get the predicted behavior pattern for a specific user.
        """
        if userid not in self.user_sequences:
            return None

        sequence = self.user_sequences[userid]
        events = [self.idx_to_event[idx] for idx in sequence]
        return self.predict_sequence(events)

    def _print_model_summary(self):
        """
        Print a summary of the learned model.
        """
        print("\n" + "="*50)
        print("HMM MODEL SUMMARY")
        print("="*50)

        print(f"\nNumber of states: {self.n_states}")
        print(f"Number of events: {self.n_events}")

        print(f"\nInitial State Probabilities:")
        for i, prob in enumerate(self.initial_probs):
            print(f"  {self.state_names[i]}: {prob:.3f}")

        print(f"\nTransition Matrix:")
        print("From \\ To    ", end="")
        for state in self.state_names:
            print(f"{state:>10}", end="")
        print()

        for i, state in enumerate(self.state_names):
            print(f"{state:>10}   ", end="")
            for j in range(self.n_states):
                print(f"{self.transition_matrix[i,j]:>10.3f}", end="")
            print()

        print(f"\nEmission Probabilities (Top 3 events per state):")
        for i, state in enumerate(self.state_names):
            print(f"\n{state}:")
            # Get top 3 events for this state
            top_events = np.argsort(self.emission_matrix[i])[-3:][::-1]
            for event_idx in top_events:
                event_name = self.idx_to_event[event_idx]
                prob = self.emission_matrix[i, event_idx]
                print(f"  {event_name}: {prob:.3f}")

    def plot_model(self):
        """
        Visualize the HMM model.
        """
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

        # 1. Initial probabilities
        ax1.bar(self.state_names, self.initial_probs)
        ax1.set_title('Initial State Probabilities')
        ax1.set_ylabel('Probability')

        # 2. Transition matrix heatmap
        sns.heatmap(self.transition_matrix,
                   xticklabels=self.state_names,
                   yticklabels=self.state_names,
                   annot=True, fmt='.3f', cmap='Blues', ax=ax2)
        ax2.set_title('State Transition Matrix')

        # 3. Emission matrix heatmap
        event_names = [self.idx_to_event[i] for i in range(self.n_events)]
        sns.heatmap(self.emission_matrix,
                   xticklabels=event_names,
                   yticklabels=self.state_names,
                   annot=True, fmt='.2f', cmap='Greens', ax=ax3)
        ax3.set_title('Event Emission Probabilities')
        ax3.set_xlabel('Events')

        # 4. Event distribution
        event_counts = np.sum(self.emission_matrix, axis=0)
        ax4.bar(event_names, event_counts)
        ax4.set_title('Overall Event Distribution')
        ax4.set_xlabel('Events')
        ax4.set_ylabel('Total Probability Mass')
        ax4.tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()


# Example usage and testing
def create_sample_data():
    """
    Create sample app event data for testing.
    """
    np.random.seed(42)

    events = ['app_open', 'browse_catalog', 'view_product', 'add_to_cart',
              'checkout_start', 'payment', 'purchase_complete', 'app_close']

    countries = ['US', 'UK', 'DE', 'FR', 'CA']

    data = []

    # Generate realistic user sessions
    for userid in range(100):
        # Each user has 1-5 sessions
        n_sessions = np.random.randint(1, 6)

        for session in range(n_sessions):
            country = np.random.choice(countries)
            base_time = session * 3600 + userid * 86400  # Different days/sessions

            # Generate event sequence based on user behavior pattern
            if userid < 30:  # Browsers - mostly browse, rarely purchase
                session_events = ['app_open', 'browse_catalog'] + \
                               np.random.choice(['browse_catalog', 'view_product'],
                                              size=np.random.randint(2, 8)).tolist() + \
                               ['app_close']
            elif userid < 70:  # Shoppers - browse and sometimes buy
                session_events = ['app_open', 'browse_catalog', 'view_product'] + \
                               np.random.choice(['view_product', 'add_to_cart'],
                                              size=np.random.randint(1, 4)).tolist()
                if np.random.random() < 0.3:  # 30% conversion
                    session_events += ['checkout_start', 'payment', 'purchase_complete']
                session_events += ['app_close']
            else:  # Buyers - focused, high conversion
                session_events = ['app_open', 'browse_catalog', 'view_product',
                                'add_to_cart', 'checkout_start']
                if np.random.random() < 0.8:  # 80% conversion
                    session_events += ['payment', 'purchase_complete']
                session_events += ['app_close']

            # Add events with timestamps
            for i, event in enumerate(session_events):
                data.append({
                    'userid': f'user_{userid:03d}',
                    'country': country,
                    'timestamp': base_time + i * 60,  # 1 minute between events
                    'event': event
                })

    return pl.DataFrame(data)



In [4]:
# Demo usage
if __name__ == "__main__":
    # Create sample data
    print("Creating sample data...")
    df = create_sample_data()
    print(df)

    print(f"Sample data shape: {df.shape}")
    print(f"Unique users: {df.select('userid').n_unique()}")
    print(f"Unique events: {df.select('event').unique().to_series().to_list()}")
    print(f"Date range: {df.select('timestamp').min().item()} - {df.select('timestamp').max().item()}")

    # Initialize and train HMM
    print("\nInitializing HMM with 3 states...")
    hmm = EventHMM(n_states=3, state_names=['Browser', 'Shopper', 'Buyer'])

    # Train the model
    hmm.fit(df, max_iterations=50)

    # Test predictions
    print("\nTesting predictions...")
    test_sequence = ['app_open', 'browse_catalog', 'view_product', 'add_to_cart',
                    'checkout_start', 'payment', 'purchase_complete', 'app_close']

    predicted_states = hmm.predict_sequence(test_sequence)

    print("\nExample prediction:")
    for event, state in zip(test_sequence, predicted_states):
        print(f"{event:>18} -> {state}")

    # Analyze specific user
    sample_user = list(hmm.user_sequences.keys())[0]
    user_pattern = hmm.get_user_behavior_pattern(sample_user)
    if user_pattern:
        print(f"\nUser {sample_user} behavior pattern:")
        user_events = [hmm.idx_to_event[idx] for idx in hmm.user_sequences[sample_user]]
        for event, state in zip(user_events, user_pattern):
            print(f"{event:>18} -> {state}")

Creating sample data...
shape: (2_107, 4)
┌──────────┬─────────┬───────────┬───────────────────┐
│ userid   ┆ country ┆ timestamp ┆ event             │
│ ---      ┆ ---     ┆ ---       ┆ ---               │
│ str      ┆ str     ┆ i64       ┆ str               │
╞══════════╪═════════╪═══════════╪═══════════════════╡
│ user_000 ┆ CA      ┆ 0         ┆ app_open          │
│ user_000 ┆ CA      ┆ 60        ┆ browse_catalog    │
│ user_000 ┆ CA      ┆ 120       ┆ view_product      │
│ user_000 ┆ CA      ┆ 180       ┆ browse_catalog    │
│ user_000 ┆ CA      ┆ 240       ┆ browse_catalog    │
│ …        ┆ …       ┆ …         ┆ …                 │
│ user_099 ┆ DE      ┆ 8553780   ┆ add_to_cart       │
│ user_099 ┆ DE      ┆ 8553840   ┆ checkout_start    │
│ user_099 ┆ DE      ┆ 8553900   ┆ payment           │
│ user_099 ┆ DE      ┆ 8553960   ┆ purchase_complete │
│ user_099 ┆ DE      ┆ 8554020   ┆ app_close         │
└──────────┴─────────┴───────────┴───────────────────┘
Sample data shape: (210

In [33]:
df = pl.read_parquet("data/df_trial_filtered.parquet")

In [34]:
df

event,distinct_id,os_version,country_code,date,time_since_first_event,days,hours,minutes,seconds,groups,start_flag,session_id,end_event,transition
str,str,str,str,datetime[μs],duration[μs],i32,i32,i32,i32,str,i8,i64,str,str
"""RecordViewController.startSpee…","""$RCAnonymousID:user_0092548370…","""18.3""","""GB""",2025-02-11 02:14:53,3h 41m 19s,0,3,221,13279,"""RecordViewController.startSpee…",0,3,"""app_open""","""trial_started_event -> trial_c…"
"""VideoPreviewScreen""","""$RCAnonymousID:user_0092548370…","""18.3""","""GB""",2025-02-11 02:15:28,3h 41m 54s,0,3,221,13314,"""VideoPreview""",0,3,"""RecordScreen.didFinishRecordin…","""trial_started_event -> trial_c…"
"""RecordViewController.startSpee…","""$RCAnonymousID:user_0092548370…","""18.3""","""GB""",2025-02-11 02:18:08,3h 44m 34s,0,3,224,13474,"""RecordViewController.startSpee…",0,3,"""RecordViewController.stopSpeec…","""trial_started_event -> trial_c…"
"""VideoPreviewScreen""","""$RCAnonymousID:user_0092548370…","""18.3""","""GB""",2025-02-11 02:20:32,3h 46m 58s,0,3,226,13618,"""VideoPreview""",0,3,"""app_close""","""trial_started_event -> trial_c…"
"""VideoPreviewScreen.SaveSuccess""","""$RCAnonymousID:user_0092548370…","""18.3""","""GB""",2025-02-11 02:21:44,3h 48m 10s,0,3,228,13690,"""VideoPreviewScreen.SaveSuccess""",0,3,"""VideoPreviewScreen.exportVideo""","""trial_started_event -> trial_c…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""VideoPreviewScreen.mirrorVideo…","""user_99947267453523""","""17.5.1""","""ID""",2025-01-23 19:17:40,19m 33s,0,0,19,1173,"""VideoPreview""",0,38163,"""VideoPreviewScreen.optionSelec…","""trial_started_event -> trial_c…"
"""VideoPreviewScreen.optionSelec…","""user_99947267453523""","""17.5.1""","""ID""",2025-01-23 19:17:48,19m 41s,0,0,19,1181,"""VideoPreview""",0,38163,"""VideoPreviewScreen.mirrorVideo…","""trial_started_event -> trial_c…"
"""VideoPreviewScreen.mirrorVideo…","""user_99947267453523""","""17.5.1""","""ID""",2025-01-23 19:17:48,19m 41s,0,0,19,1181,"""VideoPreview""",0,38163,"""$ae_session""","""trial_started_event -> trial_c…"
"""VideoPreviewScreen.exportVideo""","""user_99947267453523""","""17.5.1""","""ID""",2025-01-23 19:18:52,20m 45s,0,0,20,1245,"""VideoPreview""",0,38163,"""VideoPreviewScreen.SaveSuccess""","""trial_started_event -> trial_c…"


In [35]:
df2 = df.select([pl.col("groups"), pl.col("distinct_id"), pl.col("country_code"), pl.col("seconds")])

In [36]:
df2 = df2.rename({"distinct_id": "userid"})
df2 = df2.rename({"country_code": "country"})
df2 = df2.rename({"seconds": "timestamp"})
df2 = df2.rename({"groups": "event"})
df2 = df2.select(["userid", "event", "country", "timestamp"])

In [37]:
minor_event_to_drop = ["user", "session", "Show", "$identify", "session",
                      "Second", "identity_alias", "user",
                       "Thank", "$create_alias", "hideReviewTap", "Homescreen", "proUserSignIn",
                       "$ae", "device_attributes", "config_attributes", "Tele", "branch", "Rating",
                       "update", "app", "Regist", "Login", "expiration_event", "NoRatingBecauseOfNoSceneError",
                       "SettingsScreen", "Whats", "reviewTap", "purchase", "checkSubscriptionStatus.Error",
                       "fetchRedeemedPromoCodea.Error", "bluetooth", "open", "paywall", "Welcome",
                       "chooseFacebook", "chooseYoutube", "subscription", "Word", "OpenScript", "SortingTap",
                       "script", "Editorscreen", "create", "Record", "deleteRecording", "Folder",
                       "TeleprompterDidLoaded", "remove", "PlayerScreen", "product_change_event",
                       "start", "stop", "add", "billing_issue_event", "Font", "Selected", "TrialScreenCloseTap",
                       "caption", "subtitle", "userDidTakeScreenshotNotification", "deeplinkRemoteConnectUsed",
                       "TrialScreenPurchaseSuccess", "documentPicker.Error", "did_receive_asa_attribution", "setup",
                       "connect", "Select", "assigned", "cancellation_event", "handle", "getPreviousTranscriptionJob.Error",
                       "Settings", "Manage", "SelectPlatform", "ShowRegistrationFromSettings", "TrialScreenCloseConfirmation",
                       "WCSession*ERROR", "trial_started_event", "errorCatched", "didRegister", "AudioCleaningError"
                      ]
df2 = df2.filter(~pl.col("event").is_in(minor_event_to_drop))

In [38]:
df2

userid,event,country,timestamp
str,str,str,i32
"""$RCAnonymousID:user_0092548370…","""RecordViewController.startSpee…","""GB""",13279
"""$RCAnonymousID:user_0092548370…","""VideoPreview""","""GB""",13314
"""$RCAnonymousID:user_0092548370…","""RecordViewController.startSpee…","""GB""",13474
"""$RCAnonymousID:user_0092548370…","""VideoPreview""","""GB""",13618
"""$RCAnonymousID:user_0092548370…","""VideoPreviewScreen.SaveSuccess""","""GB""",13690
…,…,…,…
"""user_99947267453523""","""VideoPreview""","""ID""",1173
"""user_99947267453523""","""VideoPreview""","""ID""",1181
"""user_99947267453523""","""VideoPreview""","""ID""",1181
"""user_99947267453523""","""VideoPreview""","""ID""",1245


In [40]:
# Demo usage
if __name__ == "__main__":
    # Create sample data
   
    print(df2)

    print(f"Sample data shape: {df2.shape}")
    print(f"Unique users: {df2.select('userid').n_unique()}")
    #print(f"Unique events: {df2.select('event').unique().to_series().to_list()}")
    print(f"Date range: {df2.select('timestamp').min().item()} - {df2.select('timestamp').max().item()}")

    # Initialize and train HMM
    print("\nInitializing HMM with 3 states...")
    hmm = EventHMM(n_states=5, state_names=['state1', 'state2', 'state3', 'state4', 'state5'])

    # Train the model
    hmm.fit(df2, max_iterations=50)

    # Analyze specific user
    sample_user = list(hmm.user_sequences.keys())[0]
    user_pattern = hmm.get_user_behavior_pattern(sample_user)
    if user_pattern:
        print(f"\nUser {sample_user} behavior pattern:")
        user_events = [hmm.idx_to_event[idx] for idx in hmm.user_sequences[sample_user]]
        for event, state in zip(user_events, user_pattern):
            print(f"{event:>18} -> {state}")

shape: (328_806, 4)
┌─────────────────────────────────┬─────────────────────────────────┬─────────┬───────────┐
│ userid                          ┆ event                           ┆ country ┆ timestamp │
│ ---                             ┆ ---                             ┆ ---     ┆ ---       │
│ str                             ┆ str                             ┆ str     ┆ i32       │
╞═════════════════════════════════╪═════════════════════════════════╪═════════╪═══════════╡
│ $RCAnonymousID:user_0092548370… ┆ RecordViewController.startSpee… ┆ GB      ┆ 13279     │
│ $RCAnonymousID:user_0092548370… ┆ VideoPreview                    ┆ GB      ┆ 13314     │
│ $RCAnonymousID:user_0092548370… ┆ RecordViewController.startSpee… ┆ GB      ┆ 13474     │
│ $RCAnonymousID:user_0092548370… ┆ VideoPreview                    ┆ GB      ┆ 13618     │
│ $RCAnonymousID:user_0092548370… ┆ VideoPreviewScreen.SaveSuccess  ┆ GB      ┆ 13690     │
│ …                               ┆ …                       

4: Log-likelihood = -195423.8377  
5: Log-likelihood = -195250.0179

In [41]:
hmm._print_model_summary()


HMM MODEL SUMMARY

Number of states: 5
Number of events: 69

Initial State Probabilities:
  state1: 0.037
  state2: 0.095
  state3: 0.015
  state4: 0.538
  state5: 0.314

Transition Matrix:
From \ To        state1    state2    state3    state4    state5
    state1        0.001     0.948     0.000     0.003     0.048
    state2        0.197     0.517     0.037     0.234     0.015
    state3        0.040     0.016     0.885     0.049     0.011
    state4        0.396     0.170     0.304     0.114     0.014
    state5        0.000     0.162     0.001     0.003     0.834

Emission Probabilities (Top 3 events per state):

state1:
  VideoPreviewScreen.SaveSuccess: 0.913
  Trim: 0.039
  blur: 0.027

state2:
  VideoPreview: 0.965
  Trim: 0.017
  Intercom: 0.005

state3:
  VideoPreview: 0.848
  VideoPreviewScreen.SaveSuccess: 0.077
  blur: 0.033

state4:
  VideoPreview: 0.968
  RecordViewController.startSpeechRecognizing: 0.012
  purchase_abandoned: 0.011

state5:
  capture: 0.292
  Visual: 0.