In [2]:
import pandas as pd
import random
import numpy as np

def build_transition_matrix(states, data):
    """
    Build a first-order transition-probability matrix from an observed sequence.

    Every consecutive pair of items in ``data`` counts as one transition.
    The counts are then divided by their row total, so each row that has at
    least one outgoing transition sums to 1.

    Args:
        states (list): All possible states, without duplicates. Their order
            fixes the row and column order of the result.
        data (iterable): The observed sequence of states. Any iterable is
            accepted; fewer than two items yields an all-zero matrix.

    Returns:
        pd.DataFrame: Float matrix labelled by ``states`` on both axes, where
        entry ``[a, b]`` is the observed fraction of transitions leaving
        ``a`` that go to ``b``. A state that is never left in ``data`` gets
        an all-zero row (not a valid probability distribution).

    Raises:
        ValueError: If ``states`` contains duplicates.
        KeyError: If a transition in ``data`` involves a state that is not
            in ``states``.
    """
    states = list(states)
    position = {state: idx for idx, state in enumerate(states)}
    if len(position) != len(states):
        raise ValueError("states must not contain duplicates")

    # Materialise once so generators and other one-shot iterables work too.
    observed = list(data)

    # Count transitions in a plain NumPy array; per-cell DataFrame.loc updates
    # are far slower and add nothing here.
    counts = np.zeros((len(states), len(states)), dtype=float)
    for from_state, to_state in zip(observed, observed[1:]):
        try:
            row, col = position[from_state], position[to_state]
        except KeyError as err:
            raise KeyError(
                f"state {err.args[0]!r} in data is not listed in states"
            ) from None
        counts[row, col] += 1

    # Normalise each row to probabilities. Rows with no outgoing transitions
    # are left at zero instead of producing NaN from 0/0.
    row_totals = counts.sum(axis=1, keepdims=True)
    probabilities = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals > 0,
    )

    return pd.DataFrame(probabilities, index=states, columns=states)

def simulate_flow(transition_matrix, start_state, num_steps, rng=None):
    """
    Simulate a random walk through the states of a transition matrix.

    Args:
        transition_matrix (pd.DataFrame): The probability transition matrix;
            row labels are the current state, column labels the next state.
        start_state (str): The initial state for the simulation. It is always
            the first element of the returned path.
        num_steps (int): The total length of the path to produce, including
            ``start_state``. Values of 1 or less return ``[start_state]``.
        rng (random.Random, optional): Source of randomness providing a
            ``choices`` method. Defaults to the global ``random`` module;
            pass a seeded ``random.Random`` for reproducible runs.

    Returns:
        list: The simulated states, starting with ``start_state``.

    Raises:
        KeyError: If the current state is not a row label of
            ``transition_matrix``.
        ValueError: If the walk reaches a state whose outgoing
            probabilities do not sum to a positive value (a dead end),
            so no next state can be drawn.
    """
    chooser = random if rng is None else rng

    # The candidate next states are the column labels, identical for every row.
    next_states = transition_matrix.columns.tolist()

    path = [start_state]
    current_state = start_state

    for _ in range(num_steps - 1):
        # Outgoing probabilities of the current state, in column order.
        probabilities = transition_matrix.loc[current_state].values.tolist()

        # A state never left in the training data has an all-zero row.
        # Fail with a clear message rather than an opaque sampling error.
        if sum(probabilities) <= 0:
            raise ValueError(
                f"state {current_state!r} has no outgoing transitions; "
                "cannot continue the simulation"
            )

        # k=1 draws a single weighted choice.
        current_state = chooser.choices(next_states, weights=probabilities, k=1)[0]
        path.append(current_state)

    return path

if __name__ == '__main__':
    # Demo: learn a weather model from a short history, then sample from it.

    # The state space and the observed day-by-day sequence.
    weather_states = ['sunny', 'cloudy', 'rainy']
    weather_history = [
        'sunny', 'sunny', 'cloudy', 'rainy', 'cloudy',
        'sunny', 'rainy', 'rainy', 'cloudy', 'sunny',
    ]

    # Estimate the transition probabilities and show them.
    transition_matrix = build_transition_matrix(weather_states, weather_history)
    print("--- Transition Matrix (Probabilities) ---", transition_matrix, sep="\n")

    # Sample a path of num_days states beginning at start_state.
    start_state, num_days = 'sunny', 10
    simulated_weather = simulate_flow(transition_matrix, start_state, num_days)

    # Report the run parameters followed by the sampled path.
    print(
        "\n--- Simulated Weather Flow ---",
        f"Starting from: '{start_state}'",
        f"Number of days: {num_days}",
        " -> ".join(simulated_weather),
        sep="\n",
    )

--- Transition Matrix (Probabilities) ---
           sunny    cloudy     rainy
sunny   0.333333  0.333333  0.333333
cloudy  0.666667  0.000000  0.333333
rainy   0.000000  0.666667  0.333333

--- Simulated Weather Flow ---
Starting from: 'sunny'
Number of days: 10
sunny -> sunny -> sunny -> rainy -> cloudy -> sunny -> sunny -> rainy -> cloudy -> sunny
