In [None]:
def chunk_with_overlap(arr, chunk_length, overlap_length=0):
    """
    Splits an array into chunks with a specified overlap.

    Consecutive chunks start (chunk_length - overlap_length) items apart, so
    each chunk repeats the last overlap_length items of the one before it.
    Chunking stops as soon as a chunk reaches the end of arr, so the result
    never ends with chunks made up solely of items that the previous chunk
    already contains. The final chunk may be shorter than chunk_length.

    Parameters:
        arr (list): The list to be chunked.
        chunk_length (int): The number of items in each chunk.
        overlap_length (int): The number of items to overlap between consecutive chunks.

    Returns:
        list: A list of chunks (each chunk is a list of items from arr).
        An empty arr yields an empty list.

    Raises:
        ValueError: If chunk_length is not positive, or if overlap_length is
            negative or not smaller than chunk_length.
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be a positive integer.")
    if overlap_length < 0 or overlap_length >= chunk_length:
        raise ValueError("overlap_length must be non-negative and less than chunk_length.")

    # The validation above guarantees step >= 1, so range() always advances.
    step = chunk_length - overlap_length
    total = len(arr)

    chunks = []
    for start in range(0, total, step):
        end = start + chunk_length
        chunks.append(arr[start:end])
        # Once a chunk covers the last item, every later window would only
        # repeat items already emitted, so stop here.
        if end >= total:
            break

    return chunks

# Demo: chunk a small sample transcript and print every chunk.
# Each entry of 'text' is one transcript item written as "start-text".
text = [
    "0.0-Hello, welcome to the video.",
    "5.0-Today we'll discuss AI summarization.",
    "10.0-First, we transcribe the video.",
    "15.0-Then, we process the transcript.",
    "20.0-Next, we chunk the text.",
    "25.0-After that, we summarize.",
    "30.0-Finally, we review the output.",
    "35.0-Thank you for watching.",
    "40.0-See you next time.",
    "45.0-Goodbye.",
]

# Chunking parameters used by the demo:
chunk_length = 3  # transcript lines per chunk
overlap_length = 2  # transcript lines shared by consecutive chunks

chunks = chunk_with_overlap(
    text,
    chunk_length=chunk_length,
    overlap_length=overlap_length,
)

# Emit a 1-based header, the chunk's lines, then a 40-dash separator.
for idx, chunk in enumerate(chunks):
    print(f"Chunk {idx + 1}:", "\n".join(chunk), "-" * 40, sep="\n")


Chunk 1:
0.0-Hello, welcome to the video.
5.0-Today we'll discuss AI summarization.
10.0-First, we transcribe the video.
----------------------------------------
Chunk 2:
5.0-Today we'll discuss AI summarization.
10.0-First, we transcribe the video.
15.0-Then, we process the transcript.
----------------------------------------
Chunk 3:
10.0-First, we transcribe the video.
15.0-Then, we process the transcript.
20.0-Next, we chunk the text.
----------------------------------------
Chunk 4:
15.0-Then, we process the transcript.
20.0-Next, we chunk the text.
25.0-After that, we summarize.
----------------------------------------
Chunk 5:
20.0-Next, we chunk the text.
25.0-After that, we summarize.
30.0-Finally, we review the output.
----------------------------------------
Chunk 6:
25.0-After that, we summarize.
30.0-Finally, we review the output.
35.0-Thank you for watching.
----------------------------------------
Chunk 7:
30.0-Finally, we review the output.
35.0-Thank you for watching.
