Given two strings needle and haystack, return the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.

 

Example 1:

Input: haystack = "sadbutsad", needle = "sad"
Output: 0
Explanation: "sad" occurs at index 0 and 6.
The first occurrence is at index 0, so we return 0.
Example 2:

Input: haystack = "leetcode", needle = "leeto"
Output: -1
Explanation: "leeto" did not occur in "leetcode", so we return -1.
 

Constraints:

1 <= haystack.length, needle.length <= 10^4
haystack and needle consist of only lowercase English characters.

In [None]:
# Brute force: for each start index, check whether the needle occurs at that position.
class Solution:
    def strStr(self, haystack: str, needle: str) -> int:
        """Return the first index at which ``needle`` occurs in ``haystack``.

        Every feasible start position is tried from left to right and the
        first one where ``needle`` lines up is returned. If no position
        matches (including when ``needle`` is longer than ``haystack``),
        the result is -1.
        """
        # Last start index at which needle still fits inside haystack.
        last_start = len(haystack) - len(needle)
        candidates = (
            start
            for start in range(last_start + 1)
            if haystack.startswith(needle, start)
        )
        return next(candidates, -1)
    
# tc - O(m * n)
# sc - O(1)


In [None]:
class Solution:
    def strStr(self, haystack: str, needle: str) -> int:
        """Return the index of the first occurrence of ``needle`` in ``haystack``.

        Two-pointer scan: ``l`` walks the haystack and ``r`` walks the needle.
        On a mismatch the attempt that began at ``l - r`` is abandoned and the
        search restarts one position later, so overlapping candidates are not
        skipped.

        Returns -1 when ``needle`` does not occur. Worst case is
        O(len(haystack) * len(needle)) time and O(1) extra space.
        """
        l = r = 0
        m = len(haystack)
        n = len(needle)

        while l < m and r < n:
            if haystack[l] == needle[r]:
                l += 1
                r += 1
            else:
                # The current attempt started at index l - r. Restart at the
                # very next start position rather than just moving l forward:
                # only advancing l loses candidates that begin inside the
                # partially matched region, e.g. haystack = "mississippi",
                # needle = "issip" (the match at index 4 starts inside the
                # failed attempt that began at index 1).
                l = l - r + 1
                r = 0

        if r == n:
            # The whole needle was consumed; l sits just past the match,
            # so the match began n characters earlier.
            return l - r

        return -1
    



In [11]:
# Example 1 from the problem statement: the needle occurs at the very start,
# so the recorded cell output is 0.
Solution().strStr( haystack = "sadbutsad", needle = "sad")

0

In [12]:
# Example 2 from the problem statement: "leeto" never occurs in "leetcode",
# so the recorded cell output is -1.
Solution().strStr(haystack = "leetcode", needle = "leeto")

-1

## KMP algorithm (Knuth–Morris–Pratt)

In [None]:
class Solution:
    def strStr(self, haystack: str, needle: str) -> int:
        """Find the first occurrence of ``needle`` in ``haystack`` with KMP.

        A failure table (longest proper prefix that is also a suffix, per
        needle prefix) is built first. The haystack is then scanned once;
        on a mismatch the table says how much of the needle is still known
        to match, so the haystack position never moves backwards.

        Returns the start index of the first match, 0 for an empty needle,
        or -1 when there is no match.
        """
        if not needle:
            return 0  # an empty needle matches at the very beginning

        def failure_table(pattern):
            """Return table[k]: longest proper prefix of pattern[:k+1] that is also its suffix."""
            table = [0] * len(pattern)
            matched = 0  # length of the border carried over from the previous position
            for pos in range(1, len(pattern)):
                # Fall back through ever shorter borders until one can be extended.
                while matched and pattern[pos] != pattern[matched]:
                    matched = table[matched - 1]
                if pattern[pos] == pattern[matched]:
                    matched += 1
                table[pos] = matched
            return table

        table = failure_table(needle)
        size = len(needle)

        matched = 0  # number of needle characters currently matched
        for idx, char in enumerate(haystack):
            # Shrink the partial match until char can extend it (or nothing is left).
            while matched and char != needle[matched]:
                matched = table[matched - 1]
            if char == needle[matched]:
                matched += 1
                if matched == size:
                    # idx is the last character of the match.
                    return idx + 1 - size

        return -1


Let’s say our needle = "abcaby"

We want to build the LPS array, where each lps[i] tells us the length of the longest proper prefix which is also a suffix in the substring needle[0...i].

🔧 Initialize:
lps = [0] * len(needle) → [0, 0, 0, 0, 0, 0]

length = 0 → keeps track of the current matching prefix-suffix length

i = 1 → we always start from index 1 (since lps[0] is always 0)

| i | needle\[i] | length | needle\[i] == needle\[length]? | Action                                         | lps                 |
| - | ---------- | ------ | ------------------------------ | ---------------------------------------------- | ------------------- |
| 1 | `b`        | 0      | `b` ≠ `a`                      | no match → lps\[1] = 0, i += 1                 | \[0, 0, 0, 0, 0, 0] |
| 2 | `c`        | 0      | `c` ≠ `a`                      | no match → lps\[2] = 0, i += 1                 | \[0, 0, 0, 0, 0, 0] |
| 3 | `a`        | 0      | `a` == `a`                     | match → length += 1 → 1, lps\[3] = 1           | \[0, 0, 0, 1, 0, 0] |
| 4 | `b`        | 1      | `b` == `b`                     | match → length += 1 → 2, lps\[4] = 2           | \[0, 0, 0, 1, 2, 0] |
| 5 | `y`        | 2      | `y` ≠ `c`                      | mismatch → update length = lps\[1] = 0 → retry | \[0, 0, 0, 1, 2, 0] |
Final lps = [0, 0, 0, 1, 2, 0]

💡 What this means:
lps[3] = 1 → the substring abca has "a" as both prefix and suffix

lps[4] = 2 → the substring abcab has "ab" as both prefix and suffix

lps[5] = 0 → abcaby has no proper prefix-suffix match


Step 2: Run KMP search on haystack = "abxabcabcaby" (needle = "abcaby")
We'll track two pointers:

i: index in haystack

j: index in needle

We’ll match characters and use lps to skip redundant checks on mismatches.

| Step | i  | j | haystack\[i] | needle\[j] | Match? | Action                                       | Result             |
| ---- | -- | - | ------------ | ---------- | ------ | -------------------------------------------- | ------------------ |
| 1    | 0  | 0 | `a`          | `a`        | ✅ Yes  | i++, j++                                     | i=1, j=1           |
| 2    | 1  | 1 | `b`          | `b`        | ✅ Yes  | i++, j++                                     | i=2, j=2           |
| 3    | 2  | 2 | `x`          | `c`        | ❌ No   | j != 0 → j = lps\[1] = 0                     | i=2, j=0           |
| 4    | 2  | 0 | `x`          | `a`        | ❌ No   | j == 0 → i++                                 | i=3, j=0           |
| 5    | 3  | 0 | `a`          | `a`        | ✅ Yes  | i++, j++                                     | i=4, j=1           |
| 6    | 4  | 1 | `b`          | `b`        | ✅ Yes  | i++, j++                                     | i=5, j=2           |
| 7    | 5  | 2 | `c`          | `c`        | ✅ Yes  | i++, j++                                     | i=6, j=3           |
| 8    | 6  | 3 | `a`          | `a`        | ✅ Yes  | i++, j++                                     | i=7, j=4           |
| 9    | 7  | 4 | `b`          | `b`        | ✅ Yes  | i++, j++                                     | i=8, j=5           |
| 10   | 8  | 5 | `c`          | `y`        | ❌ No   | j != 0 → j = lps\[4] = 2                     | i=8, j=2           |
| 11   | 8  | 2 | `c`          | `c`        | ✅ Yes  | i++, j++                                     | i=9, j=3           |
| 12   | 9  | 3 | `a`          | `a`        | ✅ Yes  | i++, j++                                     | i=10, j=4          |
| 13   | 10 | 4 | `b`          | `b`        | ✅ Yes  | i++, j++                                     | i=11, j=5          |
| 14   | 11 | 5 | `y`          | `y`        | ✅ Yes  | i++, j++ → i=12, j=6; j == len(needle) → return i - j = 12 - 6 = 6 | ✅ Match at index 6 |
Output: 6
Because "abcaby" starts at index 6 in "abxabcabcaby".

# Z Algorithm

In [None]:
def calculate_z(s: str) -> list[int]:
    """Return the Z-array of ``s``.

    ``z[i]`` is the length of the longest substring starting at ``i`` that
    is also a prefix of ``s``. ``z[0]`` is left as 0 by convention.
    """
    size = len(s)
    z = [0] * size
    # Half-open window [box_start, box_end): the match reaching furthest to
    # the right found so far, known to equal s[0:box_end - box_start].
    box_start = box_end = 0

    for i in range(1, size):
        if i < box_end:
            # Inside the window: reuse the mirrored value, capped at the
            # part of the window that is actually known.
            span = min(box_end - i, z[i - box_start])
        else:
            span = 0

        # Extend by direct comparison past what is already known.
        while i + span < size and s[span] == s[i + span]:
            span += 1
        z[i] = span

        # Keep the window that reaches furthest to the right.
        if i + span > box_end:
            box_start, box_end = i, i + span

    return z

def search_pattern(pattern: str, text: str) -> list[int]:
    """Return every start index at which ``pattern`` occurs in ``text``.

    The Z-array of ``pattern + "$" + text`` is computed; any position in the
    text part whose Z-value covers the whole pattern is a match.

    An empty pattern matches at every position, so the result is then
    ``[0, 1, ..., len(text)]``.
    """
    pattern_len = len(pattern)
    if pattern_len == 0:
        # Without this guard the separator position would be reported as -1.
        return list(range(len(text) + 1))

    z = calculate_z(pattern + "$" + text)

    # Index in the concatenation where the text part begins.
    offset = pattern_len + 1

    # Scan only the text part. ">=" rather than "==": when "$" also appears
    # in the inputs, a Z-value can run past the separator and exceed
    # pattern_len, yet the first pattern_len characters still match.
    return [
        i - offset
        for i in range(offset, len(z))
        if z[i] >= pattern_len
    ]

# This will give you all the occurrences of the pattern in the text.




In [None]:
class Solution:
    def strStr(self, haystack: str, needle: str) -> int:
        """Return the index of the first occurrence of ``needle`` in ``haystack``.

        Uses the Z-array of ``needle + "$" + haystack``: the first position
        in the haystack part whose Z-value covers the whole needle is the
        answer. Returns 0 for an empty needle and -1 when there is no match.
        """
        pattern_len = len(needle)
        if pattern_len == 0:
            return 0  # an empty needle matches at the very beginning

        z = self.calculate_z(needle + "$" + haystack)

        # Index in the concatenation where the haystack part begins.
        offset = pattern_len + 1

        for i in range(offset, len(z)):
            # ">=" rather than "==": if "$" also appears in the inputs, the
            # Z-value can run past the separator and exceed pattern_len
            # while the needle still matches here.
            if z[i] >= pattern_len:
                # Only the first occurrence is needed, so stop immediately.
                return i - offset

        return -1

    def calculate_z(self, s: str) -> list[int]:
        """Return the Z-array of ``s``.

        ``z[i]`` is the length of the longest substring starting at ``i``
        that is also a prefix of ``s``; ``z[0]`` is left as 0.
        """
        n = len(s)
        z = [0] * n
        # [l, r] is the inclusive window of the rightmost prefix match so far.
        l, r = 0, 0

        for i in range(1, n):
            if i > r:
                # Case A: outside the window, compare from scratch.
                l, r = i, i
                while r < n and s[r - l] == s[r]:
                    r += 1
                z[i] = r - l
                r -= 1  # back to the last matching index (inclusive bound)
            else:
                # Case B or C: inside the window, reuse the mirrored value.
                k = i - l
                if z[k] < r - i + 1:
                    z[i] = z[k]  # Case B: mirrored match ends inside the window
                else:
                    # Case C: the match reaches the window edge, try to extend.
                    l = i
                    while r < n and s[r - l] == s[r]:
                        r += 1
                    z[i] = r - l
                    r -= 1
        return z


# tc - O(n + m) 
# sc - O(n + m) 