In [1]:
############### HASH FUNCTION #################

In [None]:
'''
✅ Problem Recap
Given an array of integers, check if there exists a subarray whose sum is exactly 0.

🧠 Key Idea (Why Hash?)
Keep a running prefix sum while traversing.
If the same prefix sum appears again, it means the elements in between sum to 0.
So we use a set (hash set) to store prefix sums we've seen before.
The set is backed by a hash table, so if prefix_sum in seen: is a hash lookup (O(1)).


✅ Why It Works
The prefix_sum is the total sum up to current index. prefix_sum[i] = arr[0] + arr[1] + ... + arr[i]
Helps to Avoid recomputation when summing subarrays.
If we see the same prefix_sum again, that means the subarray in between sums to 0.
Why? Because:
prefix_sum[i] == prefix_sum[j] → sum from j+1 to i is zero.

✅ Where is the hash?
seen = set() is a hash set
When we do prefix_sum in seen, it's a hashed lookup
When we seen.add(prefix_sum), Python computes hash(prefix_sum) to store it efficiently
'''


In [None]:
'''

2. Count Subarrays with Given Sum (k
3. Longest Subarray with Sum = k
4. Subarray with Equal Number of 0s and 1s  ; Trick: Convert 0s to -1, then use sum = 0 logic.
5. Subarray with All Unique Elements ; Use a sliding window and a hash set.
6. Subarray with Equal Count of Two Elements (e.g., 1s and 2s);  Trick: Replace 2 with -1, then use sum = 0.
7. Check for Repeating Subarray of Length k
                                   
| Problem                        | Hash Structure Used | Time  |
| ------------------------------ | ------------------- | ----- |
| Subarray sum = 0               | `set()`             | O(n)  |
| Count subarrays with sum = k   | `dict()`            | O(n)  |
| Longest subarray sum = k       | `dict()`            | O(n)  |
| Equal 0s and 1s                | `dict()`            | O(n)  |
| All unique subarray            | `set()` (sliding)   | O(n)  |
| Repeating subarray of length k | `set()`             | O(nk) |

| Problem                        | Prefix Sum | Hash Map | Sliding Window | Hash Set | Rolling Hash |
| ------------------------------ | ---------- | -------- | -------------- | -------- | ------------ |
| Subarray Sum = 0               | ✅          | ✅        | ❌              | ❌        | ❌            |
| Count Subarrays with Sum = k   | ✅          | ✅        | ❌              | ❌        | ❌            |
| Longest Subarray with Sum = k  | ✅          | ✅        | ❌              | ❌        | ❌            |
| Equal 0s and 1s                | ✅ (0→-1)   | ✅        | ❌              | ❌        | ❌            |
| All Unique Subarray            | ❌          | ❌        | ✅              | ✅        | ❌            |
| Repeating Subarray of Length k | ❌          | ✅        | ✅              | ✅        | ✅ (optional) |



| Feature    | **Hash Map** (`dict`)                     | **Hash Set** (`set`)                    |
| ---------- | ----------------------------------------- | --------------------------------------- |
| Stores     | **Key → Value** pairs                     | Only **unique keys / elements**         |
| Use case   | Fast lookup + value association           | Fast membership testing / deduplication |
| Example    | `{"a": 1, "b": 2}`                        | `{"a", "b", "c"}`                       |
| Syntax     | `my_map["a"] = 1`                         | `my_set.add("a")`                       |
| Duplicates | ✅ Keys must be unique (values can repeat) | ❌ No duplicates allowed                 |
| Lookup     | `O(1)` average time                       | `O(1)` average time                     |



When to use:
| Scenario                                     | Use                                    |
| -------------------------------------------- | -------------------------------------- |
| Track frequencies / counts                   | `dict` (e.g. `word_count["the"] += 1`) |
| Track existence / membership only            | `set` (e.g. `if word in seen:`)        |
| Find duplicates or uniqueness                | `set`                                  |
| Map one value to another (e.g. word → index) | `dict`                                 |

'''
# https://asrathore08.medium.com/15-questions-on-array-to-practice-before-coding-interview-f64b683abfa3

In [None]:
def is_anagram(s1, s2):
    if len(s1) != len(s2):
        return False

    char_count = {}

    '''
    # Count characters in s1
    for char in s1:          
        if char in char_count:
            char_count[char] = char_count[char] + 1
        else:
            char_count[char] = 1
'''
            
    for char in s1:
        #char_count[char] = char_count.get(char, 0) + 1        
        if char in char_count:
            char_count[char] = char_count[char] + 1
        else:
            char_count[char] = 1
        
    # Subtract character counts using s2
    for char in s2:
        if char not in char_count:
            return False
        char_count[char] -= 1
        if char_count[char] < 0:
            return False

    return True

print(is_anagram("looten", "oolent"))  # True
print(is_anagram("race", "care"))      # True
print(is_anagram("hello", "helloo"))   # False

In [3]:
from collections import defaultdict

def count_subarrays_with_sum(arr, k):
    prefix_sum = 0
    count = 0
    sum_freq = defaultdict(int)
    sum_freq[0] = 1

    print("Initial state:")
    print(f"prefix_sum = {prefix_sum}, count = {count}")
    print(f"sum_freq = {dict(sum_freq)}\n")

    for idx, num in enumerate(arr):
        prefix_sum += num
        diff = prefix_sum - k
        matched_subarrays = sum_freq[diff]
        count += matched_subarrays

        print(f"Step {idx + 1}:")
        print(f"  Current num         = {num}")
        print(f"  Updated prefix_sum  = {prefix_sum}")
        print(f"  prefix_sum - k      = {prefix_sum} - {k} = {diff}")
        print(f"  Subarrays found     = {matched_subarrays}")
        print(f"  Updated count       = {count}")

        sum_freq[prefix_sum] += 1
        print(f"  Updated sum_freq    = {dict(sum_freq)}\n")

    print(f"Final count of subarrays with sum {k} = {count}")
    return count

# Example usage:
arr = [4, 2, -3, 1, 6]
count_subarrays_with_sum(arr, 7)


'''
Goal: Return total count of subarrays summing to k
Structure: Uses dict to store prefix sum frequencies
Uses prefix_sum - k to check if such a subarray ending at current index exists
No early exit — must go through full array
'''

Initial state:
prefix_sum = 0, count = 0
sum_freq = {0: 1}

Step 1:
  Current num         = 4
  Updated prefix_sum  = 4
  prefix_sum - k      = 4 - 7 = -3
  Subarrays found     = 0
  Updated count       = 0
  Updated sum_freq    = {0: 1, -3: 0, 4: 1}

Step 2:
  Current num         = 2
  Updated prefix_sum  = 6
  prefix_sum - k      = 6 - 7 = -1
  Subarrays found     = 0
  Updated count       = 0
  Updated sum_freq    = {0: 1, -3: 0, 4: 1, -1: 0, 6: 1}

Step 3:
  Current num         = -3
  Updated prefix_sum  = 3
  prefix_sum - k      = 3 - 7 = -4
  Subarrays found     = 0
  Updated count       = 0
  Updated sum_freq    = {0: 1, -3: 0, 4: 1, -1: 0, 6: 1, -4: 0, 3: 1}

Step 4:
  Current num         = 1
  Updated prefix_sum  = 4
  prefix_sum - k      = 4 - 7 = -3
  Subarrays found     = 0
  Updated count       = 0
  Updated sum_freq    = {0: 1, -3: 0, 4: 2, -1: 0, 6: 1, -4: 0, 3: 1}

Step 5:
  Current num         = 6
  Updated prefix_sum  = 10
  prefix_sum - k      = 10 - 7 = 3
  Subarray

'\nGoal: Return total count of subarrays summing to k\nStructure: Uses dict to store prefix sum frequencies\nUses prefix_sum - k to check if such a subarray ending at current index exists\nNo early exit — must go through full array\n'

In [4]:
'''

✅ Problem:
Given an array of integers and a target sum k, print all subarrays that sum to k.

Key Idea:
Use a dictionary prefix_sums to map each prefix_sum to all the indices where it occurred
If at index i, we find that prefix_sum - k exists, 
then all previous indices j where prefix_sum[j] == prefix_sum[i] - k are starts of valid subarrays

'''

from collections import defaultdict

def print_subarrays_with_sum_k(arr, k):
    prefix_sum = 0
    prefix_map = defaultdict(list)
    prefix_map[0].append(-1)  # to handle subarrays starting at index 0

    print(f"Array: {arr}")
    print(f"Target Sum (k): {k}\n")
    print(f"Initial prefix_map: {dict(prefix_map)}\n")

    for i, num in enumerate(arr):
        prefix_sum += num
        print(f"--- Step {i + 1} ---")
        print(f"Current Index: {i}")
        print(f"Current Number: {num}")
        print(f"Updated Prefix Sum: {prefix_sum}")

        needed_sum = prefix_sum - k
        print(f"Looking for (prefix_sum - k) = {prefix_sum} - {k} = {needed_sum}")

        if needed_sum in prefix_map:
            print(f"✅ Found {len(prefix_map[needed_sum])} match(es) in prefix_map:")
            for start_index in prefix_map[needed_sum]:
                subarray = arr[start_index + 1 : i + 1]
                print(f"    Subarray from index {start_index + 1} to {i}: {subarray}")
        else:
            print("No matching subarray ending here.")

        # Update the prefix map
        prefix_map[prefix_sum].append(i)
        print(f"Updated prefix_map: {dict(prefix_map)}\n")

# Example usage:
arr = [3, 4, 7, -2, 2, 1, 4, 2]
k = 7
print_subarrays_with_sum_k(arr, k)


Array: [3, 4, 7, -2, 2, 1, 4, 2]
Target Sum (k): 7

Initial prefix_map: {0: [-1]}

--- Step 1 ---
Current Index: 0
Current Number: 3
Updated Prefix Sum: 3
Looking for (prefix_sum - k) = 3 - 7 = -4
No matching subarray ending here.
Updated prefix_map: {0: [-1], 3: [0]}

--- Step 2 ---
Current Index: 1
Current Number: 4
Updated Prefix Sum: 7
Looking for (prefix_sum - k) = 7 - 7 = 0
✅ Found 1 match(es) in prefix_map:
    Subarray from index 0 to 1: [3, 4]
Updated prefix_map: {0: [-1], 3: [0], 7: [1]}

--- Step 3 ---
Current Index: 2
Current Number: 7
Updated Prefix Sum: 14
Looking for (prefix_sum - k) = 14 - 7 = 7
✅ Found 1 match(es) in prefix_map:
    Subarray from index 2 to 2: [7]
Updated prefix_map: {0: [-1], 3: [0], 7: [1], 14: [2]}

--- Step 4 ---
Current Index: 3
Current Number: -2
Updated Prefix Sum: 12
Looking for (prefix_sum - k) = 12 - 7 = 5
No matching subarray ending here.
Updated prefix_map: {0: [-1], 3: [0], 7: [1], 14: [2], 12: [3]}

--- Step 5 ---
Current Index: 4
Current

In [7]:
from collections import defaultdict
    
def print_subarrays_with_sum_k(arr, k):
    prefix_sum = 0
    prefix_map = defaultdict(list)
    prefix_map[0].append(-1)
    for i, num in enumerate(arr):
        prefix_sum += num
        needed_sum = prefix_sum - k
        
        if needed_sum in prefix_map:
            start_index = (prefix_map[needed_sum])
            start_index = start_index[0]
            subarray = arr[start_index + 1 : i + 1]
            print(f"    Subarray from index {start_index + 1} to {i}: {subarray}")
        
        prefix_map[prefix_sum].append(i)
        
arr = [3, 4, 7, -2, 2, 1, 4, 2]
k = 6
print_subarrays_with_sum_k(arr, k)

    Subarray from index 6 to 7: [4, 2]


In [None]:
from collections import defaultdict

def longest_subarray_with_sum_k(arr, k):
    prefix_sum = 0
    prefix_map = defaultdict(list)
    prefix_map[0].append(-1)  # Handle subarrays starting at index 0
    max_len = 0
    final_subarray = []

    print(f"Array: {arr}")
    print(f"Target Sum: {k}")
    print("-" * 50)

    for i, num in enumerate(arr):
        prefix_sum += num
        needed_sum = prefix_sum - k

        if needed_sum in prefix_map:
            start_index = prefix_map[needed_sum][0]
            sub_len = i - start_index
            subarray = arr[start_index + 1 : i + 1]

            print(f"✔️  Found subarray from index {start_index + 1} to {i}: {subarray} (Sum = {k})")

            if sub_len > max_len:
                max_len = sub_len
                final_subarray = subarray  # update the winner

        if not prefix_map[prefix_sum]:
            prefix_map[prefix_sum].append(i)

    print("=" * 50)
    if final_subarray:
        print(f"🏆 Longest subarray with sum = {k} is: {final_subarray} (Length = {max_len})")
    else:
        print("❌ No subarray found with the given sum.")

    return max_len

# Test
longest_subarray_with_sum_k([10, 5, 2, 7, 1, 9], 15)

In [None]:
# 4. Subarray with Equal Number of 0s and 1s
# Trick: Convert 0s to -1, then use sum = 0 logic.

from collections import defaultdict

def longest_subarray_equal_0s_1s(arr):
    # Step 1: Convert 0s to -1s for sum logic
    transformed_arr = [-1 if num == 0 else 1 for num in arr]
    
    prefix_sum = 0
    prefix_map = defaultdict(int)
    prefix_map[0] = -1  # So we can catch full-length subarrays starting at index 0
    max_len = 0
    start_index = -1

    for i, num in enumerate(transformed_arr):
        prefix_sum += num

        if prefix_sum in prefix_map:
            prev_index = prefix_map[prefix_sum]
            sub_len = i - prev_index
            if sub_len > max_len:
                max_len = sub_len
                start_index = prev_index + 1
        else:
            prefix_map[prefix_sum] = i

    if max_len > 0:
        longest_subarray = arr[start_index:start_index + max_len]
    else:
        longest_subarray = []

    return max_len, longest_subarray


In [None]:
# 4. Subarray with Equal Number of 0s and 1s
# Trick: Convert 0s to -1, then use sum = 0 logic.

def subarray_with_equal_0s_1s(arr):
    sum_index = {}
    max_len = 0
    curr_sum = 0

    for i in range(len(arr)):
        curr_sum += -1 if arr[i] == 0 else 1

        if curr_sum == 0:
            max_len = i + 1
        elif curr_sum in sum_index:
            max_len = max(max_len, i - sum_index[curr_sum])
        else:
            sum_index[curr_sum] = i

    return max_len

subarray_with_equal_0s_1s([0, 1, 0, 1, 0, 1, 1])


In [None]:
# 5. Subarray with All Unique Elements
# Use a sliding window and a hash set.

def longest_unique_subarray(arr):
    seen = set()
    left = 0
    max_len = 0

    for right in range(len(arr)):
        while arr[right] in seen:
            seen.remove(arr[left])
            left += 1
        seen.add(arr[right])
        max_len = max(max_len, right - left + 1)

    return max_len

longest_unique_subarray([1, 2, 3, 1, 2, 3, 4])

In [None]:
# Python Code (Sliding Window + Hash Set, Returns Subarray)

def longest_unique_subarray(arr):
    seen = set()
    left = 0
    max_len = 0
    start_idx = 0  # To remember where the max window starts

    for right in range(len(arr)):
        # If duplicate is found, shrink window from the left
        while arr[right] in seen:
            seen.remove(arr[left])
            left += 1

        seen.add(arr[right])

        # Update max window info
        if right - left + 1 > max_len:
            max_len = right - left + 1
            start_idx = left

    longest_subarray = arr[start_idx:start_idx + max_len]
    return max_len, longest_subarray

# Example usage
arr = [1, 2, 3, 1, 2, 3, 4, 5]
length, subarray = longest_unique_subarray(arr)
print(f"Length: {length}")
print(f"Longest Unique Subarray: {subarray}")


In [None]:
# 7. Check for Repeating Subarray of Length k

def has_duplicate_subarray_k(arr, k):
    seen = set()
    for i in range(len(arr) - k + 1):
        sub = tuple(arr[i:i+k])
        if sub in seen:
            return True
        seen.add(sub)
    return False

has_duplicate_subarray_k([1,2,3,1,2,3], 3)

In [None]:
'''
🔍 Problem:
Given an array (or list of words), find the minimum distance between any two repeated elements.

🧠 Example:
arr = ["All", "work", "and", "no", "play", "makes", "for", "no", "work", "no", "fun", "no", "play"]

Closest repeated pair: "no" → occurs at indices 3, 7, 9, and 11
Minimum distance: 9 - 7 = 2

'''

def find_nearest_repetition(arr):
    last_seen = {}
    min_distance = float('inf')
    closest_pair = None

    for i, word in enumerate(arr):
        if word in last_seen:
            distance = i - last_seen[word]
            if distance < min_distance:
                min_distance = distance
                closest_pair = (word, last_seen[word], i)
        last_seen[word] = i

    if closest_pair:
        word, i1, i2 = closest_pair
        print(f"Closest repeated word: '{word}' between positions {i1} and {i2} (distance = {min_distance})")
        return min_distance
    else:
        print("No repeated words found.")
        return -1


sentence = "All work and no play makes for no work no fun no play"
words = sentence.split()

find_nearest_repetition(words)


In [None]:
'''
📘 Problem:
Detect the first repeated line in a text file and report its line number.

Details:
You're given lines of a file (as a list of strings).
Find the first repeated line, based on its content, regardless of where it occurs.
Use enumerate() to track line numbers.
Use hash() to identify unique line contents efficiently.

✅ Why both?
enumerate() gives line numbers during iteration.
hash() is used to store and compare lines by their hash (for speed & set membership).

enumerate() and hash() serve distinct purposes:
enumerate() → adds an index to an iterable
hash() → returns a hash value for hashable objects (used in sets, dicts, etc.)
'''

def first_repeated_line(lines):
    seen_hashes = set()

    for line_number, line in enumerate(lines, start=1):
        line_hash = hash(line.strip())
        #print(line_hash)

        if line_hash in seen_hashes:
            print(f"First repeated line found at line {line_number}: {line.strip()}")
            return line_number
        else:
            seen_hashes.add(line_hash)

    print("No repeated lines found.")
    return -1

lines = [
    "This is the first line.",
    "This is the second line.",
    "Another line.",
    "This is the first line.",  # Repeated
    "Yet another line."
]

first_repeated_line(lines)


In [None]:
# ---------- Check if Zero-Sum Subarray Exists ----------
def has_zero_sum_subarray(arr):
    prefix_sum = 0
    seen = set()
    for num in arr:
        prefix_sum += num
        if prefix_sum == 0 or prefix_sum in seen:
            return True
        seen.add(prefix_sum)
    return False


# ---------- Print One Zero-Sum Subarray ----------
def print_zero_sum_subarray(arr):
    prefix_sum = 0
    sum_index = {}
    for i, num in enumerate(arr):
        prefix_sum += num
        if prefix_sum == 0:
            return arr[0:i+1]
        if prefix_sum in sum_index:
            return arr[sum_index[prefix_sum]+1:i+1]
        sum_index[prefix_sum] = i
    return []


# ---------- Count Subarrays With Sum = k ----------
from collections import defaultdict

def count_subarrays_with_sum(arr, k):
    prefix_sum = 0
    sum_freq = defaultdict(int)
    sum_freq[0] = 1
    count = 0
    for num in arr:
        prefix_sum += num
        count += sum_freq[prefix_sum - k]
        sum_freq[prefix_sum] += 1
    return count


# ---------- Print All Subarrays With Sum = k ----------
def print_subarrays_with_sum_k(arr, k):
    prefix_sum = 0
    prefix_map = defaultdict(list)
    prefix_map[0].append(-1)
    for i, num in enumerate(arr):
        prefix_sum += num
        if (prefix_sum - k) in prefix_map:
            for start in prefix_map[prefix_sum - k]:
                subarray = arr[start+1:i+1]
                print(subarray)
        prefix_map[prefix_sum].append(i)

        
'''
why do you use defaultdict(int) in function 'count_subarrays_with_sum' and defaultdict(list) in function 'print_subarrays_with_sum' ? 
can we use either of these in both functions?

✅ defaultdict(int) — for counting frequencies
Used in: count_subarrays_with_sum(arr, k)
- It creates a dictionary where missing keys default to 0.
- Perfect for counting how many times a certain prefix sum has appeared.;prefix_sum → count

✅ defaultdict(list) — for storing multiple indices
Used in: print_subarrays_with_sum_k(arr, k)
- It creates a dictionary where missing keys default to empty lists.
- Used when you need to store all positions where a prefix sum occurred.; prefix_map → list of indices
- Allows you to go back and find all start indices to extract subarrays. 

'''