In [8]:
import csv
import os

def count_rows_until_string(file_path, stop_string):
    """
    Return how many rows of a TSV file come before the first matching row.

    The file is opened as UTF-8 text and streamed through ``csv.reader`` with
    a tab delimiter, so only one row is held in memory at a time. A row
    matches when ``stop_string`` occurs as a substring of at least one of its
    fields. Rows are whatever ``csv.reader`` yields (a blank line is an empty
    row and still counts towards the total).

    Args:
        file_path (str): The path to the TSV file.
        stop_string (str): The text to look for inside the fields of each row.

    Returns:
        int: The zero-based index of the first matching row, i.e. the number
             of rows that precede it (0 when the very first row matches).
             Returns -1 when no row matches. -1 is also returned, after
             printing a message, when the file does not exist or any other
             exception is raised while reading.
    """
    try:
        # newline='' lets the csv module do its own line-ending handling.
        with open(file_path, 'r', newline='', encoding='utf-8') as handle:
            reader = csv.reader(handle, delimiter='\t')

            # Lazily yields the index of every row that has a matching field;
            # only the first index is ever consumed, so reading stops there.
            matching_indexes = (
                index
                for index, fields in enumerate(reader)
                if any(stop_string in cell for cell in fields)
            )

            # Fall back to the -1 sentinel when the file has no matching row.
            return next(matching_indexes, -1)

    except FileNotFoundError:
        print(f"Error: The file at '{file_path}' was not found.")
        return -1

    except Exception as e:
        # Every other failure is reported and mapped to the same sentinel.
        print(f"An unexpected error occurred: {e}")
        return -1

# --- Example usage ---

# Path of the TSV file to scan.
file_name = "/home/kobrien/socat/GCB-test/SOCATv2025.tsv"

# Text that identifies the row at which counting stops.
target_string = "Expocode"

# Number of rows preceding the first match, or -1 if there is none.
row_count = count_rows_until_string(file_name, target_string)

# Report the outcome; -1 is the function's "no result" sentinel.
if row_count == -1:
    print(f"The target string '{target_string}' was not found in the file '{file_name}'.")
else:
    print(f"The target string '{target_string}' was found.")
    print(f"Number of rows before the target: {row_count}")



The target string 'Expocode' was found.
Number of rows before the target: 4
