<a href="https://colab.research.google.com/github/mbjallow6/Algorithms-python/blob/main/Rossalind_Problems_Part_Two.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upload the input data file from the local machine into the Colab session.
# NOTE(review): presumably this is how rosalind_maj.txt (the file main() reads)
# gets into the working directory — confirm. `uploaded` is not referenced by
# any code visible in this notebook.
from google.colab import files
uploaded = files.upload()

Saving rosalind_maj.txt to rosalind_maj.txt


## Majority Element Problem

In [2]:
"""
Rosalind Majority Element Problem Solution


This module implements a divide-and-conquer algorithm to find the majority element
in multiple arrays. It handles large inputs efficiently and includes verification for correctness.


Author: Bioinformatics Solution
Compatible with: Python 3.6+
Platform: Google Colab
"""


from typing import List, Tuple



class MajorityFinder:
    """
    Find and verify the majority element of an array.

    A majority element is a value that occurs strictly more than ``n / 2``
    times in an array of length ``n``. The value ``-1`` is used as the
    "no majority" sentinel, so a result of ``-1`` is ambiguous when ``-1``
    itself can occur in the input; ``verify_majority`` accepts ``-1`` in
    both roles.
    """

    def find_majority(self, arr: List[int]) -> int:
        """
        Find the majority element in the array using divide-and-conquer.

        The array is split in half recursively. If a range has a majority
        element, that element must also be the majority of at least one of
        its halves, so only the two half-candidates need to be counted when
        the halves disagree.

        Args:
            arr (List[int]): Input array

        Returns:
            int: The majority element if it exists, -1 otherwise
        """
        if not arr:
            return -1

        def count_in_range(value: int, start: int, end: int) -> int:
            """Count occurrences of ``value`` in ``arr[start..end]`` (inclusive)."""
            return sum(1 for i in range(start, end + 1) if arr[i] == value)

        def majority_rec(start: int, end: int) -> int:
            """Return a majority candidate for ``arr[start..end]``, or -1."""
            if start == end:
                return arr[start]

            mid = (start + end) // 2
            left_major = majority_rec(start, mid)
            right_major = majority_rec(mid + 1, end)

            # Agreement is returned unchecked; the final count below
            # rejects a candidate that is not a true majority.
            if left_major == right_major:
                return left_major

            half = (end - start + 1) // 2
            for candidate in (left_major, right_major):
                if count_in_range(candidate, start, end) > half:
                    return candidate

            return -1  # No majority in this range

        candidate = majority_rec(0, len(arr) - 1)

        # Verify the candidate against the whole array. A candidate of -1
        # yields -1 whether or not it passes, matching the sentinel.
        count = sum(1 for x in arr if x == candidate)
        return candidate if count > len(arr) // 2 else -1

    def verify_majority(self, arr: List[int], result: int) -> Tuple[bool, str]:
        """
        Verify a reported majority result by counting every element.

        Args:
            arr (List[int]): Input array
            result (int): Reported majority element or -1

        Returns:
            Tuple[bool, str]: (is_valid, human-readable explanation)
        """
        if not arr:
            if result == -1:
                return True, "Valid for empty array"
            return False, "Invalid for empty array"

        # Function-scope import keeps this class self-contained.
        from collections import Counter

        counts = Counter(arr)
        threshold = len(arr) // 2

        # Treat -1 as the "no majority" sentinel unless -1 is itself the
        # majority element (Counter reports 0 for a missing key).
        if result == -1 and counts[-1] <= threshold:
            max_elem = max(counts, key=counts.get)
            if counts[max_elem] <= threshold:
                return True, "Valid: No majority element"
            return False, f"Invalid: {max_elem} appears {counts[max_elem]} > {threshold} times"

        if result not in counts:
            return False, f"Result {result} not in array"
        if counts[result] <= threshold:
            return False, f"Result {result} appears {counts[result]} <= {threshold} times"
        return True, f"Valid: {result} appears {counts[result]} > {threshold} times"


def parse_input_file(file_path: str) -> Tuple[int, int, List[List[int]]]:
    """
    Parse input file to extract k, n, and k arrays each of size n.

    Blank lines are ignored. The first non-blank line must hold exactly two
    integers ``k`` and ``n``; it must be followed by exactly ``k`` lines of
    ``n`` whitespace-separated integers each.

    Args:
        file_path (str): Path of the input file.

    Returns:
        Tuple[int, int, List[List[int]]]: ``(k, n, arrays)``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the content is malformed. The message is prefixed
            with "Input parsing error: " and the original exception is
            chained as ``__cause__``.
    """
    try:
        with open(file_path, 'r') as file:
            lines = [line.strip() for line in file if line.strip()]

        if not lines:
            raise ValueError("Input file is empty")

        # Parse k and n
        first_line = list(map(int, lines[0].split()))
        if len(first_line) != 2:
            raise ValueError("First line must contain exactly two integers: k and n")
        k, n = first_line

        if len(lines) != k + 1:
            raise ValueError(f"Expected {k + 1} lines, found {len(lines)}")

        # Parse k arrays
        arrays = []
        for i in range(1, k + 1):
            arr = list(map(int, lines[i].split()))
            if len(arr) != n:
                raise ValueError(f"Array {i} length {len(arr)} doesn't match n={n}")
            arrays.append(arr)

        return k, n, arrays

    except FileNotFoundError as e:
        # Chain explicitly so the original OS-level error stays attached.
        raise FileNotFoundError(f"Input file '{file_path}' not found") from e
    except ValueError as e:
        # Covers both the explicit checks above and int() conversion failures.
        raise ValueError(f"Input parsing error: {e}") from e



def write_output_file(output_path: str, results: List[int]) -> None:
    """
    Write majority elements to output file.

    The results are written as a single space-separated line terminated by
    a newline; an existing file at ``output_path`` is overwritten.

    Args:
        output_path (str): Destination file path.
        results (List[int]): Values to write, in order.

    Raises:
        IOError: If writing fails; the underlying exception is chained as
            ``__cause__``.
    """
    try:
        with open(output_path, 'w') as file:
            file.write(' '.join(map(str, results)) + '\n')
    except Exception as e:
        # Chain explicitly so the root cause is kept in the traceback.
        raise IOError(f"Error writing to output file: {e}") from e



def solve_majority_problem(input_file_path: str) -> List[int]:
    """
    Solve the majority element problem for a given input file.

    Parses the file, finds the majority element of every array, verifies
    each result, and prints a progress line per array.

    Args:
        input_file_path (str): Path of the input file.

    Returns:
        List[int]: One entry per array: its majority element, or -1.

    Raises:
        FileNotFoundError: If the input file does not exist. It is passed
            through unchanged so callers can handle a missing file
            separately from bad data.
        ValueError: For any other failure (malformed input, failed
            verification), with the original exception chained as
            ``__cause__``.
    """
    try:
        # Parse input
        k, n, arrays = parse_input_file(input_file_path)

        print(f"Parsed input: k={k}, n={n}, {k} arrays")

        # Initialize finder
        finder = MajorityFinder()

        # Find majority for each array
        results = []
        for i, arr in enumerate(arrays, 1):
            major = finder.find_majority(arr)
            results.append(major)

            # Verify
            is_valid, msg = finder.verify_majority(arr, major)
            if not is_valid:
                raise ValueError(f"Invalid result for array {i}: {msg}")

            print(f"Array {i}: majority {major} - {msg}")

        return results

    except FileNotFoundError:
        # Do not fold a missing file into ValueError: that would make a
        # caller's dedicated FileNotFoundError handler unreachable.
        raise
    except Exception as e:
        raise ValueError(f"Error solving problem: {str(e)}") from e



def main():
    """
    Run the solver on the configured input file and save the answer.

    Reads ``rosalind_maj.txt``, prints the majority elements, and writes
    them to ``output_maj.txt``. Failures are reported on stdout instead of
    being propagated.
    """
    # Configuration
    source_name = "rosalind_maj.txt"  # Change this to your input file name
    target_name = "output_maj.txt"

    try:
        print("Solving Majority Element Problem...")

        # Compute one answer per input array.
        answers = solve_majority_problem(source_name)

        # Show the answers on the console.
        print("\nResult:")
        rendered = ' '.join(str(value) for value in answers)
        print(f"Majority elements: {rendered}")

        # Persist the answers.
        write_output_file(target_name, answers)
        print(f"Result written to: {target_name}")

    except FileNotFoundError:
        print(f"Error: Input file '{source_name}' not found.")
        print("Please make sure the file exists in the current directory.")

    except ValueError as problem:
        print(f"Error: {problem}")

    except Exception as problem:
        print(f"Unexpected error: {problem}")



def test_majority_finder():
    """
    Print the finder's result for sample and edge-case arrays.

    For every case the function prints the result, the verifier's verdict,
    and whether the result matches the expected value. It only prints;
    nothing is asserted.
    """
    print("=== Testing Majority Finder ===")

    # Each entry pairs an input array with the expected result.
    scenarios = [
        # Sample dataset arrays (n = 8, so a majority needs at least 5 hits)
        ([5, 5, 5, 5, 5, 5, 5, 5], 5),             # 5 fills the array
        ([8, 7, 7, 7, 1, 7, 3, 7], 7),             # 7 appears 5 times
        ([7, 1, 6, 5, 10, 100, 1000, 1], -1),      # no majority
        ([5, 1, 6, 7, 1, 1, 10, 1], -1),           # 1 appears only 4 times
        # Additional tests
        ([], -1),                                  # empty
        ([1], 1),                                  # single element
        ([1, 2], -1),                              # no majority
        ([1, 1, 2], 1),                            # 1 appears 2 of 3 times
        ([1, 2, 2, 3], -1),                        # 2 appears only 2 of 4 times
        ([2, 2, 2, 2], 2),                         # 2 fills the array
    ]

    finder = MajorityFinder()

    for number, (values, wanted) in enumerate(scenarios, start=1):
        print(f"\nTest {number}: {values}")

        outcome = finder.find_majority(values)
        print(f"Result: {outcome}")

        is_valid, msg = finder.verify_majority(values, outcome)
        verdict = '✓' if is_valid else '✗'
        print(f"Valid: {verdict} - {msg}")

        match_mark = '✓' if outcome == wanted else '✗'
        print(f"Matches expected {wanted}: {match_mark}")


if __name__ == "__main__":
    # Print the sample and edge-case checks first.
    test_majority_finder()

    # Visual separator between the checks and the real run.
    divider = "=" * 60
    print("\n" + divider)

    # Solve the configured input file.
    print("Running main function...")
    main()


=== Testing Majority Finder ===

Test 1: [5, 5, 5, 5, 5, 5, 5, 5]
Result: 5
Valid: ✓ - Valid: 5 appears 8 > 4 times
Matches expected 5: ✓

Test 2: [8, 7, 7, 7, 1, 7, 3, 7]
Result: 7
Valid: ✓ - Valid: 7 appears 5 > 4 times
Matches expected 7: ✓

Test 3: [7, 1, 6, 5, 10, 100, 1000, 1]
Result: -1
Valid: ✓ - Valid: No majority element
Matches expected -1: ✓

Test 4: [5, 1, 6, 7, 1, 1, 10, 1]
Result: -1
Valid: ✓ - Valid: No majority element
Matches expected -1: ✓

Test 5: []
Result: -1
Valid: ✓ - Valid for empty array
Matches expected -1: ✓

Test 6: [1]
Result: 1
Valid: ✓ - Valid: 1 appears 1 > 0 times
Matches expected 1: ✓

Test 7: [1, 2]
Result: -1
Valid: ✓ - Valid: No majority element
Matches expected -1: ✓

Test 8: [1, 1, 2]
Result: 1
Valid: ✓ - Valid: 1 appears 2 > 1 times
Matches expected 1: ✓

Test 9: [1, 2, 2, 3]
Result: -1
Valid: ✓ - Valid: No majority element
Matches expected -1: ✓

Test 10: [2, 2, 2, 2]
Result: 2
Valid: ✓ - Valid: 2 appears 4 > 2 times
Matches expected 2: ✓

Runn