# SR 4.1: Sort Algorithm

In [1]:
import pandas as pd

# Load the dataset into a DataFrame.
# NOTE(review): this is an absolute path on one user's machine, so the notebook
# will not run anywhere else as-is — consider a path relative to the notebook.
df = pd.read_csv(r"C:\Users\LENOVO\OneDrive\Desktop\bky6-3za5.csv")

### Selection Sort

In [5]:
# Preview the 'structure_count' column that the selection sort operates on.
print(df['structure_count'])

0         0
1         1
2         3
3         3
4         3
       ... 
429    4759
430    5952
431    6086
432    6662
433    7963
Name: structure_count, Length: 434, dtype: int64


In [6]:
def selection_sort(data):
    """
    Sort a sequence into ascending order with the selection sort algorithm.

    The sequence is reordered in place, and the very same object is handed
    back, so the caller's original ordering is lost once this returns.

    Parameters:
    data (list): Mutable, indexable sequence whose items can be compared
        with ``<``.

    Returns:
    list: The same ``data`` object, now in ascending order.
    """
    size = len(data)

    for boundary in range(size):
        # Everything left of `boundary` is already in its final place.
        # Pick the position of the smallest item in the unsorted tail;
        # on ties min() keeps the earliest position.
        smallest = min(range(boundary, size), key=data.__getitem__)

        # Move that item to the front of the unsorted tail.
        data[boundary], data[smallest] = data[smallest], data[boundary]

    return data

# Sort the 'structure_count' column with selection_sort and store it back in df.
data = df['structure_count'].tolist()  # Copy the column's values into a plain Python list
sorted_data = selection_sort(data)  # Sorts the list in place; `sorted_data` and `data` are the same list object
# NOTE(review): only this one column is overwritten with sorted values; any other
# columns in df keep their original row order, so rows may no longer line up —
# confirm this is intended.
df['structure_count'] = sorted_data  # Replace the column with the sorted values

# Show the column after sorting
print(df['structure_count'])

0         0
1         1
2         3
3         3
4         3
       ... 
429    4759
430    5952
431    6086
432    6662
433    7963
Name: structure_count, Length: 434, dtype: int64


# SR 4.2: Algorithm Implementation: Search algorithm 


When comparing the performance of linear search and binary search, binary search is generally faster, but the context and conditions matter. 

# 1. Linear Search
**Definition**: Linear search scans each element in the list sequentially until the target value is found or the list ends.

**Time Complexity**: O(n), where n is the number of elements in the list.

**Performance**: In the worst case (the target is the last element or is absent) it must check every element; on average, when the target is present, it examines about half of the list.
# 2. Binary Search
**Definition:** Binary search repeatedly divides a sorted list in half to find the target value.

**Time Complexity**: O(log n), where n is the number of elements in the list.

**Performance:** It is significantly faster for large lists because the number of comparisons needed grows very slowly with the size of the list.

# Comparison
**Speed:**
- Binary Search is faster because it reduces the problem size by half each time, leading to fewer comparisons.
- Linear Search checks each element one by one, making it slower, especially as the list grows larger.
  
**Conditions:**
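- Binary search requires the list to be sorted, so unsorted data must be sorted first. That extra cost (O(n²) for the selection sort used above) can outweigh the faster lookup when only a few searches are performed.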
- Linear search works on both sorted and unsorted lists.

### Linear Search


In [7]:


def linear_search(data, target):
    """
    Scan `data` from the start and report where `target` first appears.

    Parameters:
    data: Any iterable of items to look through.
    target: The value to look for, matched with ``==``.

    Returns:
    int: Zero-based position (in iteration order) of the first item equal
        to `target`, or -1 when no item matches.
    """
    # Lazily yield the position of every match; next() stops at the first one
    # and falls back to -1 when the generator is exhausted without a match.
    matches = (position for position, item in enumerate(data) if item == target)
    return next(matches, -1)


# Run a linear search for one value in the 'structure_count' column.
data = df['structure_count']  # The column itself (a pandas Series, not a list); rebinds the notebook-level `data`
target = 1  # The value to look for
# linear_search counts items with enumerate, so `pos` is a 0-based position in
# iteration order (or -1 when nothing matches).
pos = linear_search(data, target)

# Report the outcome; -1 is linear_search's "not found" sentinel
if pos != -1:
    # Target found: show where
    print(f"Element {target} found at index {pos}.")
else:
    # Target absent from the column
    print("Element not found.")

    

Element 1 found at index 1.


### Binary Search

In [9]:
# Binary Search
def binary_search(data, target):
    """
    Locate `target` in an ascending-sorted sequence by repeated halving.

    The sequence is assumed to be sorted already; this is not checked, and
    the result is meaningless for unsorted input. When `target` occurs more
    than once, the index returned is whichever match the halving reaches
    first, which is not necessarily the first occurrence.

    Parameters:
    data (list): The sorted, indexable sequence to search through.
    target: The element to search for.

    Returns:
    int: An index at which `target` is stored, or -1 if it is absent.
    """
    # Closed interval [lo, hi] that may still contain the target.
    lo, hi = 0, len(data) - 1

    while lo <= hi:
        middle = lo + (hi - lo) // 2
        probe = data[middle]

        if probe == target:
            return middle  # Match found

        if probe < target:
            lo = middle + 1  # Target can only be to the right of `middle`
        else:
            hi = middle - 1  # Target can only be to the left of `middle`

    return -1  # Interval became empty without a match

# Example usage: binary-search the 'structure_count' values for one target.
target = 1  # The value to look for
# .tolist() builds a new list, so selection_sort reorders that copy rather than
# the DataFrame column; binary_search needs its input sorted.
sorted_data = selection_sort(df['structure_count'].tolist())  # Sort the data
pos = binary_search(sorted_data, target)  # Index into `sorted_data`, or -1 if absent

# Report the outcome; -1 is binary_search's "not found" sentinel
if pos != -1:
    print(f"Element {target} found at index {pos}.")
else:
    print("Element not found.")


Element 1 found at index 1.
