In [1]:
import random
from datetime import datetime, timedelta
from time import perf_counter, time

import numpy as np

In [2]:
# Generate the list of dates
# Seed both random sources used below so that Restart & Run All reproduces the
# same sample: np.random drives the day selection, random drives the shuffle.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

st = datetime.strptime('01/01/2024', '%d/%m/%Y')  # first candidate day: 1 Jan 2024
n = 365 * 10  # number of consecutive candidate days starting at `st`
# Keep a candidate day only when its uniform draw exceeds 0.8, then shuffle so
# the functions below receive the dates in no particular order.
list_of_dates = [st + timedelta(days=i) for i in range(n) if np.random.uniform() > 0.8]
random.shuffle(list_of_dates)

In [3]:
def split_date_into_components(date):
    """Return the ``(year, month, day)`` of ``date`` as a tuple of ints.

    Args:
        date: Either an object exposing ``year``, ``month`` and ``day``
            attributes (such as ``datetime.datetime``), or any object whose
            ``str()`` starts with ``YYYY-MM-DD``, optionally followed by a
            space or ``T`` and a time part.

    Returns:
        tuple[int, int, int]: ``(year, month, day)``.

    Raises:
        IndexError: If the textual form has fewer than three ``-``-separated
            fields.
        ValueError: If a field is not an integer.
    """
    # Prefer the calendar attributes when they exist: no string round-trip needed.
    try:
        return int(date.year), int(date.month), int(date.day)
    except AttributeError:
        pass

    # Fallback: parse the textual form. ISO strings may separate date and time
    # with "T" instead of a space, so treat both as the same separator.
    date_part = str(date).replace("T", " ").split(" ")[0]
    date_component = date_part.split("-")

    return int(date_component[0]), int(date_component[1]), int(date_component[2])

def is_leap_year(year):
    """Tell whether ``year`` is a leap year.

    A year divisible by 400 is a leap year; any other year divisible by 100
    is not; every remaining year is a leap year exactly when it is divisible
    by 4.
    """
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0

def date_to_ordinal(year, month, day):
    """Convert a given date to its ordinal value.

    The ordinal counts days so that year 1, month 1, day 1 maps to 1.

    Args:
        year: Calendar year. Years below 1 contribute no complete-year days.
        month: Month number, where 1 is January.
        day: Day of the month; added as-is, without range validation.

    Returns:
        The number of days in all complete years before ``year``, plus the
        days in the complete months before ``month``, plus ``day``.
    """
    # Days in each month; is_leap_year returns a bool, which adds 0 or 1 to February.
    days_in_month = [31, 28 + is_leap_year(year), 31, 30, 31, 30,
                     31, 31, 30, 31, 30, 31]

    # Days for complete years, in closed form instead of looping over every
    # year: 365 per year plus one per leap year (every 4th, except every
    # 100th, but including every 400th). Clamped at 0 so that years <= 1
    # contribute nothing.
    complete_years = max(year - 1, 0)
    ordinal = (365 * complete_years
               + complete_years // 4
               - complete_years // 100
               + complete_years // 400)

    # Add days for complete months of the current year
    ordinal += sum(days_in_month[m - 1] for m in range(1, month))

    # Add the days of the current month
    ordinal += day

    return ordinal

In [4]:
def find_date_pairs_brute_force(dates):
    """Find every pair of dates exactly 7 days apart by comparing all pairs.

    Args:
        dates: Indexable sequence of dates accepted by
            ``split_date_into_components``.

    Returns:
        list: Tuples ``(dates[i], dates[j])`` with ``i < j`` whose ordinals
        differ by exactly 7, in increasing ``(i, j)`` order.
    """
    # Convert each date to its ordinal once up front. The conversion does not
    # depend on the pairing, so repeating it inside the nested loop would redo
    # the same work for every pair. The pairwise comparison below is still
    # quadratic in the number of dates.
    ordinals = [date_to_ordinal(*split_date_into_components(date)) for date in dates]

    pairs = []

    # Compare every date with each date that follows it in the input
    for i in range(len(dates)):
        for j in range(i + 1, len(dates)):
            if abs(ordinals[j] - ordinals[i]) == 7:
                pairs.append((dates[i], dates[j]))

    return pairs

In [None]:
%%time

# Time the brute-force search with perf_counter: it is a monotonic,
# high-resolution clock meant for measuring elapsed time, whereas time()
# follows the system wall clock and can jump if that clock is adjusted.
start = perf_counter()
pairs_brute_force = find_date_pairs_brute_force(list_of_dates)
end = perf_counter()

print(f"Brute force solution runtime: {end - start:.6f} seconds")
print(f"Number of pairs found: {len(pairs_brute_force)}")

In [None]:
def find_date_pairs_optimized(dates):
    """Find every pair of dates exactly 7 days apart in a single pass.

    Each date is looked up against the dates already seen, keyed by ordinal,
    so no pairwise comparison is needed.

    Args:
        dates: Iterable of dates accepted by ``split_date_into_components``.

    Returns:
        list: Tuples ``(earlier_date, later_date)`` whose ordinals differ by
        exactly 7. A date that occurs several times in ``dates`` takes part
        in one pair per occurrence.
    """
    # Maps an ordinal to every date seen so far with that ordinal. A list is
    # kept (rather than a single date) so that repeated dates are not
    # overwritten and each occurrence still produces its own pair.
    dates_by_ordinal = {}
    pairs = []

    for date in dates:
        year, month, day = split_date_into_components(date)

        # Convert the date to its ordinal value
        days_since_start = date_to_ordinal(year, month, day)

        # Pair with every already-seen date exactly 7 days before
        for earlier in dates_by_ordinal.get(days_since_start - 7, ()):
            pairs.append((earlier, date))

        # Pair with every already-seen date exactly 7 days after
        for later in dates_by_ordinal.get(days_since_start + 7, ()):
            pairs.append((date, later))

        # Record this date only after the look-ups, so it pairs with earlier
        # inputs only and each pair is reported once.
        dates_by_ordinal.setdefault(days_since_start, []).append(date)

    return pairs

In [None]:
%%time

# Time the single-pass search with perf_counter: it is a monotonic,
# high-resolution clock meant for measuring elapsed time, whereas time()
# follows the system wall clock and can jump if that clock is adjusted.
start = perf_counter()
pairs_optimized = find_date_pairs_optimized(list_of_dates)
end = perf_counter()

print(f"Optimized solution runtime: {end - start:.6f} seconds")
print(f"Number of pairs found: {len(pairs_optimized)}")

## Checking


In [None]:
# Put each pair in ascending order, then sort the list of pairs, so the result
# does not depend on the order in which the pairs or their members were found.
sorted_pairs_optimized = sorted(tuple(sorted(pair)) for pair in pairs_optimized)

In [None]:
# Put each pair in ascending order, then sort the list of pairs, so the result
# does not depend on the order in which the pairs or their members were found.
sorted_pairs_brute_force = sorted(tuple(sorted(pair)) for pair in pairs_brute_force)

In [None]:
# Check that both methods yield the same set of pairs. The comparison is
# written as an explicit branch rather than an assert inside a bare except,
# so unrelated errors are not swallowed and the check still runs when
# assertions are disabled.
optimized_pair_set = set(sorted_pairs_optimized)
brute_force_pair_set = set(sorted_pairs_brute_force)

missing_from_optimized = brute_force_pair_set - optimized_pair_set
missing_from_brute_force = optimized_pair_set - brute_force_pair_set

if not missing_from_optimized and not missing_from_brute_force:
    print("Both methods yield the same pairs")
else:
    if missing_from_optimized:
        print("Missing from optimized:", len(missing_from_optimized))

    if missing_from_brute_force:
        print("Missing from brute force:", len(missing_from_brute_force))