<a href="https://colab.research.google.com/github/nikkiisfine/learngit/blob/master/Songs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from itertools import combinations

# Step 1: Load the Excel file and preprocess
# The rest of this cell reads the 'Duration' and 'Song_No' columns from this sheet.
# NOTE(review): the path is relative to the working directory — confirm the file is
# uploaded/mounted before running (e.g. in Colab).
df = pd.read_excel('1990-2023_top50_music.xlsx')

# Step 2: Convert Duration column to seconds
def time_to_seconds(duration):
    """Convert a "minutes:seconds" string into a total number of seconds.

    Args:
        duration: Text with exactly two integer fields separated by a
            colon, e.g. "3:45".

    Returns:
        The duration as an integer count of seconds.

    Raises:
        ValueError: If the text does not split into exactly two fields or
            a field is not an integer.
    """
    mins_text, secs_text = duration.split(':')
    return int(mins_text) * 60 + int(secs_text)

# Add a 'time_in_seconds' column by applying time_to_seconds to every 'Duration' cell.
# NOTE(review): assumes each 'Duration' cell is an "M:SS" string; if the sheet stores
# real time values, pandas may hand back non-string objects — TODO confirm.
df['time_in_seconds'] = df['Duration'].apply(time_to_seconds)

# Step 3: Define the optimized function to find song combinations
def find_combinations(df, target_time, tolerance=3, sizes=(2, 3, 4)):
    """Find groups of songs whose combined length is close to a target time.

    The previous implementation iterated ``itertools.combinations`` over the
    duration-sorted songs and stopped at the first total above the upper
    bound. Combinations are produced in lexicographic order, not in order of
    total duration, so that stop discarded valid groups (in practice only
    groups containing the shortest song were ever reported). This version
    walks the sorted list depth-first and prunes each level independently,
    which is safe and still skips the vast majority of candidates.

    Args:
        df: Table providing the columns ``'Song_No'`` and
            ``'time_in_seconds'`` (anything indexable by those two keys and
            yielding equally long iterables works).
        target_time: Desired total length as a "minutes:seconds" string.
        tolerance: Maximum allowed absolute difference, in seconds, between
            a group's total and the target. Defaults to 3.
        sizes: Group sizes to search, in the order results are reported.
            Defaults to 2, 3 and 4 songs.

    Returns:
        A list of ``(song_numbers, total_seconds)`` tuples, where
        ``song_numbers`` is a tuple of ``'Song_No'`` values ordered from
        shortest to longest song. Results are grouped by size and, within a
        size, listed in lexicographic order of the duration-sorted songs.

    Raises:
        ValueError: If ``target_time`` is not of the form "M:SS".
    """
    # Convert target time to seconds
    min_val, sec_val = map(int, target_time.split(':'))
    target_seconds = min_val * 60 + sec_val
    lower = target_seconds - tolerance
    upper = target_seconds + tolerance

    # Keep only songs that could fit at all, shortest first (stable sort, so
    # songs of equal length keep their original relative order).
    songs = sorted(
        (
            (title, duration)
            for title, duration in zip(df['Song_No'], df['time_in_seconds'])
            if duration <= upper
        ),
        key=lambda song: song[1],
    )

    results = []

    def extend(start, slots_left, chosen, partial):
        """Add ``slots_left`` more songs, taken from index ``start`` onwards."""
        # Stop early enough that the remaining slots can still be filled.
        for idx in range(start, len(songs) - slots_left + 1):
            title, duration = songs[idx]
            # Every song still to be picked is at least this long, so once
            # the cheapest possible completion overshoots, later (longer)
            # candidates at this level overshoot as well.
            if partial + duration * slots_left > upper:
                break
            total = partial + duration
            chosen.append(title)
            if slots_left == 1:
                # The upper bound is already guaranteed by the check above.
                if total >= lower:
                    results.append((tuple(chosen), total))
            else:
                extend(idx + 1, slots_left - 1, chosen, total)
            chosen.pop()

    for size in sizes:
        if size < 1:
            continue  # an empty group is not a meaningful answer
        extend(0, size, [], 0)

    return results

# Step 5: Example usage
target_time = "5:14"  # target total length as "M:SS": 5 minutes 14 seconds
matching_combinations = find_combinations(df, target_time)

# Print results
# `titles` holds the 'Song_No' values of one combination; `duration` is its total in
# seconds, converted back to M:SS (seconds zero-padded to two digits) for display.
for titles, duration in matching_combinations:
    minutes, seconds = divmod(duration, 60)
    duration_str = f"{minutes}:{seconds:02}"
    print(f"Combination: {titles} - Total Duration: {duration_str}")


Combination: (1477, 921) - Total Duration: 5:11
Combination: (1477, 1048) - Total Duration: 5:11
Combination: (1477, 1117) - Total Duration: 5:11
Combination: (1477, 1154) - Total Duration: 5:11
Combination: (1477, 1499) - Total Duration: 5:11
Combination: (1477, 1503) - Total Duration: 5:11
Combination: (1477, 1592) - Total Duration: 5:11
Combination: (1477, 1676) - Total Duration: 5:11
Combination: (1477, 103) - Total Duration: 5:12
Combination: (1477, 473) - Total Duration: 5:12
Combination: (1477, 692) - Total Duration: 5:12
Combination: (1477, 720) - Total Duration: 5:12
Combination: (1477, 893) - Total Duration: 5:12
Combination: (1477, 985) - Total Duration: 5:12
Combination: (1477, 1284) - Total Duration: 5:12
Combination: (1477, 1315) - Total Duration: 5:12
Combination: (1477, 1432) - Total Duration: 5:12
Combination: (1477, 1446) - Total Duration: 5:12
Combination: (1477, 1458) - Total Duration: 5:12
Combination: (1477, 1469) - Total Duration: 5:12
Combination: (1477, 1509) -