In [1]:
import io_utils
import time

In [2]:
def process(S):
    graph = {}
    
    for s1 in S:
        edges = []
                    
        for s2 in S:
            if s1 == s2:
                continue
            if not set(s1).issubset(s2):
                continue
            
            # ensure there is no subset s3 such that s1 < s3 < s2
            if any(s1.issubset(s3) and s3.issubset(s2) and s1 != s3 and s3 != s2 for s3 in S):
                continue
            
            edges.append(s2)
        
        if edges:
            graph[s1] = edges

    return graph

## Testing against the example

### Example

S = {  
    {1, 2},  
    {1, 2, 3},  
    {1, 2, 3, 4},  
    {1, 2, 3, 4, 5},  
    {2},  
    {2, 3}  
}  

$\to$ becomes...

{  
    ({1, 2}, {1, 2, 3}),  
    ({1, 2, 3}, {1, 2, 3, 4}),  
    ({1, 2, 3}, {1, 2, 3, 5}),
    ({2}, {1, 2}),  
    ({2}, {2, 3}),  
    ({2, 3}, {1, 2, 3})  
}  

*(I believe they meant, $(\{1,2,3,4\}, \{1,2,3,4,5\})$)*

In [3]:
data = [(1, 2), (1, 2, 3), (1, 2, 3, 4), (1, 2, 3, 4, 5), (2,), (2, 3)]
data = [ frozenset(x) for x in data ]
graph = process( data )

for k, v in graph.items():
    clean = lambda x: '(' + (','.join(str(x) for x in sorted(x)) + ')')
    print(clean(k), '->  \t', ', '.join([clean(x) for x in sorted(v)]))

(1,2) ->  	 (1,2,3)
(1,2,3) ->  	 (1,2,3,4)
(1,2,3,4) ->  	 (1,2,3,4,5)
(2) ->  	 (1,2), (2,3)
(2,3) ->  	 (1,2,3)


## Benchmarking across the data

In [4]:
for p in [26, 1109, 3515, 79867]:
    path = f'./data/{p}.txt'
    S = io_utils.import_data_sets(path)
    
    t0 = time.time()
    graph = process(S)
    duration = time.time() - t0
    
    io_utils.export_soln(graph, f'./data/solutions/{p}_naive.txt')
    
    print(f'n = {p}: in {duration:.2f} seconds')
    print()
    

n = 26: in 0.00 seconds

n = 1109: in 6.27 seconds

n = 3515: in 127.00 seconds



KeyboardInterrupt: 