In [2]:
import numpy as np
import pandas as pd

In [3]:
# Identifiers of the plate and brick types treated as usable.
# NOTE(review): the list is not referenced by the cells visible here; its
# entries use the same naming as the part keys of the test dictionaries.
useable_blocks = [
    # Plates
    'plate_1x1', 'plate_1x2', 'plate_1x3', 'plate_1x4',
    'plate_1x6', 'plate_1x8', 'plate_1x10',
    'plate_2x2', 'plate_2x3', 'plate_2x4',
    'plate_2x6', 'plate_2x8', 'plate_2x12',
    'plate_4x4', 'plate_4x6', 'plate_4x8',
    'plate_6x6', 'plate_6x8', 'plate_6x10',
    'plate_8x8',
    # Bricks
    'brick_1x1', 'brick_1x2', 'brick_1x3', 'brick_1x4',
    'brick_1x6', 'brick_1x8',
    'brick_2x2', 'brick_2x3', 'brick_2x4', 'brick_2x6',
]

In [4]:
# Load the three CSV tables. Every file carries an 'Unnamed: 0' column
# (presumably an index written out when the CSV was saved -- TODO confirm)
# which is dropped right after reading.
set_df = pd.read_csv('data/lego_set_parts.csv')
set_df = set_df.drop(['Unnamed: 0'], axis=1)

part_df = pd.read_csv('data/individual_set_parts.csv')
part_df = part_df.drop(['Unnamed: 0'], axis=1)

info_df = pd.read_csv('data/set_name_url.csv')
info_df = info_df.drop(['Unnamed: 0'], axis=1)

In [5]:
# Made-up test inventories: each dictionary maps a part name to the
# number of pieces of that part we pretend to own.

test_1 = {
    'plate_1x3': 5,
    'plate_2x2': 5,
    'plate_4x4': 5,
    'brick_1x4': 5,
    'brick_2x4': 5,
}

test_2 = {
    'plate_1x3': 1,
    'plate_2x2': 3,
    'plate_4x4': 2,
    'brick_1x4': 3,
    'brick_2x4': 8,
}

test_3 = {
    'plate_1x3': 7,
    'brick_2x4': 5,
}

test_4 = {
    'brick_2x6': 3,
}

In [11]:
def select_valid_sets(
                        inp_dict,
                        all_set_df=None,
                     ):
    """Shortlist the sets that can be (partly) built from an inventory.

    Parameters
    ----------
    inp_dict : dict
        Maps a part column name of the set table (e.g. 'brick_2x4') to the
        number of pieces of that part that are available.
    all_set_df : pandas.DataFrame, optional
        Set table to search. It must contain the columns 'set_id',
        'set_name', 'set_url', 'n_parts' and one column per key of
        ``inp_dict``. Defaults to the notebook-level ``set_df``.

    Returns
    -------
    pandas.DataFrame
        At most 20 rows with the columns 'set_id', 'set_name', 'set_url',
        'frac_have', 'n_parts', 'n_have', 'n_needed' followed by the keys
        of ``inp_dict``. Rows are ordered by 'frac_have' (descending), then
        'n_parts' (ascending), then 'n_have' (descending).

    Raises
    ------
    KeyError
        If a key of ``inp_dict`` or a required column is missing from the
        set table.
    """
    if all_set_df is None:
        all_set_df = set_df

    # A real list: works for column selection and for list concatenation
    # on both Python 2 and Python 3 (dict.keys() is a view on Python 3).
    part_cols = list(inp_dict)

    # Keep only the sets that use no more of each listed part than we own.
    candidates = all_set_df
    for part in part_cols:
        candidates = candidates[candidates[part] <= inp_dict[part]]

    # Work on an explicit copy so that adding columns neither touches the
    # caller's table nor triggers SettingWithCopyWarning.
    candidates = candidates.copy()

    # After the filter above, the set's own counts of the listed parts are
    # all covered by the inventory, so their sum is the number of the set's
    # pieces we already have.
    candidates['n_have'] = candidates[part_cols].sum(axis=1)

    # Drop the sets that use none of the listed parts at all.
    candidates = candidates[candidates['n_have'] > 0].copy()

    candidates['n_needed'] = candidates['n_parts'] - candidates['n_have']
    # Explicit float cast keeps this a true division on Python 2 as well.
    candidates['frac_have'] = candidates['n_have'] / candidates['n_parts'].astype(float)

    out_cols = [
        'set_id', 'set_name', 'set_url',
        'frac_have', 'n_parts', 'n_have', 'n_needed',
    ] + part_cols

    ranked = candidates[out_cols].sort_values(
        ['frac_have', 'n_parts', 'n_have'],
        ascending=[False, True, False],
    )
    return ranked.head(20)
    
    
# This can be refined later.
# For now, just take the first (best) row of
#  the input, which is expected to be sorted by
#  the fraction we have and the number of parts.
def return_rec_set(
                    inp_df,
                    set_info_df=None,
                  ):
    """Return the first row of ``inp_df`` with name and URL filled in.

    Parameters
    ----------
    inp_df : pandas.DataFrame
        Candidate sets; the first row (by position) is taken as the
        recommendation. It must contain a 'set_id' column.
    set_info_df : pandas.DataFrame, optional
        Lookup table with the columns 'set_id', 'set_title' and 'set_url'.
        Defaults to the notebook-level ``info_df``.

    Returns
    -------
    pandas.Series
        A copy of the first row of ``inp_df`` whose 'set_name' and
        'set_url' entries are taken from the first matching row of the
        lookup table ('set_title' and 'set_url' respectively).

    Raises
    ------
    IndexError
        If ``inp_df`` has no rows, or if the chosen 'set_id' does not occur
        in the lookup table.
    """
    if set_info_df is None:
        set_info_df = info_df

    if len(inp_df) == 0:
        raise IndexError('return_rec_set: no candidate sets to choose from')

    # Positional access: a label lookup would return several rows (a
    # DataFrame) whenever the first index label is not unique.
    out_df = inp_df.iloc[0].copy()

    matches = set_info_df.loc[set_info_df['set_id'] == out_df['set_id']]
    if len(matches) == 0:
        raise IndexError(
            'return_rec_set: set_id %r not found in the set info table'
            % (out_df['set_id'],)
        )

    out_df['set_name'] = matches['set_title'].values[0]
    out_df['set_url'] = matches['set_url'].values[0]
    return out_df
    
def get_needed_parts(
                        rec_set,
                        all_part_df,
                        part_dict,
                    ):
    """Split the parts of one set into basic bricks/plates and the rest.

    Parameters
    ----------
    rec_set : mapping or pandas.Series
        Must provide the id of the set under the key 'set_id'.
    all_part_df : pandas.DataFrame
        Part table with the columns 'set_id', 'part_name' and 'quantity'.
    part_dict : dict
        Currently unused.
        NOTE(review): the result is not yet compared against this
        inventory, so ``parts_have`` lists every short-named brick/plate of
        the set, whether or not it is actually owned.

    Returns
    -------
    (dict, dict)
        ``(parts_needed, parts_have)``, both mapping a part name to its
        total quantity in the set. ``parts_have`` holds the parts whose
        name contains 'Brick' or 'Plate' and is shorter than 11 characters;
        ``parts_needed`` holds every other part. Both dictionaries are also
        printed.
    """
    set_id = rec_set['set_id']
    set_part_df = all_part_df[all_part_df['set_id'] == set_id]

    # Total quantity per part name. Summing (instead of reading only the
    # first row) also counts a part that is listed on several rows.
    quantity_by_part = set_part_df.groupby('part_name', sort=False)['quantity'].sum()

    # Names shorter than this are treated as plain bricks/plates.
    # NOTE(review): presumably meant to exclude longer, decorated names
    # that merely start with 'Brick'/'Plate' -- confirm against the data.
    max_basic_name_len = 11

    parts_needed = {}
    parts_have = {}
    for part, quantity in zip(quantity_by_part.index, quantity_by_part.values):
        is_basic = (len(part) < max_basic_name_len) and (('Brick' in part) or ('Plate' in part))
        if is_basic:
            parts_have[part] = quantity
        else:
            parts_needed[part] = quantity

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(parts_needed)
    print(parts_have)

    return parts_needed, parts_have
            
# Try the pipeline on the first made-up inventory: shortlist the candidate
# sets, then take the top-ranked one as the recommendation.
part_dict = test_1
poss_sets = select_valid_sets( part_dict )
rec_set   = return_rec_set( poss_sets )
# Listing the parts of the recommended set is left disabled for now:
#req_parts = get_needed_parts( rec_set, part_df, part_dict )

In [12]:
# Display the recommended set together with its name and URL.
rec_set

set_id                                  60000-1
set_name               60000-1: Fire Motorcycle
set_url      https://brickset.com/sets/60000-1/
frac_have                              0.153846
n_parts                                      39
n_have                                        6
n_needed                                     33
plate_2x2                                     0
plate_4x4                                     1
plate_1x3                                     0
brick_1x4                                     5
brick_2x4                                     0
Name: 230, dtype: object

In [8]:
# Peek at the first rows of the set-info table.
info_df.head()

Unnamed: 0,set_id,set_title,set_url
0,7992-1,7992-1: Container Stacker,https://brickset.com/sets/7992-1/
1,1088-1,1088-1: Road Burner,https://brickset.com/sets/1088-1/
2,60024-1,60024-1: City Advent Calendar,https://brickset.com/sets/60024-1/
3,6454-1,6454-1: Countdown Corner,https://brickset.com/sets/6454-1/
4,7623-1,7623-1: Temple Escape,https://brickset.com/sets/7623-1/
