In [0]:
import pandas as pd
import numpy as np
from rectpack import newPacker
import rectpack.packer as packer
import matplotlib.pyplot as plt

In [0]:
from pyspark.sql.types import NullType
import pyspark.sql.functions as F

In [0]:
%run "./preprocess"

In [0]:
def col_name(df):
    """
    Build normalized column names for a data frame.

    Every name in ``df.columns`` is stripped of surrounding whitespace and
    lower-cased; spaces become underscores, while hyphens, parentheses and
    double quotes are removed.

    param df:
        object exposing an iterable ``columns`` attribute of strings
    return:
        list of cleaned names in the original column order; ``df`` itself is
        not modified
    """
    # A single translation table covers all single-character substitutions.
    char_map = str.maketrans({' ': '_', '-': None, '(': None, ')': None, '"': None})
    cleaned = []
    for raw_name in df.columns:
        cleaned.append(raw_name.strip().lower().translate(char_map))
    return cleaned

In [0]:
def solver(pal_dim, item_qty, sheet_dim):
  """
  Count how many identical blanks the rectangle packer places on the sheets.

  param pal_dim:
      (width, length) of one blank; any 2-item sequence, in either order
  param item_qty:
      number of identical blanks offered to the packer (upper bound of the
      returned count)
  param sheet_dim:
      iterable of (width, length) pairs, one per available stock sheet (bin)
  return:
      number of blanks that were actually placed
  """
  # Packed rectangles are compared as sorted lists below, so normalise the
  # requested blank the same way. Without this, a tuple, array or unsorted
  # pair would never compare equal and the count would silently be 0.
  blank_key = sorted(pal_dim)

  # Build the packer; rotation lets a blank be placed in either orientation.
  pack = newPacker(mode=packer.PackingMode.Offline,
                   bin_algo=packer.PackingBin.Global,
                   rotation=True)

  # Queue the same blank item_qty times (orientation as given by the caller).
  for _ in range(item_qty):
    pack.add_rect(*pal_dim)

  # Register the sheets the blanks are cut from.
  for sheet in sheet_dim:
    pack.add_bin(*sheet)

  pack.pack()

  # Per the rectpack docs each rect_list() row is (bin, x, y, width, height, rid);
  # positions 3 and 4 are sorted because rotation may have swapped them.
  placed = [sorted([rect[3], rect[4]]) for rect in pack.rect_list()]

  return placed.count(blank_key)

form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall
SHEET,,,ASTM B209,,0011632,,,13.0,5.56,0.64,,
SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.42,107.0,,,
SHEET,,,ASTM B209,,0153942,,,29.75,2.0,0.54,,
SHEET,,,ASTM B209,,0153943,,,14.75,2.0,0.27,,
SHEET,,,ASTM B209,,0153945,,,29.88,29.88,4.41,,
SHEET,,,ASTM B209,,0153948,,,3.0,2.0,,,
SHEET,,,ASTM B209,,182752,,,26.0,26.0,5.7,,
SHEET,,,ASTM B209,,182801,,,81.9375,60.0,,,
SHEET,,,ASTM B209,,182802,,,57.0,2.0,,,
SHEET,,,ASTM B209,,0182813,,,6.0,2.75,0.02,,


In [0]:
# Pull both source tables into pandas: the stock-sheet dimensions (df_dim)
# and the fabricated-part list (df_fab).
df_dim, df_fab = (
    spark.table(table_name).toPandas()
    for table_name in ('gpsc.material_base_dimension',
                       'gpsc.fabrication_cutting_yield_comm')
)

In [0]:
df_fab.head()

Unnamed: 0,form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size
0,SHEET,,,ASTM B209,,11632,,,13.0,5.56,0.64,,,72.28
1,SHEET,,,SPEC NOT SPECIFIED,,153203,,,32.42,107.0,,,,3468.94
2,SHEET,,,ASTM B209,,153942,,,29.75,2.0,0.54,,,59.5
3,SHEET,,,ASTM B209,,153943,,,14.75,2.0,0.27,,,29.5
4,SHEET,,,ASTM B209,,153945,,,29.88,29.88,4.41,,,892.8144


In [0]:
# Cast both part-dimension columns to float dtype (replaces the columns in df_fab).
df_fab[['widthin', 'lengthin']] = df_fab[['widthin', 'lengthin']].astype('float')

In [0]:
# df_dim.createOrReplaceTempView("dim")
# df_dim = spark.sql("select *, width * length as size from dim")

In [0]:
# Area of each stock sheet (width x length); yield_estimate divides it by the
# blank area to get an upper bound on the number of blanks per sheet.
df_dim['sheet_size'] = df_dim['width'] * df_dim['length']

form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall
SHEET,,,ASTM B209,,0011632,,,13.0,5.56,0.64,,
SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.42,107.0,,,
SHEET,,,ASTM B209,,0153942,,,29.75,2.0,0.54,,
SHEET,,,ASTM B209,,0153943,,,14.75,2.0,0.27,,
SHEET,,,ASTM B209,,0153945,,,29.88,29.88,4.41,,
SHEET,,,ASTM B209,,0153948,,,3.0,2.0,,,
SHEET,,,ASTM B209,,182752,,,26.0,26.0,5.7,,
SHEET,,,ASTM B209,,182801,,,81.9375,60.0,,,
SHEET,,,ASTM B209,,182802,,,57.0,2.0,,,
SHEET,,,ASTM B209,,0182813,,,6.0,2.75,0.02,,


form,form_cnt
SHEET,76


num_affected_rows,num_inserted_rows


num_affected_rows,num_inserted_rows
4,4


materialType,width,length
SHEET_48120,48,120
SHEET_60120,60,120
SHEET_48144,48,144
SHEET_72144,72,144


##### Optional: the following cell (Cmd 11) can be skipped

In [0]:
# dim_dict = dict(zip(df_dim.materialType, zip(df_dim.width, df_dim.length, df_dim.sheet_size)))
# sheet_size_list = [dim_dict[k][2] for k in dim_dict.keys()]
# temp_df = df_dim.set_index('materialType')
# df_dim_T = pd.DataFrame(temp_df.unstack(level='materialType')).T
# df_dim_T.columns = df_dim_T.columns.map('{0[1]}|{0[0]}'.format)
# df_dim_T.sort_index(axis=1, inplace=True)
# df_fab_dim = df_fab.copy()
# df_fab_dim[df_dim_T.columns] = pd.concat([df_fab, df_dim_T], axis=1)[df_dim_T.columns].ffill()

In [0]:
# def yield_estimate_0(df_1, df_2, _solver):
#   yield_estimate_result = []
#   padding_sheet_as_zeros = sum(~df_2.materialType.str.contains('PLATE'))
#   padding_plate_as_zeros = sum(~df_2.materialType.str.contains('SHEET'))
  
#   df_2_selected_plate = df_2[df_2["materialType"].str.contains('PLATE', na=False)]
#   df_2_selected_sheet = df_2[df_2["materialType"].str.contains('SHEET', na=False)]
  
#   if 'PLATE' in df_1[0]:
#     res = [0] * padding_sheet_as_zeros
    
#     for _, i in df_2_selected_plate.iterrows():
#       sheet_size = i[3]
#       blank_size = df_1[3]
#       max_n = int(sheet_size//blank_size)
#       sheet_dim = np.sort([(i[1], i[2])])
#       pal_dim = np.sort([df_1[1], df_1[2]]).tolist()
#       yield_item_count = _solver(pal_dim, max_n, sheet_dim)
#       yield_estimate_result.append(yield_item_count)  
#     ret = res + yield_estimate_result
  
#   else: 
#     res = [0]*padding_plate_as_zeros
    
#     for _, i in df_2_selected_sheet.iterrows():
#       sheet_size = i[3]
#       blank_size = df_1[3]
#       max_n = int(sheet_size//blank_size)
#       sheet_dim = np.sort([(i[1], i[2])])
#       pal_dim = np.sort([df_1[1], df_1[2]]).tolist()
#       yield_item_count = _solver(pal_dim, max_n, sheet_dim)
#       yield_estimate_result.append(yield_item_count)
#     ret = yield_estimate_result + res    
    
#   return ret
  

In [0]:
def yield_estimate(df_1, df_2, _solver, opt_out_threshold = 200):
  """
  Estimate, for one part, how many blanks can be cut from each stock size.

  param df_1:
      one part row, read by position: (form, width, length, blank area);
      a pandas Series, list or tuple all work
  param df_2:
      stock table with a 'materialType' column and, by position,
      width (1), length (2) and sheet area (3)
  param _solver:
      callable(pal_dim, item_qty, sheet_dim) returning how many blanks fit
  param opt_out_threshold:
      if the area-based upper bound exceeds this value the solver is skipped
      and the upper bound itself is reported
  return:
      list with one integer per row of df_2, in df_2's row order; stock of
      the other kind (PLATE vs. SHEET) is reported as 0
  """
  # Unpack by position through a plain list: integer indexing on a Series
  # with string labels is deprecated and ambiguous.
  form, width, length, blank_size = list(df_1)[:4]

  # Parts whose form mentions PLATE are cut from PLATE stock, everything
  # else from SHEET stock.
  wanted_kind = 'PLATE' if 'PLATE' in form else 'SHEET'

  # Loop-invariant: the blank's dimensions in ascending order.
  pal_dim = np.sort([width, length]).tolist()

  ret = []
  # Walk the stock table in its own order so result i always belongs to
  # row i of df_2, whatever the ordering of sheet and plate rows.
  for material, stock in zip(df_2['materialType'],
                             df_2.itertuples(index=False, name=None)):
    if isinstance(material, str) and wanted_kind in material:
      ret.append(_estimate_on_stock(pal_dim, blank_size, stock,
                                    _solver, opt_out_threshold))
    else:
      # Wrong kind of stock (or missing material type): nothing can be cut.
      ret.append(0)
  return ret


def _estimate_on_stock(pal_dim, blank_size, stock, _solver, opt_out_threshold):
  """Return the yield of one blank on one stock row (positions 1-3: width, length, area)."""
  _, sheet_width, sheet_length, sheet_size = stock[:4]

  # A blank or sheet with missing or non-positive area cannot produce any cut;
  # this also avoids int() failing on NaN/inf below.
  if not (np.isfinite(blank_size) and blank_size > 0 and np.isfinite(sheet_size)):
    return 0

  # Area-based upper bound on the number of blanks per sheet.
  max_n = int(sheet_size // blank_size)

  # Too many blanks to pack in reasonable time: report the upper bound.
  if max_n > opt_out_threshold:
    return max_n

  # Single bin whose two dimensions are sorted ascending, as a 1x2 array.
  sheet_dim = np.sort([(sheet_width, sheet_length)])
  return _solver(pal_dim, max_n, sheet_dim)
  

In [0]:
# Column order matters: yield_estimate reads each part row by position
# (0 = form, 1 = width, 2 = length, 3 = blank area).
selected_col = [ 'form', 'widthin', 'lengthin', 'blank_sheet_size']

In [0]:
# Row windows for trial runs of increasing size (end bounds are exclusive).
batch_1_start, batch_1_end = 0, 5
batch_2_start, batch_2_end = batch_1_end, 30
# NOTE(review): batch 3 starts at batch_1_end rather than batch_2_end, and
# neither bound is used below (batch_load_3 takes every row) - confirm intent.
batch_3_start, batch_3_end = batch_1_end, 300

# All windows use positional slices. Unlike indexing with an explicit range(),
# a slice does not depend on the index labels and simply truncates when df_fab
# has fewer rows than the window's end.
batch_load_1 = df_fab[selected_col].iloc[batch_1_start:batch_1_end]
# batch_load_2 = df_fab[selected_col].iloc[batch_2_start:batch_2_end].drop_duplicates(subset = ['widthin','lengthin'])
batch_load_2 = df_fab[selected_col].iloc[batch_2_start:batch_2_end]
# Full load: every part row, restricted to the selected columns.
batch_load_3 = df_fab[selected_col]

In [0]:
df_dim

Unnamed: 0,materialType,width,length,sheet_size
0,SHEET_48120,48,120,5760
1,SHEET_60120,60,120,7200
2,SHEET_48144,48,144,6912
3,SHEET_72144,72,144,10368


In [0]:
display(df_fab.head())

form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size
SHEET,,,ASTM B209,,11632,,,13.0,5.56,0.64,,,72.28
SHEET,,,SPEC NOT SPECIFIED,,153203,,,32.42,107.0,,,,3468.94
SHEET,,,ASTM B209,,153942,,,29.75,2.0,0.54,,,59.5
SHEET,,,ASTM B209,,153943,,,14.75,2.0,0.27,,,29.5
SHEET,,,ASTM B209,,153945,,,29.88,29.88,4.41,,,892.8144


In [0]:
import time

In [0]:
batch_load_3

Unnamed: 0,form,widthin,lengthin,blank_sheet_size
0,SHEET,13.0000,5.56,72.280000
1,SHEET,32.4200,107.00,3468.940000
2,SHEET,29.7500,2.00,59.500000
3,SHEET,14.7500,2.00,29.500000
4,SHEET,29.8800,29.88,892.814400
...,...,...,...,...
71,SHEET,43.5000,0.75,32.625000
72,SHEET,60.0000,0.75,45.000000
73,SHEET,94.3125,35.25,3324.515625
74,SHEET,94.3125,43.50,4102.593750


In [0]:
# Trial run on the first batch. DataFrame.apply forwards the extra keyword
# arguments to yield_estimate for every row.
yield_estimate_result_1 = batch_load_1.apply(yield_estimate, axis=1, df_2=df_dim, _solver=solver)
yield_estimate_result_1

In [0]:
# Trial run on the second batch. DataFrame.apply forwards the extra keyword
# arguments to yield_estimate for every row.
yield_estimate_result_2 = batch_load_2.apply(yield_estimate, axis=1, df_2=df_dim, _solver=solver)
yield_estimate_result_2

In [0]:
# Full run over every part, with the solver opt-out threshold raised to 300.
# The result is a Series holding one list of per-stock yields per part row.
yield_estimate_result_3 = batch_load_3.apply(
    yield_estimate,
    axis=1,
    df_2=df_dim,
    _solver=solver,
    opt_out_threshold=300,
)
yield_estimate_result_3

In [0]:
# One output column name per stock size, e.g. 'SHEET_48120_yield'.
new_col = df_dim['materialType'].astype(str) + '_yield'
new_col

In [0]:
# Part table with the per-stock yield counts appended as extra columns.
# NOTE(review): the `temp` frame built a few cells later uses the exact same
# expression - confirm whether both are needed.
test = pd.concat(
    [
        df_fab.reset_index(drop=True),
        pd.DataFrame(yield_estimate_result_3.tolist(), columns=new_col),
    ],
    axis=1,
)

In [0]:
test

Unnamed: 0,form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size,SHEET_48120_yield,SHEET_60120_yield,SHEET_48144_yield,SHEET_72144_yield
0,SHEET,,,ASTM B209,,0011632,,,13.0000,5.56,0.64,,,72.280000,70,91,88,133
1,SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.4200,107.00,,,,3468.940000,1,1,1,2
2,SHEET,,,ASTM B209,,0153942,,,29.7500,2.00,0.54,,,59.500000,96,120,108,166
3,SHEET,,,ASTM B209,,0153943,,,14.7500,2.00,0.27,,,29.500000,187,231,222,351
4,SHEET,,,ASTM B209,,0153945,,,29.8800,29.88,4.41,,,892.814400,4,8,4,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,SHEET,,,ASTM B209,,MA-41946-02,,,43.5000,0.75,,,,32.625000,172,204,210,317
72,SHEET,,,ASTM B209,,MA-41946-03,,,60.0000,0.75,,,,45.000000,128,160,128,224
73,SHEET,,,ASTM B209,,MB-41943-01,,,94.3125,35.25,38.28,,,3324.515625,1,1,1,2
74,SHEET,,,ASTM B209,,MB-41943-02,,,94.3125,43.50,,,,4102.593750,1,1,1,1


In [0]:
df_dim.shape

In [0]:
# Part table with one yield-count column per stock size appended on the right;
# the cells that follow derive the area columns from this frame.
temp = pd.concat(
    [
        df_fab.reset_index(drop=True),
        pd.DataFrame(yield_estimate_result_3.tolist(), columns=new_col),
    ],
    axis=1,
)

In [0]:
temp

Unnamed: 0,form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size,SHEET_48120_yield,SHEET_60120_yield,SHEET_48144_yield,SHEET_72144_yield
0,SHEET,,,ASTM B209,,0011632,,,13.0000,5.56,0.64,,,72.280000,70,91,88,133
1,SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.4200,107.00,,,,3468.940000,1,1,1,2
2,SHEET,,,ASTM B209,,0153942,,,29.7500,2.00,0.54,,,59.500000,96,120,108,166
3,SHEET,,,ASTM B209,,0153943,,,14.7500,2.00,0.27,,,29.500000,187,231,222,351
4,SHEET,,,ASTM B209,,0153945,,,29.8800,29.88,4.41,,,892.814400,4,8,4,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,SHEET,,,ASTM B209,,MA-41946-02,,,43.5000,0.75,,,,32.625000,172,204,210,317
72,SHEET,,,ASTM B209,,MA-41946-03,,,60.0000,0.75,,,,45.000000,128,160,128,224
73,SHEET,,,ASTM B209,,MB-41943-01,,,94.3125,35.25,38.28,,,3324.515625,1,1,1,2
74,SHEET,,,ASTM B209,,MB-41943-02,,,94.3125,43.50,,,,4102.593750,1,1,1,1


In [0]:
# Names for the area columns, one per stock size, e.g. 'SHEET_48120_yield_square'.
yield_square_col = df_dim['materialType'].astype(str) + '_yield_square'
yield_square_col

In [0]:
# The yield counts occupy the last len(df_dim) columns of temp (one per stock size).
start_col_idx = len(df_dim)
# Yield count x blank area, row by row: total blank area cut per stock size.
temp_square = temp.iloc[:, -start_col_idx:].mul(temp['blank_sheet_size'], axis=0)

In [0]:
list(yield_square_col)

In [0]:
# Relabel the area columns; until now they still carried the '*_yield' names.
temp_square.columns = yield_square_col.tolist()

In [0]:
temp

Unnamed: 0,form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size,SHEET_48120_yield,SHEET_60120_yield,SHEET_48144_yield,SHEET_72144_yield
0,SHEET,,,ASTM B209,,0011632,,,13.0000,5.56,0.64,,,72.280000,70,91,88,133
1,SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.4200,107.00,,,,3468.940000,1,1,1,2
2,SHEET,,,ASTM B209,,0153942,,,29.7500,2.00,0.54,,,59.500000,96,120,108,166
3,SHEET,,,ASTM B209,,0153943,,,14.7500,2.00,0.27,,,29.500000,187,231,222,351
4,SHEET,,,ASTM B209,,0153945,,,29.8800,29.88,4.41,,,892.814400,4,8,4,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,SHEET,,,ASTM B209,,MA-41946-02,,,43.5000,0.75,,,,32.625000,172,204,210,317
72,SHEET,,,ASTM B209,,MA-41946-03,,,60.0000,0.75,,,,45.000000,128,160,128,224
73,SHEET,,,ASTM B209,,MB-41943-01,,,94.3125,35.25,38.28,,,3324.515625,1,1,1,2
74,SHEET,,,ASTM B209,,MB-41943-02,,,94.3125,43.50,,,,4102.593750,1,1,1,1


In [0]:

temp_square

Unnamed: 0,SHEET_48120_yield_square,SHEET_60120_yield_square,SHEET_48144_yield_square,SHEET_72144_yield_square
0,5059.600000,6577.480000,6360.640000,9613.24000
1,3468.940000,3468.940000,3468.940000,6937.88000
2,5712.000000,7140.000000,6426.000000,9877.00000
3,5516.500000,6814.500000,6549.000000,10354.50000
4,3571.257600,7142.515200,3571.257600,7142.51520
...,...,...,...,...
71,5611.500000,6655.500000,6851.250000,10342.12500
72,5760.000000,7200.000000,5760.000000,10080.00000
73,3324.515625,3324.515625,3324.515625,6649.03125
74,4102.593750,4102.593750,4102.593750,4102.59375


In [0]:
# Final result: part attributes, yield counts and yield areas side by side.
df_fab_with_yield = pd.concat([temp, temp_square], axis='columns')

In [0]:
df_fab_with_yield

Unnamed: 0,form,alloy,grade,specification,part_on_schedule,part_number_detail,vendor,lower_gaugein,widthin,lengthin,weightlbs,base_material,total_weight_st_overall,blank_sheet_size,SHEET_48120_yield,SHEET_60120_yield,SHEET_48144_yield,SHEET_72144_yield,SHEET_48120_yield_square,SHEET_60120_yield_square,SHEET_48144_yield_square,SHEET_72144_yield_square
0,SHEET,,,ASTM B209,,0011632,,,13.0000,5.56,0.64,,,72.280000,70,91,88,133,5059.600000,6577.480000,6360.640000,9613.24000
1,SHEET,,,SPEC NOT SPECIFIED,,0153203,,,32.4200,107.00,,,,3468.940000,1,1,1,2,3468.940000,3468.940000,3468.940000,6937.88000
2,SHEET,,,ASTM B209,,0153942,,,29.7500,2.00,0.54,,,59.500000,96,120,108,166,5712.000000,7140.000000,6426.000000,9877.00000
3,SHEET,,,ASTM B209,,0153943,,,14.7500,2.00,0.27,,,29.500000,187,231,222,351,5516.500000,6814.500000,6549.000000,10354.50000
4,SHEET,,,ASTM B209,,0153945,,,29.8800,29.88,4.41,,,892.814400,4,8,4,8,3571.257600,7142.515200,3571.257600,7142.51520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,SHEET,,,ASTM B209,,MA-41946-02,,,43.5000,0.75,,,,32.625000,172,204,210,317,5611.500000,6655.500000,6851.250000,10342.12500
72,SHEET,,,ASTM B209,,MA-41946-03,,,60.0000,0.75,,,,45.000000,128,160,128,224,5760.000000,7200.000000,5760.000000,10080.00000
73,SHEET,,,ASTM B209,,MB-41943-01,,,94.3125,35.25,38.28,,,3324.515625,1,1,1,2,3324.515625,3324.515625,3324.515625,6649.03125
74,SHEET,,,ASTM B209,,MB-41943-02,,,94.3125,43.50,,,,4102.593750,1,1,1,1,4102.593750,4102.593750,4102.593750,4102.59375


In [0]:
# Plain list of the result's column names, used by the duplicate check that follows.
yield_columns = df_fab_with_yield.columns.tolist()

In [0]:
# Sanity check: every column name that occurs more than once (empty list = all unique).
list(filter(lambda name: yield_columns.count(name) > 1, yield_columns))

In [0]:
# Convert the pandas result into a Spark DataFrame so it can be written out
# with the Spark writer in the cells that follow.
spark_df_fab_with_yield = spark.createDataFrame(df_fab_with_yield)

In [0]:
# Re-project every column: a column whose Spark type is NullType is replaced
# by a null literal cast to string under the same name, all others pass through.
# NOTE(review): presumably needed because the CSV writer rejects NullType - confirm.
spark_df_fab_with_yield_save = spark_df_fab_with_yield.select(*(
    field.name
    if not isinstance(field.dataType, NullType)
    else F.lit(None).cast('string').alias(field.name)
    for field in spark_df_fab_with_yield.schema
))

In [0]:
# Destination of the enriched part table; an existing result is overwritten
# and a header row is written.
# NOTE(review): Spark's DataFrameWriter.csv is documented to create a directory
# at this path (with one part file here, because of coalesce(1)) rather than a
# single file named *.csv - confirm downstream readers expect that.
path = '/mnt/ml/com/dmnd00xxxxx_fabrication_cutting_yield_optimization/input_data/(result)_fabrication_material_sdx_comm.csv'
spark_df_fab_with_yield_save.coalesce(1).write.mode('overwrite').option("header", "true").csv(path)

In [0]:
# Ratio of sheet area to blank area, with NaN replaced by 0 and truncated to int.
# NOTE(review): df_fab_with_yield is assembled from temp and temp_square, and no
# visible cell adds a `sheet_size` column to it (only df_dim gets one), so this
# attribute access looks like it would fail - confirm which sheet size is meant.
df_fab_with_yield['item_qty'] = np.nan_to_num(df_fab_with_yield.sheet_size/df_fab_with_yield.blank_sheet_size).astype(int)