In [47]:
import openai
from pathlib import Path
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import backoff
import time
from collections import Counter
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import shutil
import os
from sklearn.preprocessing import LabelEncoder
from PIL import Image


# For neural network stuff
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.preprocessing.image import ImageDataGenerator

import splitfolders


import sys
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
# def --> Create png folder only + encoderer link
# -- dict = {'bodyid': material}
# Then split the png to train and test

def get_all_files(directory, pattern):
    return [f for f in Path(directory).glob(pattern)]

In [5]:
def id2name(id):
    id = id.lower()
    # id = id.rstrip()
    if id == 'metal_non-ferrous':
        return 'non-ferrous metal'
    elif id == 'metal_ferrous':
        return 'ferrous metal'
    elif id == 'metal_ferrous_steel':
        return 'steel'
    elif id == 'metal_aluminum':
        return 'aluminum'
    elif id in ['other', 'wood', 'plastic']:
        return id
    else:
        raise f"Non-default id! {id}"
        # return 'other'

In [87]:
input_dir = r"/Users/pat/Documents/Development/patbam/Fusion360GalleryDataset_23hackathon_train"     # Change this to the directory where you downloaded the dataset
input_jsons = get_all_files(input_dir, "*/assembly.json")

# Create new folder for collecting png for training
output_dir = "/Users/pat/Documents/Development/patbam/PNG_NeuralNetwork"

# Classify the png folder in to material class
# print((input_jsons[0]))

# Create data frame for body part
df_bodyPart = pd.DataFrame(columns = ['name', 'area', 'volume','material'])

col = ['name', 'area', 'volume','material']



In [88]:
assemblies = {}
counter = 0
desired_img_sz = (224, 224)  # Desired size of the image

for input_json in tqdm(input_jsons): # tqdm: to show the progress bar 

    with open(input_json, "r", encoding="utf-8") as f:
        assembly_data = json.load(f)

    bodies = []
    # print(input_json)

    # ------ get name, material, area, volume ----------------
    for key, value in assembly_data['bodies'].items():
        name = value['name']
        material = value['material_category']
        area = value['physical_properties']['area']
        volume = value['physical_properties']['volume']
        png_name = value['png']

        # print(name, " ", png_name)


        if name[:4] != 'Body':  # some of the bodies do not have a custom name. This filters those bodies out.
            
            # bodies.append({'name':name, 'material':material})
            bodies.append({'id': key, 'name':name, 'material': material, 'area': area, 'volume': volume})

            # Will include the data with num_joint later

            # Add data to pandas             
            df_curBody = {'name':name, 'area': area, 'volume':volume, 'material':material}
            df_bodyPart.loc[len(df_bodyPart)] = df_curBody # Add the body part data to dataFrame


             # ----------For categorizing the pictures to material base ----------------------
            if material == "Metal_Aluminum":
                destination_path = output_dir + "/metal_aluminum/" +  png_name
            elif material == "Metal_Ferrous_Steel":
                destination_path = output_dir + "/metal_ferrous_steel/" +  png_name
            elif material == "Metal_Non-Ferrous":
                destination_path = output_dir + "/metal_non-ferrous/" +  png_name
            elif material == "Metal_Ferrous":
                destination_path = output_dir + "/metal_ferrous/" +  png_name
            elif material == "Wood":
                destination_path = output_dir + "/wood/" +  png_name       
            elif material == "Plastic":
                destination_path = output_dir + "/plastic/" +  png_name
            else:
                destination_path = output_dir + "/other/" +  png_name

            # print(destination_path)
            # ---- copy png file with resized to PNG_NeuralNetwork ----
            # Get the source path
            # cur_json = str(input_json)
            # cur_folder = cur_json.replace("assembly.json", "")


            # Remove "assembly.json" by getting the parent directories
            cur_folder = input_json.parents[0] / input_json.name.replace("assembly.json", "")
            # print(cur_folder)


            source_path = os.path.join(cur_folder, png_name)


            # Open the image using PIL
            image = Image.open(source_path)

            # Resize the image while maintaining aspect ratio
            image.thumbnail(desired_img_sz, Image.ANTIALIAS)

            #destination_path = os.path.join(output_dir, png_name)
            


            #shutil.copy(source_path, destination_path) # this is for the copy
            image.save(destination_path)

            # ----------------------------------------------------------------------- #


        
        

    if len(bodies) > 0:
        assemblies[input_json.parts[-2]] = bodies
    counter +=1 

# assemblies

100%|██████████| 6336/6336 [00:14<00:00, 426.86it/s]


In [None]:
print(df_bodyPart)

In [60]:
# delete data in ./output folder (in case not run it for the 1st time)
item_path = "./output"
shutil.rmtree(item_path)

In [61]:
# Train-validation-test
splitfolders.ratio("./", output="output", seed=1337, ratio=(.7, .15, .15), group_prefix=None)

Copying files: 13885 files [00:04, 2798.10 files/s]


In [82]:
def get_file_names_list(folder_path): # Get file name from the folder path that have material subfolder
    file_names = []
    material_names = ['metal_aluminum', 'metal_ferrous_steel','metal_ferrous','metal_non-ferrous','wood','other','plastic']
    for material in material_names:
        cur_folder = os.path.join(folder_path, material)
        files_path = get_all_files(cur_folder,"*")
        for file_path in files_path:
            file_name = os.path.basename(file_path)
            file_name = os.path.splitext(file_name)[0]
            file_names.append(file_name)
    return file_names
    

In [84]:
train_dir = r"./output/train/"     # current location of train directory
# List all files in the directory
file_names_train = get_file_names_list(train_dir)


val_dir = r"./output/val/"     # current location of validation directory
# List all files in the directory
file_names_val = get_file_names_list(val_dir)


test_dir = r"./output/test/"     # current location of test directory
# List all files in the directory
file_names_test = get_file_names_list(test_dir)



In [62]:
IMAGE_DIMS = (224, 224, 3)
train_data_dir = './output/train/'
validation_data_dir = './output/val/'
batch_size=36


In [63]:
train_datagen = ImageDataGenerator(rescale=1.0/255.0)
validation_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(IMAGE_DIMS[0], IMAGE_DIMS[1]),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)
validation_generator = validation_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(IMAGE_DIMS[0], IMAGE_DIMS[1]),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

Found 9716 images belonging to 7 classes.
Found 2078 images belonging to 7 classes.


In [64]:
nb_train_samples =9716
nb_validation_samples =2078

In [48]:
# define cnn model
def define_model(h,w):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
    padding='same', input_shape=(h,w, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform',
    padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform',
    padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(7, activation='softmax'))
    return model
    

In [49]:
def summarize_diagnostics(history):
# plot loss
    epochs = 8
    plt.style.use("ggplot")
    plt.figure()
    N = epochs
    plt.plot(np.arange(0, N), history.history["loss"], label="train_loss")
    plt.plot(np.arange(0, N), history.history["val_loss"], label="val_loss")
    plt.title("Loss")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc="upper left")
    # plot accuracy
    plt.style.use("ggplot")
    plt.figure()
    N = epochs
    plt.plot(np.arange(0, N), history.history["accuracy"], label="accuracy")
    plt.plot(np.arange(0, N), history.history["val_accuracy"], label="val_accuracy")
    plt.title("Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Accuracy")
    plt.legend(loc="upper left")
    # save plot to file

In [52]:
# Model 1: default setting from 
# URL: https://www.kaggle.com/code/koheimuramatsu/model-explainability-in-industrial-image-detection

model = define_model(IMAGE_DIMS[0],IMAGE_DIMS[1])
model.compile(loss = 'categorical_crossentropy',optimizer = 'adam',metrics = ['accuracy'])
epochs = 8
batch_size = 36
checkpoint = ModelCheckpoint("./weights.h5",monitor="loss",mode="min",save_best_only = True,verbose=1)
callbacks = [checkpoint]
history = model.fit_generator(train_generator,
    steps_per_epoch = nb_train_samples // batch_size,
    epochs = epochs,
    callbacks = callbacks,
    validation_data = validation_generator,
    validation_steps = nb_validation_samples // batch_size)

Epoch 1/8


  history = model.fit_generator(train_generator,


Epoch 1: loss improved from inf to 4.28223, saving model to ./weights.h5
Epoch 2/8


  saving_api.save_model(


Epoch 2: loss improved from 4.28223 to 1.77636, saving model to ./weights.h5
Epoch 3/8
Epoch 3: loss improved from 1.77636 to 1.67130, saving model to ./weights.h5
Epoch 4/8
Epoch 4: loss improved from 1.67130 to 1.57808, saving model to ./weights.h5
Epoch 5/8
Epoch 5: loss improved from 1.57808 to 1.47839, saving model to ./weights.h5
Epoch 6/8
Epoch 6: loss improved from 1.47839 to 1.37402, saving model to ./weights.h5
Epoch 7/8
Epoch 7: loss improved from 1.37402 to 1.25673, saving model to ./weights.h5
Epoch 8/8
Epoch 8: loss improved from 1.25673 to 1.13476, saving model to ./weights.h5


In [54]:
# Model 1.2: start training LLM
def define_model2(h,w):
    model = Sequential()
    model.add(Conv2D(32, (7, 7), activation='relu', kernel_initializer='he_uniform',
    padding='same', input_shape=(h,w, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform',
    padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform',
    padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(7, activation='softmax'))
    return model

model = define_model2(IMAGE_DIMS[0],IMAGE_DIMS[1])
model.compile(loss = 'categorical_crossentropy',optimizer = 'adam',metrics = ['accuracy'])
epochs = 10
batch_size = 36
checkpoint = ModelCheckpoint("./weights.h5",monitor="loss",mode="min",save_best_only = True,verbose=1)
callbacks = [checkpoint]
history = model.fit_generator(train_generator,
    steps_per_epoch = nb_train_samples // batch_size,
    epochs = epochs,
    callbacks = callbacks,
    validation_data = validation_generator,
    validation_steps = nb_validation_samples // batch_size)

Epoch 1/10


  history = model.fit_generator(train_generator,


Epoch 1: loss improved from inf to 4.69176, saving model to ./weights.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: loss improved from 4.69176 to 1.71997, saving model to ./weights.h5
Epoch 3/10
Epoch 3: loss improved from 1.71997 to 1.61400, saving model to ./weights.h5
Epoch 4/10
Epoch 4: loss improved from 1.61400 to 1.51939, saving model to ./weights.h5
Epoch 5/10
Epoch 5: loss improved from 1.51939 to 1.43745, saving model to ./weights.h5
Epoch 6/10
Epoch 6: loss improved from 1.43745 to 1.34691, saving model to ./weights.h5
Epoch 7/10
Epoch 7: loss improved from 1.34691 to 1.25758, saving model to ./weights.h5
Epoch 8/10
Epoch 8: loss improved from 1.25758 to 1.15233, saving model to ./weights.h5
Epoch 9/10
Epoch 9: loss improved from 1.15233 to 1.06327, saving model to ./weights.h5
Epoch 10/10
Epoch 10: loss improved from 1.06327 to 0.98259, saving model to ./weights.h5


In [84]:
# # Split the image to train and test set 

# body_list = [assembly_data for assembly_data in assemblies.values()]

# train_set, test_set = train_test_split(body_list, test_size=0.2, shuffle=True, random_state=0)

# cur_png_folder = output_dir
# train_set_dir = "/Users/pat/Documents/Development/patbam/PNG_NeuralNetwork/train_set"
# test_set_dir = "/Users/pat/Documents/Development/patbam/PNG_NeuralNetwork/test_set"


# # Train set

# for train_assem in train_set:
#     for train_body in train_assem:
#         png_name = train_body['id'] + '.png'
#         source_path = os.path.join(cur_png_folder, png_name)
#         destination_path = os.path.join(train_set_dir, png_name)
#         shutil.move(source_path, destination_path)

# # Test set

# for test_assem in test_set:
#     for test_body in test_assem:
#         png_name = test_body['id'] + '.png'
#         source_path = os.path.join(cur_png_folder, png_name)
#         destination_path = os.path.join(test_set_dir, png_name)
#         shutil.move(source_path, destination_path)



In [53]:
# # Define the CNN architecture
# model = keras.Sequential([
#     layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
#     layers.MaxPooling2D((2, 2)),
#     layers.Conv2D(64, (3, 3), activation='relu'),
#     layers.MaxPooling2D((2, 2)),

#     layers.Conv2D(128, (3, 3), activation='relu'),
#     layers.MaxPooling2D((2, 2)),
#     layers.Flatten(),
#     layers.Dense(128, activation='relu'),
#     layers.Dense(7, activation='softmax')  # Assuming 10 classes
# ])

# # Compile the model
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# # Load and preprocess PNG images using ImageDataGenerator

# image_size = (224, 224)
# batch_size = 36

# train_datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)


# # ###########
# # # List all files in the folder
# # file_list = os.listdir('/Users/pat/Documents/Development/patbam/PNG_NeuralNetwork/train_set/')


# # # Count the number of files
# # num_files = len(file_list)

# # print("Number of files in the folder:", num_files)

# # #############

# train_generator = train_datagen.flow_from_directory(
#     './train_set/',
#     target_size=image_size,
#     batch_size=batch_size,
#     class_mode='categorical',
#     subset='training'
# )

# validation_generator = train_datagen.flow_from_directory(
#     './PNG_NeuralNetwork/train_set/',
#     target_size=image_size,
#     batch_size=batch_size,
#     class_mode='categorical',
#     subset='validation'
# )


# # Get a batch of images and labels
# batch_images, batch_labels = next(validation_generator)

# # Print the shape of the batch of images
# print("Batch of images shape:", batch_images.shape)


# try:
#     batch_images, batch_labels = next(validation_generator)
#     print("Images found and loaded successfully.")
# except Exception as e:
#     print("An error occurred while loading images:", e)

# # Train the model
# epochs = 10
# history = model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

# # Evaluate the model
# test_datagen = ImageDataGenerator(rescale=1.0/255.0)
# test_generator = test_datagen.flow_from_directory(
#     './PNG_NeuralNetwork/test_set/',
#     target_size=image_size,
#     batch_size=batch_size,
#     class_mode='categorical'
# )
# test_loss, test_accuracy = model.evaluate(test_generator)

# print(f'Test accuracy: {test_accuracy}')



# # ------------ THIS SECTION IS IN FORMAT DEBATE BETWEEN OG/COMPRESS FILESIZE IN IMAGE PREPARATION ---------------
            #  ------- Only copy with OG size

            # # Get the source path
            # cur_json = str(input_json)
            # cur_folder = cur_json.replace("assembly.json", "")
            # source_path = os.path.join(cur_folder, png_name)

            # destination_path = os.path.join(output_dir, png_name)
            
            # shutil.copy(source_path, destination_path) # this is for the copy
# # ----------------------------------------------------------------------------------------------------------------
