## train cnn model

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import numpy as np
from sklearn.metrics import confusion_matrix

folder_dir = '/Users/mengjiayu/Downloads/clothes 2'

files = os.listdir(folder_dir)
files.remove('.DS_Store')
files = list(files)

print(files)

img_height = 28
img_width = 28
batch_size = 6

ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    '/Users/mengjiayu/Downloads/clothes 2',
    labels='inferred',
    label_mode='categorical',
    class_names=files,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset='training'
)

ds_val = tf.keras.preprocessing.image_dataset_from_directory(
    '/Users/mengjiayu/Downloads/clothes 2',
    labels='inferred',
    label_mode='categorical',
    class_names=files,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset='validation'
)

data_augmentation = tf.keras.Sequential([
    layers.experimental.preprocessing.RandomFlip("horizontal"),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
])

ds_train = ds_train.map(lambda x, y: (data_augmentation(x), y))

model = keras.Sequential([
    layers.Input((img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.Dropout(0.2),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(files), activation='softmax')  # Adjust to match number of classes
])

model.compile(optimizer=keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(ds_train, epochs=15, validation_data=ds_val)

val_results = model.evaluate(ds_val)
print("Validation Loss:", val_results[0])
print("Validation Accuracy:", val_results[1])

true_labels = []
predicted_classes = []

for images, labels in ds_val:
    true_labels.extend(np.argmax(labels.numpy(), axis=1))
    predicted_classes.extend(np.argmax(model.predict(images), axis=1))

cm = confusion_matrix(true_labels, predicted_classes)
print("Confusion Matrix:")
print(cm)


['短褲', '羽絨', '外套', '無洋', '帽踢', '長袖', '背心', '長洋', '短袖', '高領', '長裙', '長褲']
Found 6177 files belonging to 12 classes.
Using 5560 files for training.
Found 6177 files belonging to 12 classes.
Using 617 files for validation.
Epoch 1/15


  output, from_logits = _get_logits(


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Validation Loss: 0.9708488583564758
Validation Accuracy: 0.6871961355209351
Confusion Matrix:
[[59  0  3  0  0  1  6  0  4  0  2  0]
 [ 2 10 17  0  8  1  0  5  0  3  0  0]
 [ 0  4 30  0  7  4  2  3  0  8  1  0]
 [ 0  0  0 46  0  0  0  0  0  0  0  0]
 [ 0  4  3  0 27  0  0  0  0  2  0  0]
 [ 0  0 19  0 13 23  0  1  0 15  1  0]
 [ 2  1  0  4  0  0 41  0  0  0  1  0]
 [ 0  1  1  2  0  0  1 41  2  1  2  0]
 [ 0  0  2  0  0  0  1  2 53  0  0  0]
 [ 0  1  1  1  1  1  0  0  0 28  1  0]
 [ 0  1  1  4  1  0  1  4  0  0 25  2]
 [ 0  0  0  5  0  0  0  1  0  0  5 41]]


In [7]:
#model.save('/Users/mengjiayu/Desktop/newmodel') 

INFO:tensorflow:Assets written to: /Users/mengjiayu/Desktop/newmodel/assets


INFO:tensorflow:Assets written to: /Users/mengjiayu/Desktop/newmodel/assets


In [9]:
from keras.models import load_model
model= load_model('/Users/mengjiayu/Desktop/newmodel')

## evaluate (confusion metrix, accuracy)  (f1, precision, recall)

In [10]:
# Evaluate validation set
val_results = model.evaluate(ds_val)
print("Validation Loss:", val_results[0])
print("Validation Accuracy:", val_results[1])

# Evaluate training set
train_results = model.evaluate(ds_train)
print("Training Loss:", train_results[0])
print("Training Accuracy:", train_results[1])

true_labels_val = []
predicted_classes_val = []

true_labels_train = []
predicted_classes_train = []

# Predictions and true labels for validation set
for images, labels in ds_val:
    true_labels_val.extend(np.argmax(labels.numpy(), axis=1))
    predicted_classes_val.extend(np.argmax(model.predict(images), axis=1))

# Predictions and true labels for training set
for images, labels in ds_train:
    true_labels_train.extend(np.argmax(labels.numpy(), axis=1))
    predicted_classes_train.extend(np.argmax(model.predict(images), axis=1))

cm_val = confusion_matrix(true_labels_val, predicted_classes_val)
cm_train = confusion_matrix(true_labels_train, predicted_classes_train)

print("Confusion Matrix (Validation):")
print(cm_val)

print("Confusion Matrix (Training):")
print(cm_train)


  output, from_logits = _get_logits(


Validation Loss: 0.970848560333252
Validation Accuracy: 0.6871961355209351
Training Loss: 0.7362139225006104
Training Accuracy: 0.7320144176483154
Confusion Matrix (Validation):
[[59  0  3  0  0  1  6  0  4  0  2  0]
 [ 2 10 17  0  8  1  0  5  0  3  0  0]
 [ 0  4 30  0  7  4  2  3  0  8  1  0]
 [ 0  0  0 46  0  0  0  0  0  0  0  0]
 [ 0  4  3  0 27  0  0  0  0  2  0  0]
 [ 0  0 19  0 13 23  0  1  0 15  1  0]
 [ 2  1  0  4  0  0 41  0  0  0  1  0]
 [ 0  1  1  2  0  0  1 41  2  1  2  0]
 [ 0  0  2  0  0  0  1  2 53  0  0  0]
 [ 0  1  1  1  1  1  0  0  0 28  1  0]
 [ 0  1  1  4  1  0  1  4  0  0 25  2]
 [ 0  0  0  5  0  0  0  1  0  0  5 41]]
Confusion Matrix (Training):
[[539   3   6   0   2  11  24   4   2   1  16   0]
 [  9 183  78   0  49  10   1  14   8  42   9   0]
 [  8  57 265   0  61  59   5   4   5  89   9   0]
 [  0   0   1 384   1   0   3   2   0   1  18   2]
 [  2  46  44   0 242  25   3   0   1  16   8   0]
 [ 12  22 145   0  48 323   4   7   2  85   6   0]
 [ 12   3   4   5 

In [2]:
import tensorflow as tf
from tensorflow.keras import layers
import os
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score

# Remaining code remains the same until this point

# Model training and validation
#history = model.fit(ds_train, epochs=15, validation_data=ds_val)

# Model evaluation on validation set
val_results = model.evaluate(ds_val)
print("Validation Loss:", val_results[0])
print("Validation Accuracy:", val_results[1])

true_labels = []
predicted_classes = []

for images, labels in ds_val:
    true_labels.extend(np.argmax(labels.numpy(), axis=1))
    predicted_classes.extend(np.argmax(model.predict(images), axis=1))

# Confusion Matrix
cm = confusion_matrix(true_labels, predicted_classes)
print("Confusion Matrix:")
print(cm)

# Additional Metrics
accuracy = accuracy_score(true_labels, predicted_classes)
recall = recall_score(true_labels, predicted_classes, average='weighted')  # Calculate weighted average recall
f1 = f1_score(true_labels, predicted_classes, average='weighted')  # Calculate weighted average F1-score

print("Accuracy:", accuracy)
print("Recall:", recall)
print("F1-score:", f1)


Validation Loss: 0.970848560333252
Validation Accuracy: 0.6871961355209351
Confusion Matrix:
[[59  0  3  0  0  1  6  0  4  0  2  0]
 [ 2 10 17  0  8  1  0  5  0  3  0  0]
 [ 0  4 30  0  7  4  2  3  0  8  1  0]
 [ 0  0  0 46  0  0  0  0  0  0  0  0]
 [ 0  4  3  0 27  0  0  0  0  2  0  0]
 [ 0  0 19  0 13 23  0  1  0 15  1  0]
 [ 2  1  0  4  0  0 41  0  0  0  1  0]
 [ 0  1  1  2  0  0  1 41  2  1  2  0]
 [ 0  0  2  0  0  0  1  2 53  0  0  0]
 [ 0  1  1  1  1  1  0  0  0 28  1  0]
 [ 0  1  1  4  1  0  1  4  0  0 25  2]
 [ 0  0  0  5  0  0  0  1  0  0  5 41]]
Accuracy: 0.6871961102106969
Recall: 0.6871961102106969
F1-score: 0.6781711982172964


In [11]:
true_labels = []
predicted_classes = []

# Collecting true labels and predicted classes
for images, labels in ds_val:
    true_labels.extend(np.argmax(labels.numpy(), axis=1))
    predicted_classes.extend(np.argmax(model.predict(images), axis=1))

# Matching true labels and predicted classes to count correct predictions
correct_predictions = sum([1 for true, pred in zip(true_labels, predicted_classes) if true == pred])
total_samples = len(true_labels)

# Printing the number of correct predictions and total samples
print("Total Correct Predictions:", correct_predictions)
print("Total Samples in Validation Set:", total_samples)
print("Accuracy on Validation Set:", correct_predictions / total_samples)


Total Correct Predictions: 428
Total Samples in Validation Set: 617
Accuracy on Validation Set: 0.6936790923824959


## test new images

In [None]:
from keras.models import load_model
model= load_model('/Users/mengjiayu/Desktop/newmodel')

In [44]:
import pandas as pd

In [34]:
import os
from os import listdir
imgl=[]

folder_dir = "/Users/mengjiayu/Downloads/clothes_rbg"
for images in os.listdir(folder_dir):
    imgl.append(images)
 
    if (images.endswith(".png")):
        print(images)

牛仔外套.png
灰色高領帶扣毛衣.png
Maison Kitsune Shirt.png
藍色圓領毛衣.png
白色薄長裙.png
粉紅色長袖襯衫.png
紅條紋長袖.png
深藍牛仔寬褲.png
長褲.png
灰色高領毛衣.png
墨綠色羽絨.png
灰色挖洞長袖洋裝.png
短褲.png
深藍小狐狸衛衣.png
白色連帽羽絨.png
長洋裝.png
小狐狸棒球外套.png
深藍nyc大學踢.png
8.png
Ami Cardigan.png
紅色高領長袖.png
長袖.png
白飛行外套.png
9.png
米色小狐狸衛衣.png
卡其風衣外套.png
14.png
灰色連帽衛衣.png
無袖洋裝.png
小狐狸卡其針織衫.png
黑色皮夾克.png
白色連毛長版羽絨.png
15.png
條文高領衛衣.png
白粗條針織長袖.png
灰色飛行外套.png
白色牛仔長褲.png
17.png
黑色泡泡長袖.png
Maison Kitsune High Waisted Jeans.png
16.png
灰色大衣外套.png
棕色長袖.png
米白色霧面羽絨.png
12.png
灰色紋路長袖.png
刺繡拼接長袖洋裝.png
黑色zara工裝褲.png
灰色女友長褲.png
13.png
牛仔長裙.png
黑色亮面羽絨.png
銀色長裙.png
長袖灰色jpg.png
條紋長袖.png
小狐狸灰色針織長袖毛衣.png
牛角釦外套.png
白色高領上衣.png
藍色寬衛褲.png
米色長袖洋裝.png
灰色水洗長袖.png
長牛仔褲.png
墨綠色zara工裝褲.png
灰色圓領長袖.png
黑色霧面羽絨.png
羽絨.png
針織連帽外套kirsh.png
高領.png
灰色長袖.png
卡其zara工裝褲.png
卡其長袖襯衫.png
黑掃高領長袖.png
灰常百褶裙.png
Maison Kitsune Navy Hoodie.png
帽踢.png
灰色水洗長褲.png
黑色牛仔長褲.png
米白薄長裙.png
短板卡其外套.png
外套.png
Zara 灰色短裙.png
長袖上衣訓練.png
4.png
黑色長洋裝.png
灰米長裙.png
5.png
長裙.png
白色長袖洋裝.png
黑色連帽外套.png
6.png
2.png
黑色小狐狸帽踢

In [35]:
files=['短褲', '羽絨', '外套', '無洋', '帽踢', '長袖', '背心', '長洋', '短袖', '高領', '長裙', '長褲']

In [36]:
precl=[]
prel=[]
for images in os.listdir(folder_dir):
    #read user photo from database ##please change following path
    img = tf.keras.preprocessing.image.load_img("/Users/mengjiayu/Downloads/clothes_rbg/"+images, target_size=(img_height, img_width))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    predictions = model.predict(tf.expand_dims(img_array, 0)) #use mod as model name if use pickle
    
    iname=np.argmax(predictions)
    cname=files[iname] 
    
    prel.append(iname)#class label(random) ##we actualy don't this need label
    precl.append(cname)#predicted class name ##insert result into database
    
    print(images,predictions, iname, cname)#see result

牛仔外套.png [[4.5104302e-05 2.2482683e-01 6.5431398e-01 1.2801554e-09 3.6180295e-02
  5.6522917e-02 3.8739654e-05 5.9875794e-05 1.1914128e-06 2.8010340e-02
  6.8037684e-07 1.2882975e-09]] 2 外套
灰色高領帶扣毛衣.png [[1.4481402e-04 1.3257904e-01 5.4063380e-01 3.4128993e-07 1.8581718e-01
  9.5220305e-02 4.2604977e-05 7.4150151e-04 9.5111982e-06 4.4764921e-02
  4.5831384e-05 1.0813467e-07]] 2 外套
Maison Kitsune Shirt.png [[1.13661774e-10 4.61597228e-04 4.84985895e-02 3.69030317e-15
  3.28693330e-01 6.17799401e-01 1.59971307e-08 1.00633770e-04
  6.73794076e-10 4.44646319e-03 3.55641494e-09 8.35151531e-15]] 5 長袖
藍色圓領毛衣.png [[9.2902534e-05 7.3298286e-03 4.0076777e-01 5.9722831e-09 1.4784472e-01
  4.3070120e-01 2.2428931e-06 5.6992206e-03 1.9500274e-06 7.5596245e-03
  5.5197472e-07 1.5120704e-09]] 5 長袖
白色薄長裙.png [[5.6486293e-10 6.3886342e-05 1.0250113e-07 8.1389362e-01 7.6052208e-07
  9.8777209e-09 7.6531144e-03 1.0529256e-01 1.9133877e-08 3.5903988e-06
  6.9991559e-02 3.1007188e-03]] 3 無洋
粉紅色長袖襯衫.png [[1

In [37]:
result=pd.DataFrame({'file name':imgl, 'predicted label': prel
                    ,'predicted class': precl})#predicted table ##plz ignore label
result

Unnamed: 0,file name,predicted label,predicted class
0,牛仔外套.png,2,外套
1,灰色高領帶扣毛衣.png,2,外套
2,Maison Kitsune Shirt.png,5,長袖
3,藍色圓領毛衣.png,5,長袖
4,白色薄長裙.png,3,無洋
...,...,...,...
96,1.png,8,短袖
97,黑色西裝寬褲.png,11,長褲
98,kirsh羽絨.png,2,外套
99,0.png,2,外套


In [38]:
result.to_csv('without bg.csv')

## check damaged images 

In [17]:
import os
import shutil
import warnings
import cv2
import io
from PIL import Image
warnings.filterwarnings("error", category=UserWarning)
base_dir = "/Users/mengjiayu/Downloads/b db"#删除图片的根目录
i = 0
def is_read_successfully(file):
    try:
        imgFile = Image.open(file)#这个就是一个简单的打开成功与否
        return True
    except Exception:
        return False
for parent, dirs, files in os.walk(base_dir):#(root,dirs,files)
    for file in files:
        if not is_read_successfully(os.path.join(parent, file)):
            print(os.path.join(parent, file))
            #os.remove(os.path.join(parent, file)) #真正使用时，这一行要放开，自己一般习惯先跑一遍，没有错误了再删除，防止删错。
            i = i + 1
print(i)

/Users/mengjiayu/Downloads/b db/.DS_Store
1
