In [203]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from PIL import Image


In [204]:
folders = ["damage", "no_damage"]

data = []
for folder in folders:
    folder_path = folder
    for fname in os.listdir(folder_path):
        if fname.lower().endswith((".jpg", ".jpeg", ".png")):
            data.append({
                "image_path": os.path.join(folder_path, fname),
                "label": folder
            })

data = pd.DataFrame(data)
print(data.head())

                                 image_path   label
0          damage\-93.528502_30.987438.jpeg  damage
1            damage\-93.5302_30.988157.jpeg  damage
2  damage\-93.53950999999999_30.982944.jpeg  damage
3          damage\-93.539521_30.982434.jpeg  damage
4          damage\-93.540151_30.982689.jpeg  damage


In [205]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21322 entries, 0 to 21321
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_path  21322 non-null  object
 1   label       21322 non-null  object
dtypes: object(2)
memory usage: 333.3+ KB


In [206]:
data.duplicated().sum()

np.int64(0)

In [207]:
data.size

42644

In [208]:
img = Image.open("damage/-93.795_30.03779.jpeg")
img.size


(128, 128)

In [209]:
data["label"]

0           damage
1           damage
2           damage
3           damage
4           damage
           ...    
21317    no_damage
21318    no_damage
21319    no_damage
21320    no_damage
21321    no_damage
Name: label, Length: 21322, dtype: object

In [210]:
data['label'] = data['label'].astype("string")
data = pd.get_dummies(data, columns=["label"], drop_first=True)

In [211]:
data.columns

Index(['image_path', 'label_no_damage'], dtype='object')

In [212]:
data.head

<bound method NDFrame.head of                                          image_path  label_no_damage
0                  damage\-93.528502_30.987438.jpeg            False
1                    damage\-93.5302_30.988157.jpeg            False
2          damage\-93.53950999999999_30.982944.jpeg            False
3                  damage\-93.539521_30.982434.jpeg            False
4                  damage\-93.540151_30.982689.jpeg            False
...                                             ...              ...
21317  no_damage\-97.001436_28.876759999999997.jpeg             True
21318   no_damage\-97.00144_28.622428999999997.jpeg             True
21319  no_damage\-97.001677_28.864984000000003.jpeg             True
21320  no_damage\-97.001948_28.633053999999998.jpeg             True
21321  no_damage\-97.001979_28.623572999999997.jpeg             True

[21322 rows x 2 columns]>

In [213]:
data['label_no_damage'] = data['label_no_damage'].apply(lambda x: 1 if x == False else 0)
data.info()
data.isna().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21322 entries, 0 to 21321
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   image_path       21322 non-null  object
 1   label_no_damage  21322 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 333.3+ KB


image_path         0
label_no_damage    0
dtype: int64

In [214]:
data.head

<bound method NDFrame.head of                                          image_path  label_no_damage
0                  damage\-93.528502_30.987438.jpeg                1
1                    damage\-93.5302_30.988157.jpeg                1
2          damage\-93.53950999999999_30.982944.jpeg                1
3                  damage\-93.539521_30.982434.jpeg                1
4                  damage\-93.540151_30.982689.jpeg                1
...                                             ...              ...
21317  no_damage\-97.001436_28.876759999999997.jpeg                0
21318   no_damage\-97.00144_28.622428999999997.jpeg                0
21319  no_damage\-97.001677_28.864984000000003.jpeg                0
21320  no_damage\-97.001948_28.633053999999998.jpeg                0
21321  no_damage\-97.001979_28.623572999999997.jpeg                0

[21322 rows x 2 columns]>

Damage is 1, no damage is 0

In [215]:
from sklearn.model_selection import train_test_split, GridSearchCV

X = data.drop(["label_no_damage"], axis = 1)
Y = data["label_no_damage"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, stratify=Y, random_state=1)

In [216]:
data['image_path'] = data['image_path'].astype("string")
data['image_path']

0                    damage\-93.528502_30.987438.jpeg
1                      damage\-93.5302_30.988157.jpeg
2            damage\-93.53950999999999_30.982944.jpeg
3                    damage\-93.539521_30.982434.jpeg
4                    damage\-93.540151_30.982689.jpeg
                             ...                     
21317    no_damage\-97.001436_28.876759999999997.jpeg
21318     no_damage\-97.00144_28.622428999999997.jpeg
21319    no_damage\-97.001677_28.864984000000003.jpeg
21320    no_damage\-97.001948_28.633053999999998.jpeg
21321    no_damage\-97.001979_28.623572999999997.jpeg
Name: image_path, Length: 21322, dtype: string

In [None]:
path_ds = tf.data.Dataset.from_tensor_slices(data['image_path']) # instead of image_path xtrain xtest ytrain ytest
# Step 2: Define a function to load and preprocess images
def load_and_preprocess_image(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128, 128])
    img = tf.cast(img, tf.float32) / 255.0
    return img, path  # keep path for filename
# Step 3: Apply preprocessing to all paths
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
# Step 4: Convert the TensorFlow dataset to numpy arrays
images, paths = zip(*[(img.numpy(), p.numpy().decode("utf-8")) for img, p in image_ds])
# Step 5: Flatten image tensors and build a pandas DataFrame
image_arrays = [img.reshape(-1) for img in images]
df = pd.DataFrame(image_arrays)
df["filename"] = [os.path.basename(p) for p in paths]
print(df.shape)
df.head()

In [None]:
#ANN
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

image_size=128*128

# create model
model = Sequential()
# input layer
model.add(Dense(49152, activation='relu',input_shape=(image_size,)))

# Hidden layers
model.add(Dense(6767, activation='relu'))
model.add(Dense(676, activation='relu'))

# Softmax activation function is selected for multiclass classification
model.add(Dense(2, activation='sigmoid'))

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, validation_split=0.2, epochs=5, batch_size=128, verbose=2)