In [2]:
# Import necessary libraries
import pandas as pd
from pathlib import Path
import os
from datasets import Dataset, Image, ClassLabel
import random
import shutil
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, Activation, add, GlobalAveragePooling2D, Dense

# Accumulators for the image paths and their class labels (filled in lockstep)
file_names = []
labels = []

# Dataset root (Windows-specific absolute path).
# Expected layout: <directory>/<label>/<image file>
directory = 'C:\\Users\\Noura\\Downloads\\Untitled Folder 1\\dataset'

# Visit every file that sits exactly one folder below `directory`, in sorted
# order so the resulting frame is reproducible.
# No progress bar here: tqdm is not imported anywhere in this notebook, so
# wrapping the iterable in tqdm(...) raises NameError.
for file in sorted(Path(directory).glob('*/*.*')):
    label = file.parent.name  # The containing folder's name is the class label
    labels.append(label)
    file_names.append(str(file))

# Print the total number of file names and labels (the two must match)
print(len(file_names), len(labels))

# Create a pandas dataframe from the collected file names and labels
df = pd.DataFrame.from_dict({"image": file_names, "label": labels})
print(df.shape)



ModuleNotFoundError: No module named 'datasets'

In [40]:
# Preview the first rows of the image-path / label frame built above.
df.head()


Unnamed: 0,image,label
0,C:\Users\Noura\Downloads\Untitled Folder 1\dat...,dew
1,C:\Users\Noura\Downloads\Untitled Folder 1\dat...,dew
2,C:\Users\Noura\Downloads\Untitled Folder 1\dat...,dew
3,C:\Users\Noura\Downloads\Untitled Folder 1\dat...,dew
4,C:\Users\Noura\Downloads\Untitled Folder 1\dat...,dew


In [41]:
# List the distinct class names found in the "label" column.
df['label'].unique()

array(['dew', 'fogsmog', 'glaze', 'rime', 'sandstorm'], dtype=object)

In [42]:

# Wrap the pandas frame in a `datasets.Dataset` and cast the "image" column
# (which holds file-path strings) to the `Image` feature type.
dataset = Dataset.from_pandas(df).cast_column("image", Image())


In [43]:
# Class names in the order that fixes their integer IDs (list position == ID)
labels_list = ['rime', 'fogsmog', 'dew', 'sandstorm', 'glaze']

# Build the two lookup tables: class name -> ID and ID -> class name
label2id = {name: idx for idx, name in enumerate(labels_list)}
id2label = {idx: name for idx, name in enumerate(labels_list)}

# Show both tables so the assignment can be checked by eye
print("Mapping of IDs to Labels:", id2label, '\n')
print("Mapping of Labels to IDs:", label2id)



Mapping of IDs to Labels: {0: 'rime', 1: 'fogsmog', 2: 'dew', 3: 'sandstorm', 4: 'glaze'} 

Mapping of Labels to IDs: {'rime': 0, 'fogsmog': 1, 'dew': 2, 'sandstorm': 3, 'glaze': 4}


In [44]:
# Display the label order defined in the previous cell.
labels_list

['rime', 'fogsmog', 'dew', 'sandstorm', 'glaze']

In [45]:
# Copy every class folder's images into <root>/train/<class> and
# <root>/test/<class> using a reproducible 75/25 split.

# Plain (non-raw) string: with the r'' prefix each '\\' would stay as two
# literal backslashes, unlike the other path constants in this notebook.
root_directory = 'C:\\Users\\Noura\\Downloads\\Untitled Folder 1\\dataset\\'
train_directory = os.path.join(root_directory, 'train')
test_directory = os.path.join(root_directory, 'test')

# Share of each class that goes to the training set
train_fraction = 0.75

# Dedicated, seeded generator: re-running this cell reproduces the same split.
# With an unseeded shuffle each re-run would copy a different subset on top of
# the previous one, so the same image could end up in both train and test.
# NOTE(review): folders already populated by an earlier unseeded run are not
# cleaned up here - delete train/ and test/ once before relying on this split.
split_rng = random.Random(42)

os.makedirs(train_directory, exist_ok=True)
os.makedirs(test_directory, exist_ok=True)

for t in labels_list:
    origin = os.path.join(root_directory, t)
    train_class_directory = os.path.join(train_directory, t)
    test_class_directory = os.path.join(test_directory, t)

    os.makedirs(train_class_directory, exist_ok=True)
    os.makedirs(test_class_directory, exist_ok=True)

    # Only regular files directly inside the class folder are split. Walking
    # recursively would hand back names from nested folders that do not exist
    # under `origin`, and the copy below would fail on them.
    # Sorting first gives the seeded shuffle the same starting order each run.
    # A missing class folder yields an empty split instead of an error.
    if os.path.isdir(origin):
        files = sorted(
            name for name in os.listdir(origin)
            if os.path.isfile(os.path.join(origin, name))
        )
    else:
        files = []
    split_rng.shuffle(files)

    split_at = int(train_fraction * len(files))
    train_list = files[:split_at]
    test_list = files[split_at:]

    for f in train_list:
        shutil.copy(os.path.join(origin, f), os.path.join(train_class_directory, f))

    for f in test_list:
        shutil.copy(os.path.join(origin, f), os.path.join(test_class_directory, f))


In [46]:
# Checking the total number of files

def report_file_counts(title, directory):
    """Print a per-folder file count for everything under `directory`.

    Prints `title`, a separator, one "<folder> --> <count>" line for each
    folder that contains at least one file, a separator, the grand total and
    a trailing blank line.

    Args:
        title: Heading printed before the counts.
        directory: Root folder to walk recursively.

    Returns:
        The total number of files found (0 if `directory` does not exist).
    """
    print(title)
    print("_____________")
    # Named `total` on purpose: assigning to `sum` would shadow the built-in
    # sum() for every later cell in the kernel session.
    total = 0
    for root, dirs, files in os.walk(directory):
        if files:
            print(root, "-->", len(files))
            total += len(files)

    print("_____________")
    print("TOTAL = ", total)
    print()
    return total


train_total = report_file_counts(
    "TRAIN DATASET",
    'C:\\Users\\Noura\\Downloads\\Untitled Folder 1\\dataset\\train\\',
)
print()

test_total = report_file_counts(
    "TEST DATASET",
    'C:\\Users\\Noura\\Downloads\\Untitled Folder 1\\dataset\\test\\',
)

TRAIN DATASET
_____________
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\train\dew --> 523
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\train\fogsmog --> 638
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\train\glaze --> 479
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\train\rime --> 870
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\train\sandstorm --> 519
_____________
TOTAL =  3029


TEST DATASET
_____________
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\test\dew --> 175
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\test\fogsmog --> 213
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\test\glaze --> 160
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\test\rime --> 290
C:\Users\Noura\Downloads\Untitled Folder 1\dataset\test\sandstorm --> 173
_____________
TOTAL =  1011



In [47]:
# Input layer: 224x224 images with 3 channels
input_tensor = Input(shape=(224, 224, 3))  # Adjust input shape as needed

# Stem: 7x7 stride-2 convolution -> batch norm -> ReLU -> 3x3 stride-2 max pool.
# The convolution halves the spatial size (224 -> 112, see the model summary);
# the stride-2 pool with 'same' padding halves it again.
x = Conv2D(32, (7, 7), strides=(2, 2), padding='same')(input_tensor)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

# Residual blocks
def residual_block(x, filters, kernel_size=3, stride=1):
    """Apply one bottleneck-style residual block to `x` and return the result.

    Main path: 1x1 conv (`filters[0]`, strided) -> BN -> ReLU ->
    `kernel_size` x `kernel_size` conv (`filters[1]`, 'same' padding) -> BN ->
    ReLU -> 1x1 conv (`filters[2]`) -> BN.
    Shortcut path: always a 1x1 conv (`filters[2]`, strided) -> BN, i.e. the
    input is projected even when its shape already matches the main path.
    The two paths are added and passed through a final ReLU.

    Args:
        x: Input tensor of the block.
        filters: Sequence of three filter counts, one per convolution of the
            main path; `filters[2]` is also the shortcut's filter count.
        kernel_size: Side length of the middle convolution's square kernel.
        stride: Stride used by the first 1x1 convolution and by the shortcut
            projection, so both paths shrink by the same factor.

    Returns:
        The output tensor of the block.
    """
    shortcut = x

    # Reduce: 1x1 convolution; the only main-path layer that applies `stride`
    x = Conv2D(filters[0], (1, 1), strides=(stride, stride), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Spatial convolution; 'same' padding keeps the spatial size unchanged
    x = Conv2D(filters[1], (kernel_size, kernel_size), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Expand: 1x1 convolution to the block's output width (no ReLU before the add)
    x = Conv2D(filters[2], (1, 1), padding='valid')(x)
    x = BatchNormalization()(x)

    # Project the shortcut to the same width/stride so the two tensors can be added
    shortcut = Conv2D(filters[2], (1, 1), strides=(stride, stride), padding='valid')(shortcut)
    shortcut = BatchNormalization()(shortcut)

    x = add([x, shortcut])
    x = Activation('relu')(x)

    return x

# Four stages of three residual blocks each; every block in a stage uses the
# same [reduce, spatial, expand] filter counts.
stage_filters = [
    [32, 32, 64],      # Conv2_x
    [64, 64, 128],     # Conv3_x
    [128, 128, 256],   # Conv4_x
    [256, 256, 512],   # Conv5_x
]
blocks_per_stage = 3

for filters in stage_filters:
    for block_index in range(blocks_per_stage):
        x = residual_block(x, filters)

# Global Average Pooling
x = GlobalAveragePooling2D()(x)

# Fully Connected layer
x = Dense(512, activation='relu')(x)

# Softmax layer: one output unit per class.
# The dataset has five classes (rime, fogsmog, dew, sandstorm, glaze), so the
# width is taken from labels_list rather than hard-coded; a 4-unit head cannot
# represent class ID 4.
num_classes = len(labels_list)
output_tensor = Dense(num_classes, activation='softmax')(x)

# Instantiate the model
model = Model(inputs=input_tensor, outputs=output_tensor)

# Print model summary
model.summary()


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d_98 (Conv2D)          (None, 112, 112, 32)         4736      ['input_3[0][0]']             
                                                                                                  
 batch_normalization_98 (Ba  (None, 112, 112, 32)         128       ['conv2d_98[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 activation_74 (Activation)  (None, 112, 112, 32)         0         ['batch_normalization_98

In [48]:
from sklearn.preprocessing import LabelEncoder

# Dataset root folder; the train/ and test/ splits live directly beneath it
root_directory = 'C:\\Users\\Noura\\Downloads\\Untitled Folder 1\\dataset\\'

# Fixed class-name -> integer-ID table used to encode the labels
specific_mapping = {
    'rime': 0,
    'fogsmog': 1,
    'dew': 2,
    'sandstorm': 3,
    'glaze': 4,
}

# Accumulators for each split: image paths and their class names, filled in lockstep
train_file_paths, train_labels = [], []
test_file_paths, test_labels = [], []

# Function to map original labels to specific integers based on the mapping
def map_labels(label):
    """Return the integer ID that `specific_mapping` assigns to `label`.

    Raises:
        KeyError: If `label` is not a key of `specific_mapping`.
    """
    encoded = specific_mapping[label]
    return encoded

# Function to gather file paths and labels for a specific directory (train or test)
def gather_files_and_labels(directory, file_paths, labels):
    """Collect the files of every class folder under `directory`.

    Each immediate subfolder of `directory` is treated as one class. For every
    regular file inside such a folder, the file's path is appended to
    `file_paths` and the folder name to `labels`, so the two lists stay
    aligned index by index.

    Folders and files are visited in sorted order because os.listdir() returns
    entries in arbitrary order; sorting keeps the result reproducible across
    runs and machines. Entries that are not regular files (e.g. nested
    folders) are skipped so they are not recorded as images.

    Args:
        directory: Folder whose immediate subfolders are the classes.
        file_paths: List extended in place with the file paths.
        labels: List extended in place with the matching class names.

    Returns:
        None; the results are delivered through the two list arguments.
    """
    for class_name in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, class_name)
        if not os.path.isdir(class_dir):
            continue  # Stray files next to the class folders carry no label
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            file_paths.append(file_path)
            labels.append(class_name)  # Store the corresponding label

# Locate the two split folders and fill the path/label accumulators for each
train_directory = os.path.join(root_directory, 'train')
gather_files_and_labels(train_directory, train_file_paths, train_labels)

test_directory = os.path.join(root_directory, 'test')
gather_files_and_labels(test_directory, test_file_paths, test_labels)

# Translate every class name into its integer ID
encoded_train_labels = [map_labels(name) for name in train_labels]
encoded_test_labels = [map_labels(name) for name in test_labels]

# One frame per split: image path, class name and integer ID side by side
train_df = pd.DataFrame(
    {
        'image': train_file_paths,
        'label': train_labels,
        'encoded_label': encoded_train_labels,
    }
)
test_df = pd.DataFrame(
    {
        'image': test_file_paths,
        'label': test_labels,
        'encoded_label': encoded_test_labels,
    }
)

# Print both frames for inspection
print("Train Data:")
print(train_df)
print("\nTest Data:")
print(test_df)


Train Data:
                                                  image      label  \
0     C:\Users\Noura\Downloads\Untitled Folder 1\dat...        dew   
1     C:\Users\Noura\Downloads\Untitled Folder 1\dat...        dew   
2     C:\Users\Noura\Downloads\Untitled Folder 1\dat...        dew   
3     C:\Users\Noura\Downloads\Untitled Folder 1\dat...        dew   
4     C:\Users\Noura\Downloads\Untitled Folder 1\dat...        dew   
...                                                 ...        ...   
3024  C:\Users\Noura\Downloads\Untitled Folder 1\dat...  sandstorm   
3025  C:\Users\Noura\Downloads\Untitled Folder 1\dat...  sandstorm   
3026  C:\Users\Noura\Downloads\Untitled Folder 1\dat...  sandstorm   
3027  C:\Users\Noura\Downloads\Untitled Folder 1\dat...  sandstorm   
3028  C:\Users\Noura\Downloads\Untitled Folder 1\dat...  sandstorm   

      encoded_label  
0                 2  
1                 2  
2                 2  
3                 2  
4                 2  
...            

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.metrics import categorical_accuracy

# Training, validation and test data.
# Every `...` below is a placeholder: replace it with your actual arrays
# before running this cell.
# NOTE(review): categorical_crossentropy expects one-hot encoded targets; the
# integer `encoded_label` column built earlier would have to be one-hot
# encoded first (or the loss swapped for its sparse variant) - confirm.
X_train = ...
y_train = ...

# Validation data passed to fit(); it must be defined here, otherwise the
# fit() call below raises NameError.
X_val = ...
y_val = ...

# Compile the model.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by recent Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=categorical_crossentropy,
              metrics=[categorical_accuracy])

# Fit the model
history = model.fit(X_train, y_train,
                    epochs=10,       # Adjust the number of epochs as needed
                    batch_size=32,   # Adjust the batch size as needed
                    validation_data=(X_val, y_val))

# Evaluate the model on test data if available
# Replace X_test and y_test with your actual test data
X_test = ...
y_test = ...

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')