## 1. Set Kaggle API information and dataset local directories

In [None]:
# Configure Kaggle API access and the dataset directory layout.
# Every value is published through os.environ because the other cells read
# them both from Python and from `!` shell commands.
import os

# SECURITY: Kaggle credentials must not be hard-coded in the notebook. A
# username and API key used to be embedded here in plain text; that key should
# be treated as compromised and regenerated from the Kaggle account page.
# Provide KAGGLE_USERNAME / KAGGLE_KEY through the environment before starting
# Jupyter, or store a kaggle.json token file under ~/.kaggle/.
_kaggle_token_file = os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json")
_missing_credentials = [
    name for name in ("KAGGLE_USERNAME", "KAGGLE_KEY") if not os.environ.get(name)
]
if _missing_credentials and not os.path.exists(_kaggle_token_file):
    # Only a warning: no credentials are needed when the dataset is already local.
    print(
        "WARNING: Kaggle credentials not found "
        f"(missing: {', '.join(_missing_credentials)}; no {_kaggle_token_file}). "
        "Downloading the dataset will fail unless it is already stored locally."
    )

# Root folder for all datasets and the art style handled by this run
dataset_folder = "/home/millenium-falcon/SoftwareProjects/ai-art-casa/.datasets"
art_style = "art_nouveau"
os.environ['DATASET_FOLDER'] = dataset_folder
os.environ['ART_STYLE'] = art_style

# DATASET folders: train/test splits of the downloaded Kaggle dataset
os.environ['TRAIN_DATASET_ART_STYLE'] = f"{dataset_folder}/Real_AI_SD_LD_Dataset/train"
os.environ['TEST_DATASET_ART_STYLE'] = f"{dataset_folder}/Real_AI_SD_LD_Dataset/test"

# Data to be consumed by the model: per-style train/valid folders
os.environ['TOP_DIR'] = f"{dataset_folder}/{art_style}"
os.environ['TRAIN_ART_STYLE_DATA'] = f"{dataset_folder}/{art_style}/train"
os.environ['VALID_ART_STYLE_DATA'] = f"{dataset_folder}/{art_style}/valid"

## 2. Download dataset if it is not stored locally

In [None]:
# Download Datasets if Needed
import os

# Define the folder path
folder_path = f"{os.environ['DATASET_FOLDER']}"

# Check if the folder exists
if not os.path.exists(f"{folder_path}/Real_AI_SD_LD_Dataset"):
    # Install kaggle package
    !pip install -q kaggle
    # Download the dataset from Kaggle
    !kaggle datasets download -d ravidussilva/real-ai-art -p $folder_path --unzip
else:
    print("Folder already exists.")

## 3. Copy the images of the requested art style into separate train/valid directories

In [None]:
# Set Art Style Dataset

# 1. Make clean data
!rm -rf $TRAIN_ART_STYLE_DATA
!rm -rf $VALID_ART_STYLE_DATA

# 2. Create folders of the current art style training/validation data
!mkdir -p $TRAIN_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!mkdir -p $TRAIN_ART_STYLE_DATA/$ART_STYLE

!mkdir -p $VALID_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!mkdir -p $VALID_ART_STYLE_DATA/$ART_STYLE

# 3. Load with data from DATASET
!cp -r $TRAIN_DATASET_ART_STYLE/AI_LD_$ART_STYLE/*.jpg $TRAIN_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!cp -r $TRAIN_DATASET_ART_STYLE/AI_SD_$ART_STYLE/*.jpg $TRAIN_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!cp -r $TRAIN_DATASET_ART_STYLE/$ART_STYLE/*.jpg $TRAIN_ART_STYLE_DATA/$ART_STYLE

!cp -r $TEST_DATASET_ART_STYLE/AI_LD_$ART_STYLE/*.jpg $VALID_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!cp -r $TEST_DATASET_ART_STYLE/AI_SD_$ART_STYLE/*.jpg $VALID_ART_STYLE_DATA/AI_GENERATED_$ART_STYLE
!cp -r $TEST_DATASET_ART_STYLE/$ART_STYLE/*.jpg $VALID_ART_STYLE_DATA/$ART_STYLE

## 4. Import the needed dependencies for the model and data visualization

In [None]:
#Import Dependencies
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model, Sequential
from keras.layers import Rescaling, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Activation, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import seaborn as sns
from tensorflow.keras import regularizers

## 5. Set dataset parameters and details

In [None]:
# Folders holding the per-style training and validation images,
# as exported through os.environ
train_data_dir = os.environ['TRAIN_ART_STYLE_DATA']
valid_data_dir = os.environ['VALID_ART_STYLE_DATA']

# Image size used for the model input
IMG_WIDTH = 32
IMG_HEIGHT = 32

# Training batch size and number of epochs requested from fit()
BATCH_SIZE = 64
NUM_EPOCHS = 30

## 6. Define the directory path for training dataset

In [None]:
# Split the entries of <TOP_DIR>/train into human-drawn and AI-generated groups
top_dir = os.environ['TOP_DIR']
train_dir = os.path.join(top_dir, 'train')

# Every entry found directly under the training folder
all_directories = os.listdir(train_dir)

# Entries whose name starts with 'AI_' hold AI-generated images;
# everything else is treated as human-drawn
train_human = [
    os.path.join(train_dir, name)
    for name in all_directories
    if not name.startswith('AI_')
]
train_ai = [
    os.path.join(train_dir, name)
    for name in all_directories
    if name.startswith('AI_')
]

# Print both groups, each entry numbered from 0
for heading, group in (
    ("Train directories containing human-drawn images:", train_human),
    ("\nTrain directories containing AI-generated images:", train_ai),
):
    print(heading)
    for i, directory in enumerate(group):
        print(f"{i}. {directory}")

## 7. Define the directory path for validation dataset

In [None]:
# Split the entries of <top_dir>/valid into human-drawn and AI-generated groups
test_dir = os.path.join(top_dir, 'valid')

# Every entry found directly under the validation folder
all_directories = os.listdir(test_dir)

# Entries whose name starts with 'AI_' hold AI-generated images;
# everything else is treated as human-drawn
test_human = [
    os.path.join(test_dir, name)
    for name in all_directories
    if not name.startswith('AI_')
]
test_ai = [
    os.path.join(test_dir, name)
    for name in all_directories
    if name.startswith('AI_')
]

# Print both groups, each entry numbered from 0
for heading, group in (
    ("Test directories containing human-drawn images:", test_human),
    ("\nTest directories containing AI-generated images:", test_ai),
):
    print(heading)
    for i, directory in enumerate(group):
        print(f"{i}. {directory}")

## 8. Data preprocessing: labeling the training data

In [None]:
# Build the labelled training table: one row per file, holding its path and
# its class. Files under the train_human folders are labelled "human", files
# under the train_ai folders are labelled "AI".
filepaths = []
labels = []

for label, directories in (("human", train_human), ("AI", train_ai)):
    for directory in directories:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # row order reproducible between runs and machines.
        for file in sorted(os.listdir(directory)):
            filepaths.append(os.path.join(directory, file))
            labels.append(label)

# Create the DataFrame directly from the collected lists. Concatenating onto
# an empty placeholder frame first is redundant and relies on pandas'
# handling of empty frames in concat.
train_data = pd.DataFrame({'filepath': filepaths, 'label': labels})

## 9. Display the number of images in the training dataset

In [None]:
# Tally how many training files carry each label
file_counts = train_data['label'].value_counts()

# Show the heading followed by the per-label totals
print("Number of files under each label:", file_counts, sep="\n")

## 10. Data preprocessing: labeling the validation data

In [None]:
# Build the labelled validation table: one row per file, holding its path and
# its class. Files under the test_human folders are labelled "human", files
# under the test_ai folders are labelled "AI".
filepaths = []
labels = []

for label, directories in (("human", test_human), ("AI", test_ai)):
    for directory in directories:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # row order reproducible between runs and machines.
        for file in sorted(os.listdir(directory)):
            filepaths.append(os.path.join(directory, file))
            labels.append(label)

# Create the DataFrame directly from the collected lists. Concatenating onto
# an empty placeholder frame first is redundant and relies on pandas'
# handling of empty frames in concat.
test_data = pd.DataFrame({'filepath': filepaths, 'label': labels})

## 11. Display the number of images in the validation dataset

In [None]:
# Preview the first rows of the validation table
print(test_data.head())

# Tally how many validation files carry each label
file_counts = test_data['label'].value_counts()

# Show the heading (preceded by a blank line) followed by the per-label totals
print("\nNumber of files under each label:", file_counts, sep="\n")

## 12. Data preprocessing: adding data augmentation and creating the training dataset

In [None]:
# Generator for the training images: rescaling plus light augmentation
training_generator = ImageDataGenerator(
    rescale=1./255,        # multiply pixel values by 1/255 to normalize them
    rotation_range=7,      # rotation range, in degrees, for random rotations
    horizontal_flip=True,  # allow random horizontal flips
)

# Stream (image, label) batches from the file paths listed in train_data
train_dataset = training_generator.flow_from_dataframe(
    dataframe=train_data,
    x_col='filepath',                     # column containing file paths
    y_col='label',                        # column containing labels
    class_mode='binary',
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

## 13. Data preprocessing- Adding data augmentation and creating validation dataset. 

In [None]:
# Generator for the validation images: normalization only.
# Random rotations/flips are deliberately NOT applied here. Augmenting the
# validation data makes every pass over it see different images, so the
# validation metrics, model.evaluate() and model.predict() would not be
# computed on the same inputs and the reported scores would not be reproducible.
val_generator = ImageDataGenerator(rescale=1./255)  # multiply pixel values by 1/255 to normalize them

# Stream the validation images listed in test_data
val_dataset = val_generator.flow_from_dataframe(
    dataframe=test_data,
    x_col='filepath',                     # column containing file paths
    y_col='label',                        # column containing labels
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=1,                         # 1 image at a time to evaluate the NN
    class_mode='binary',
    shuffle=False,                        # keep file order to associate each prediction with its expected output
)

## 14. Define the CNN model and show its summary

In [None]:
# Build the model: three convolution + max-pooling stages followed by a small
# fully connected head ending in a single sigmoid unit
model = Sequential([
    # Convolutional layer with 512 filters, relu activation and L2 kernel regularization
    Conv2D(
        filters=512,
        kernel_size=3,
        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
    ),
    MaxPooling2D(2, 2),
    # Convolutional layer with 128 filters and relu activation
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    # Convolutional layer with 32 filters and relu activation
    Conv2D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    # Flatten the feature maps into a 1D vector for the dense layers
    Flatten(),
    # Three fully connected layers producing the final result
    Dense(units=32, activation='relu'),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Show the model's summary
model.summary()

## 15. Compile the model

In [None]:
# Early stopping on validation accuracy: patience of 5 epochs, with the best
# weights restored afterwards (used to limit overfitting)
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    restore_best_weights=True,
)

# Compile the model for binary classification
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

## 16. Train the model :)

In [None]:
# Train the model on the training batches, validating on val_dataset each
# epoch with the early-stopping callback attached
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=NUM_EPOCHS,
    callbacks=[es],
)

## 17. Plot the accuracy and loss for each epoch

In [None]:
# Accuracy and loss curves, side by side
plt.figure(figsize=(10, 5))

# (subplot position, history key, display name) for each panel
panels = (
    (1, 'accuracy', 'Accuracy'),
    (2, 'loss', 'Loss'),
)

for position, metric, title in panels:
    plt.subplot(1, 2, position)
    # Training curve and its validation counterpart ('val_' prefix in history)
    plt.plot(history.history[metric], label=f'Training {title}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    plt.xlabel('Epochs')
    plt.ylabel(title)
    plt.title(f'Model {title}')
    plt.legend()

plt.tight_layout()
plt.show()

## 18. Evaluate the trained model

In [None]:
# Evaluate the trained model on the validation generator and report both metrics
test_loss, test_acc = model.evaluate(val_dataset)
print(f'Test accuracy: {test_acc}', f'Test loss: {test_loss}', sep='\n')

## 19. Make predictions with the trained model

In [None]:
# Class indices of the validation images, taken from the generator
true_labels = val_dataset.classes

# Predicted probabilities for every validation image
predictions = model.predict(val_dataset)

# Round each probability to 0 or 1 and flatten to 1D (binary classification)
predicted_labels = predictions.round().flatten()

## 20. Generate F1 Score

In [None]:
# Turn the predicted probabilities into hard 0/1 labels (threshold 0.5)
y_pred_prob = predictions
y_pred = (y_pred_prob > 0.5).astype(int)

# Per-class precision/recall/F1 report, using the generator's class names
report = classification_report(
    true_labels,
    y_pred,
    target_names=val_dataset.class_indices.keys(),
)
print(report)

# Overall F1 score
f1 = f1_score(true_labels, y_pred)
print(f'F1 Score: {f1}')

## 21. Generate the confusion matrix to validate results and plot the information

In [None]:
# Confusion matrix of true vs. predicted labels
cm = confusion_matrix(true_labels, predicted_labels)

# Tick labels used on both axes of the heatmap
tick_labels = ['AI', 'human']

# Draw the matrix as an annotated heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()