## **Import libraries**

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
import pandas as pd
from skimage.filters import prewitt_h,prewitt_v
from sklearn.model_selection import train_test_split

## **Mount Google Drive in Colab**

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Side length in pixels: images are resized to size_image x size_image, and the
# per-pixel feature vectors below are flattened to size_image * size_image values.
size_image = 100

## **Functions**

In [None]:
from_source = ['stable', 'dalle', 'dreamstudio', 'midjourney', 'dream', 'bing', 'craiyon', 'none']
from_class = {'mountain': 0, 'sea': 1, 'jungle': 2}

def get_feature_from_path(path):
    """Parse the generator source and the scene class out of an image filename.

    The name is normalized (lower-cased, known misspellings and variants such
    as "delle", "dallemini", "forest" or "see" rewritten, "-" turned into "_",
    "." removed) and then split on "_". The third field is taken as the source
    and the fourth as the scene class.

    Diagnostics are printed, but do not stop processing, when the name does not
    split into exactly five fields or when the source/class is not recognised.

    Args:
        path: Image filename to parse.

    Returns:
        Tuple ``(source, class_index)``; ``class_index`` is ``from_class[class]``.

    Raises:
        IndexError: The normalized name has fewer than four "_"-separated fields.
        KeyError: The class field is not a key of ``from_class``.
    """
    # Lower-case FIRST: every rewrite rule below is written in lower case, so a
    # name like "Forest" or "DALLE" would otherwise slip past the rules and end
    # in a KeyError even though it is a known variant.
    normalized = path.lower()

    # Order matters: later rules clean up after earlier ones.
    rewrites = (
        ("delle", "dalle"), ("dreamai", "dream"), ("-", "_"), ("sea5", "sea_5"),
        ("dallebot", "dalle"), (".", ""), ("forest", "jungle"), ("see", "sea"),
        # "dall" -> "dalle" also turns an already-correct "dalle" into
        # "dallee"; the rule right after it undoes that.
        ("dall", "dalle"), ("dallee", "dalle"),
        ("dalleminibot", "dalle"), ("dallemini", "dalle"),
        ("dalleminbot", "dalle"), ("junlge", "jungle"),
    )
    for old, new in rewrites:
        normalized = normalized.replace(old, new)

    splited_path = normalized.split("_")

    # An unexpected number of fields is reported but not treated as fatal
    if len(splited_path) != 5:
        print(splited_path)

    source = splited_path[2]
    image_class = splited_path[3]

    # Unknown sources are only reported; the source is returned as-is
    if source not in from_source:
        print(source, path)

    # Unknown classes are reported here and then fail in the lookup below
    if image_class not in from_class:
        print(image_class, path)

    return source, from_class[image_class]


In [None]:
def get_average_of_pixels(image):
    """Return the per-pixel mean of the first three channels as a flat vector.

    The means are written into the top-left corner of a zero-filled
    ``size_image x size_image`` matrix, which is then flattened row by row, so
    the result always has ``size_image * size_image`` float64 entries.

    Args:
        image: Array indexed as ``image[row, col, channel]`` with at least
            three channels.

    Returns:
        1-D float64 array of length ``size_image * size_image``.

    Raises:
        IndexError: ``image`` has fewer than three dimensions or channels.
        ValueError: ``image`` is larger than ``size_image`` in either direction.
    """
    # Widen before summing so three uint8 channels cannot wrap around at 255.
    channels = np.asarray(image).astype(np.int64)
    channel_mean = (channels[:, :, 0] + channels[:, :, 1] + channels[:, :, 2]) / 3

    rows, cols = channel_mean.shape
    if rows > size_image or cols > size_image:
        raise ValueError(
            "image of shape %r does not fit into %d x %d" % (channel_mean.shape, size_image, size_image))

    # Smaller images leave the remaining cells at zero, as the per-pixel loop did.
    feature_matrix = np.zeros((size_image, size_image))
    feature_matrix[:rows, :cols] = channel_mean

    return feature_matrix.reshape(size_image * size_image)


In [None]:
def get_edge_of_image(image):
    """Run Canny edge detection on ``image`` and return the edge map flattened.

    The two hysteresis thresholds are fixed at 50 and 150. The edge map is
    reshaped to a 1-D array of ``size_image * size_image`` entries, so the
    input is expected to be ``size_image x size_image``.
    """
    lower_threshold, upper_threshold = 50, 150
    edge_map = cv2.Canny(image, lower_threshold, upper_threshold)
    return edge_map.reshape(size_image * size_image)


In [None]:
def get_color_histogram(image, bins=8):
    """Return a normalized HSV colour histogram of ``image``.

    The image is converted with ``cv2.COLOR_BGR2HSV`` and one ``bins``-bucket
    histogram is computed per channel: hue over [0, 180), saturation and value
    over [0, 256). The three histograms are joined in that order and divided
    by their total, giving a vector of ``3 * bins`` entries that sums to 1.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # (channel index, value range) in output order: hue, saturation, value
    channel_ranges = ((0, [0, 180]), (1, [0, 256]), (2, [0, 256]))
    per_channel = [
        cv2.calcHist([hsv_image], [channel], None, [bins], value_range)
        for channel, value_range in channel_ranges
    ]

    hist_features = np.concatenate(per_channel).flatten()
    return hist_features / hist_features.sum()


In [None]:
def get_features(image):
    """Build one feature row for the image currently being processed.

    NOTE(review): the ``image`` argument is never read. The function takes its
    inputs from the notebook-level names ``filename`` and ``img_arr``, so it
    only works while a loading loop keeps those two globals up to date, and the
    row always describes ``img_arr`` regardless of what is passed in.

    Returns:
        1-D array laid out as: average-of-pixels vector, Canny edge vector,
        HSV colour histogram, then the scene-class index as the last entry.
    """
    # Only the class index is used; the source is parsed but discarded.
    _source, image_class_index = get_feature_from_path(filename)

    aop = get_average_of_pixels(img_arr)
    # Edge detection runs on the grayscale version of the image
    eoi = get_edge_of_image(cv2.cvtColor(img_arr, cv2.COLOR_BGR2GRAY))
    ch = get_color_histogram(img_arr)

    parts = (np.array(aop), eoi, ch, np.array([image_class_index]))
    return np.concatenate(parts, axis=0)


## **Creating a data frame and storing features for photo data**

### **fake data**

In [None]:
dir_path = '/content/drive/MyDrive/ML/Copy of fake'
features_of_images = list()  # One feature row per (original or augmented) image
labels = list()  # Parallel list of labels; fake images are labelled 0

image_suffixes = ("jpeg", ".jpg", ".png", ".jfif")

for filename in os.listdir(dir_path):
    if not filename.endswith(image_suffixes):
        # Report anything that is not a supported image and move on
        print(filename)
        continue

    img = cv2.imread(os.path.join(dir_path, filename))
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by
        # raising; without this check cv2.resize would abort the whole cell.
        print("could not read image", filename)
        continue
    img = cv2.resize(img, (size_image, size_image))

    # Augmentation: the image itself, three successive 90-degree clockwise
    # rotations (90, 180, 270) and two flips.
    variants = [img]
    rotated = img
    for _ in range(3):
        rotated = cv2.rotate(rotated, cv2.ROTATE_90_CLOCKWISE)
        variants.append(rotated)
    # Flip the ORIGINAL each time. Chaining flip(0) and then flip(1) on the
    # same array produces a 180-degree rotation, which would duplicate the
    # second rotated sample instead of adding a new one.
    for flip_code in (0, 1):
        variants.append(cv2.flip(img, flip_code))

    # The loop variable is deliberately the notebook-level name `img_arr`:
    # get_features reads the globals `filename` and `img_arr`, so rebinding
    # img_arr here is what selects the variant being described.
    for img_arr in variants:
        features_of_images.append(get_features(img_arr))
        labels.append(0)


desktop.ini


### **real data**

In [None]:
dir_path = '/content/drive/MyDrive/ML/Copy of real'

image_suffixes = ("jpeg", ".jpg", ".png", ".jfif")

# Rows are appended to the features_of_images / labels lists that already exist
# in the notebook namespace; real images are labelled 1.
for filename in os.listdir(dir_path):
    if not filename.endswith(image_suffixes):
        # Report anything that is not a supported image and move on
        print(filename)
        continue

    try:
        img = cv2.imread(os.path.join(dir_path, filename))
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files
            raise ValueError("cv2.imread could not read the file")
        img = cv2.resize(img, (size_image, size_image))

        # Augmentation: the image itself, three successive 90-degree clockwise
        # rotations (90, 180, 270) and two flips.
        variants = [img]
        rotated = img
        for _ in range(3):
            rotated = cv2.rotate(rotated, cv2.ROTATE_90_CLOCKWISE)
            variants.append(rotated)
        # Flip the ORIGINAL each time. Chaining flip(0) and then flip(1) on the
        # same array produces a 180-degree rotation, which would duplicate the
        # second rotated sample instead of adding a new one.
        for flip_code in (0, 1):
            variants.append(cv2.flip(img, flip_code))

        # The loop variable is deliberately the notebook-level name `img_arr`:
        # get_features reads the globals `filename` and `img_arr`, so rebinding
        # img_arr here is what selects the variant being described.
        rows = []
        for img_arr in variants:
            rows.append(get_features(img_arr))
    except Exception as e:
        # Best-effort loading: report the problem file and keep going
        print(e, filename)
        continue

    # Commit only after every variant succeeded, so a file contributes either
    # all of its rows or none and the two lists stay the same length.
    features_of_images.extend(rows)
    labels.extend([1] * len(rows))


In [None]:
import csv

# Destination of the raw feature table
output_file = '/content/drive/MyDrive/ML/features/features.csv'

# One CSV line per feature row. No header line is written, so the first line
# of the file is already data.
with open(output_file, 'w', newline='') as csvfile:
    feature_writer = csv.writer(csvfile)
    for feature_row in features_of_images:
        feature_writer.writerow(feature_row)


In [None]:
# Wrap the label list in a single-column DataFrame so it can be saved with to_csv.
df_label = pd.DataFrame(labels)

In [None]:
# Saved with to_csv defaults, i.e. the file gets a header line and a leading row-index column.
df_label.to_csv('/content/drive/MyDrive/ML/features/labels.csv')

### **PCA**

In [None]:
# features.csv is produced with csv.writer and has no header line. header=None
# keeps the first sample as data; with the default, read_csv would consume it as
# column names and the table would end up one row shorter than the labels.
features_of_images = pd.read_csv('/content/drive/MyDrive/ML/features/features.csv', header=None)

In [None]:
# labels.csv was saved together with its row index. index_col=0 turns that
# first column back into the index; otherwise it is loaded as an extra
# "Unnamed: 0" data column and np.array(labels) carries the row numbers along
# with the actual label.
labels = pd.read_csv('/content/drive/MyDrive/ML/features/labels.csv', index_col=0)


In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(features_of_images), np.array(labels), test_size=0.2, random_state=42)

# Drop the references to the full tables so their memory can be reclaimed.
# NOTE: this makes the cell non-idempotent. Re-run the two read_csv cells
# before running it a second time.
features_of_images = []
labels = []

# Scaling and PCA are both fitted on the training rows only and then applied
# unchanged to the held-out rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# random_state pins the result when scikit-learn picks a randomized SVD solver,
# so repeated runs produce the same components.
pca = PCA(n_components=500, random_state=42)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
# Stack the transformed training rows and held-out rows back into one table
# (training rows first); features and labels are stacked in the same order so
# rows stay aligned.
x_dataset = np.concatenate((X_train, X_test), axis=0)
y_dataset = np.concatenate((y_train, y_test), axis=0)

In [None]:
# (rows, components) of the reduced feature table
x_dataset.shape

(20496, 500)

In [None]:
# Wrap the reduced feature array in a DataFrame so it can be saved with to_csv.
df_pca = pd.DataFrame(x_dataset)

In [None]:
# index=False keeps the row index out of the file. The flag must be the Python
# constant False; lower-case `false` is an undefined name and raises NameError.
df_pca.to_csv('/content/drive/MyDrive/ML/features/features_PCA500.csv', index=False)

In [None]:
# Save the labels in the same row order as the reduced feature table.
# index=False keeps the row index out of the file. The flag must be the Python
# constant False; lower-case `false` is an undefined name and raises NameError.
df_ypca = pd.DataFrame(y_dataset)
df_ypca.to_csv('/content/drive/MyDrive/ML/features/features_PCA500_labels.csv', index=False)

# **Data cleaning: neural network features**

## **read data**

In [None]:
# Load the neural-network feature table and the matching filename table.
# NOTE(review): judging by the displayed frames, both files have no real header
# line, so read_csv uses their first data row as column names (float values for
# df, a filename for df_label) — confirm and consider header=None. Code further
# down looks the filename column up by that accidental name, so it has to change
# together with this cell.
# NOTE: df_label is also the name of the numeric label frame built earlier in
# the notebook; from here on it holds filenames instead.
df = pd.read_csv('/content/drive/MyDrive/ML/Copy of features.csv')
df_label = pd.read_csv('/content/drive/MyDrive/ML/Copy of labels.csv')

In [None]:
# Display the loaded feature table.
df

Unnamed: 0,0.5229696035385132,0.29987668991088867,1.8546563386917114,-0.12764957547187805,0.8136504292488098,-0.14820626378059387,0.08609316498041153,-0.09961701184511185,-0.0603422187268734,2.0590877532958984,...,-0.10570526123046875,-0.1393490880727768,-0.05884527415037155,2.431875228881836,0.06206633150577545,-0.24589484930038452,0.08688231557607651,-0.1262047439813614,0.21802791953086853,0.1690087914466858
0,0.013961,0.122444,-0.132735,-0.118628,0.173108,-0.136272,-0.084335,-0.131051,0.161909,0.824182,...,-0.113773,-0.165884,-0.113729,0.221492,-0.142989,-0.146476,0.226548,-0.138845,0.201534,0.249506
1,0.538563,0.509407,1.775910,-0.127933,1.709609,-0.140871,0.127283,-0.092119,-0.083881,1.833654,...,-0.083452,-0.139409,-0.090185,1.966274,-0.121026,-0.134771,0.824233,-0.107500,0.530300,0.368836
2,-0.074457,-0.116068,-0.196760,-0.011271,-0.168376,1.241772,-0.116697,-0.029380,-0.040485,-0.107858,...,0.860665,-0.084137,0.326265,-0.254838,-0.131203,0.494691,-0.008144,-0.004061,-0.208142,-0.168128
3,-0.051129,-0.174848,-0.028430,1.820420,-0.089466,2.193471,0.114552,-0.029080,3.765160,-0.053691,...,5.197541,1.984923,1.948692,-0.195730,-0.029222,3.375637,0.439649,0.881086,-0.012821,-0.026665
4,-0.093650,-0.197940,-0.089017,0.313116,-0.197691,0.928758,-0.100166,0.045868,2.074560,-0.115514,...,1.319904,0.426353,0.341274,-0.213587,-0.061767,0.886355,0.284618,0.416096,-0.151859,-0.145942
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3412,0.188845,0.181740,0.000457,-0.133928,0.575991,-0.167212,0.093093,-0.111037,-0.112299,0.514472,...,-0.075440,-0.164179,-0.101755,0.973446,0.586366,-0.112277,-0.200143,-0.107301,0.880406,0.564792
3413,0.069198,-0.091497,0.124390,-0.169842,0.235213,-0.221722,-0.084336,-0.168707,-0.071297,0.257700,...,-0.099730,-0.154313,-0.100462,0.484658,0.865008,-0.115266,-0.130826,-0.148095,0.845961,0.496394
3414,0.220922,-0.025789,0.154959,-0.162007,0.248210,-0.193766,-0.148826,-0.153745,-0.106148,1.394486,...,-0.100166,-0.174926,-0.096002,0.072601,-0.136642,-0.139678,0.284274,-0.143948,0.323482,0.430027
3415,-0.099168,-0.126265,-0.089775,0.245519,-0.126568,1.240803,-0.120931,-0.040557,1.584138,-0.139105,...,1.359986,0.430271,0.511444,-0.260776,-0.101833,1.298561,0.006774,0.390692,-0.125927,-0.158199


In [None]:
# Display the loaded filename table.
df_label

Unnamed: 0,810101213_fake_stable_mountain_2.png
0,810701006_fake_stable_jungle_2.jpeg
1,810199494_fake_stable_mountain_4.jpeg
2,810199567_real_none_jungle_6.jpeg
3,810101149_real_none_jungle_1.jpg
4,810199436_real_none_jungle_8.jpeg
...,...
3412,810101201_fake_dallemini_jungle_5.jpeg
3413,810198375_fake_stable_mountain_4.jpg
3414,810199427_fake_stable_mountain_5.jpeg
3415,810197636_real_none_jungle_7.jpeg


## **Cleaning labels**

In [None]:
# Empty frame, presumably meant to collect the cleaned labels. It is not
# referenced in the visible part of the notebook, and its name differs from
# df_label by one letter — TODO confirm it is still needed.
df_labels = pd.DataFrame()

In [None]:
# NOTE(review): these two assignments rebind the from_source / from_class names
# that get_feature_from_path relies on earlier in the notebook; re-running that
# function after this cell will use the real/fake mapping instead.
from_source = ['real', 'fake']
from_class = {'real': 0, 'fake': 1}
def get_labels(path):
    """Return the real/fake label encoded in an image filename.

    "-" is treated like "_", "sea5" is expanded to "sea_5", the name is
    lower-cased and split on "_"; the second field selects the label through
    ``from_class`` (real -> 0, fake -> 1). Names that do not split into exactly
    five fields, and unknown second fields, are printed as diagnostics.

    NOTE(review): this encoding is the opposite of the one used when the raw
    image folders are loaded earlier in the notebook (fake -> 0, real -> 1).

    Raises:
        IndexError: The name has no second "_"-separated field.
        KeyError: The second field is neither "real" nor "fake".
    """
    normalized = path.replace("-", "_").replace("sea5", "sea_5").lower()
    fields = normalized.split("_")

    if len(fields) != 5:
        print(fields)

    kind = fields[1]
    if kind not in from_class:
        print(kind, path)

    return from_class[kind]

In [None]:
labels = list()

# The filenames are in the last column of df_label. Select it by position: its
# header is whatever value read_csv took from the first line of the file, so
# looking it up by that literal name breaks as soon as the file content or row
# order changes.
label_paths = df_label.iloc[:, -1]
supported_suffixes = ("jpeg", ".jpg", ".png", ".jfif", ".webp")

for row_index, label_path in label_paths.items():
    if label_path.endswith(supported_suffixes):
        try:
            labels.append(get_labels(label_path))
        except Exception as e:
            # Best-effort: report the row that could not be labelled and go on.
            # NOTE: a skipped row leaves `labels` shorter than the feature table.
            print(e, row_index)
    else:
        # Not a supported image name: report the row and the offending value
        print(row_index)
        print(label_path)