In [13]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import f1_score

In [14]:
fake_image_dir = "techosmotr/train/fictivniye(fictitious)/"
real_image_dir = "techosmotr/train/pravilniye(correct)/"

In [15]:
def collect_image_paths_and_preprocess(directory, label):
    image_paths = []
    images = []
    for subfolder in os.listdir(directory):
        subfolder_path = os.path.join(directory, subfolder)
        if os.path.isdir(subfolder_path):
            for img in os.listdir(subfolder_path):
                image_path = os.path.join(subfolder_path, img)
                image = cv2.imread(image_path)  # Read the image
                if image is not None:
                    image = cv2.resize(image, (224, 224))
                    image = image / 255.0  # Normalize pixel values
                    images.append(image)
                    image_paths.append((image, label))
    return images, image_paths


In [16]:
fake_images, fake_image_paths = collect_image_paths_and_preprocess(fake_image_dir, 1)
real_images, real_image_paths = collect_image_paths_and_preprocess(real_image_dir, 0)


In [17]:
data = pd.DataFrame({'image': [img[0] for img in fake_image_paths + real_image_paths],
                     'label': [img[1] for img in fake_image_paths + real_image_paths]})


In [18]:
train_data, valid_data = train_test_split(data, test_size=0.2, random_state=42)

In [19]:
model = keras.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(16, activation='relu'),
    # Binary classification, so using sigmoid activation
    layers.Dense(1, activation='sigmoid')
])


In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])


In [21]:
train_images = np.array([img for img in train_data['image']])
train_labels = np.array(train_data['label'])
valid_images = np.array([img for img in valid_data['image']])
valid_labels = np.array(valid_data['label'])


In [22]:
model.fit(train_images, train_labels, validation_data=(
    valid_images, valid_labels), epochs=7, batch_size=64, shuffle = True)


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x183ea896c50>

In [23]:
valid_predictions = model.predict(valid_images)
valid_predictions = (valid_predictions > 0.5).astype(int)  # Convert to binary predictions

f1 = f1_score(valid_labels, valid_predictions)
print("F1 Score:", f1)


F1 Score: 0.9114832535885168


In [24]:
def preprocess_test_images(image_paths):
    images = []
    for image_path in image_paths:
        image = cv2.imread(image_path)
        if image is not None:
            image = cv2.resize(image, (224, 224))
            image = image / 255.0
            images.append(image)
    return np.array(images)


# Define the path to test images
test_data_dir = "techosmotr/test_data"
test_image_paths = [os.path.join(test_data_dir, img)
                    for img in os.listdir(test_data_dir)]

# Preprocess the test images
test_images = preprocess_test_images(test_image_paths)

# Predict on test data
test_predictions = model.predict(test_images)
test_predictions = (test_predictions > 0.5).astype(
    int)  # Convert to binary predictions

# Create a DataFrame with the results
results = pd.DataFrame({'file_index': [os.path.basename(img).split(
    '.')[0] for img in test_image_paths], 'class': test_predictions[:, 0]})

# Save the results to a CSV file
results.to_csv('results.csv', index=False)

print("Prediction completed and results saved to results.csv")


Prediction completed and results saved to results.csv
