In [1]:
!pip install --quiet img2vec_pytorch
!pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m89.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spopt 0.6.0 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.4.2


In [2]:
from img2vec_pytorch import Img2Vec
from PIL import Image
import pandas as pd
from os.path import basename
from glob import glob
import time

# Paths to the directories containing images
GLOB_AI = '/kaggle/input/ai-generated-images-vs-real-images/AiArtData/AiArtData/*'
GLOB_REAL = '/kaggle/input/ai-generated-images-vs-real-images/RealArt/RealArt/*'

start_time = time.time()
# Initialize Img2Vec model (resnet-18)
img2vec = Img2Vec(cuda=False, model='resnet-18', layer='default', layer_output_size=512)

def get_from_glob(glob_path: str, tag: str) -> list:
    """
    Extracts features (vectors) from images in the specified directory glob_path.
    Each image's tag (e.g., 'ai' or 'real') is associated based on the input tag parameter.

    Args:
        glob_path (str): Path pattern for glob to find image files.
        tag (str): Tag to associate with the images (e.g., 'ai' or 'real').

    Returns:
        list: List of pandas Series, each containing 'tag', 'name', and 'value' (vector) of an image.
    """
    result = []
    for input_file in glob(glob_path):
        name = basename(input_file)
        try:
            with Image.open(input_file) as image:
                # Get vector representation of the image using Img2Vec
                image = image.convert('RGB')
                vector = img2vec.get_vec(image, tensor=True).numpy().reshape(512,)
                result.append(pd.Series(data=[tag, name, vector], index=['tag', 'name', 'value']))
        except Exception as e:
            print(f"Error processing image {name}: {e}")
    return result

# Get image features and create DataFrame
ai_images = get_from_glob(GLOB_AI, 'ai')
real_images = get_from_glob(GLOB_REAL, 'real')

# Combine results into a DataFrame
df = pd.DataFrame(data=ai_images + real_images)
# Display the first few rows of the DataFrame
print("Dataframe Size: ", df.size)
print(df.head())

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 161MB/s]


Dataframe Size:  2919
  tag                                               name  \
0  ai        Various-AI-portraits-generated-by-Fotor.jpg   
1  ai                                       images77.jpg   
2  ai  1000_F_563719058_JXnzcPV4GRpWqmF5sqnqmbJ7ow3ca...   
3  ai             MidJourney-content-policy-1024x576.jpg   
4  ai  an-ai-jungle-landscape-made-by-ai-landscape-ge...   

                                               value  
0  [0.48017105, 0.86461204, 1.6611716, 0.91596454...  
1  [1.4642806, 0.71890783, 1.1184235, 0.57213616,...  
2  [1.8749822, 0.03606455, 1.5152544, 1.4411087, ...  
3  [0.99166214, 1.0530527, 1.0515265, 1.0998495, ...  
4  [0.94869894, 0.46799046, 0.40774786, 0.9549222...  


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


X_train, X_test, y_train, y_test = train_test_split(df['value'].apply(pd.Series), df['tag'], test_size=0.2, random_state=45)

# Initialize the logistic regression model
model = LogisticRegression(max_iter=10000)

# Train the model on the training data
model.fit(X_train, y_train)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Training complete. Elapsed time: {elapsed_time:.2f} seconds")

# Calculate and print the accuracy score on the test data
accuracy = accuracy_score(y_test, model.predict(X_test))
print(f'accuracy: {accuracy*100:.2f}%')

Training complete. Elapsed time: 83.17 seconds
accuracy: 71.28%


In [4]:
from sklearn.metrics import classification_report
print(classification_report(y_true = y_test, y_pred = model.predict(X_test), digits = 4))

              precision    recall  f1-score   support

          ai     0.7383    0.7383    0.7383       107
        real     0.6818    0.6818    0.6818        88

    accuracy                         0.7128       195
   macro avg     0.7101    0.7101    0.7101       195
weighted avg     0.7128    0.7128    0.7128       195

