<a href="https://colab.research.google.com/github/rohitarer/PRODIGY_ML_03/blob/main/PRODIGY_ML_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [68]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [69]:
# Prepare the submission file
sample_submission = pd.read_csv('./sampleSubmission.csv')
sample_submission['label'] = test_predictions
sample_submission.to_csv('submission.csv', index=False)

In [70]:
# Load the dataset
def load_images_from_folder(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            img = cv2.resize(img, (64, 64))  # Resize to a smaller size for faster processing
            images.append(img)
            label = 1 if 'dog' in filename else 0  # Assuming filenames contain 'dog' or 'cat'
            labels.append(label)
    return np.array(images), np.array(labels)

In [71]:
# Path to the dataset folder
train_folder = './train'
test_folder = './test'

In [72]:
# Load training images and labels
X, y = load_images_from_folder(train_folder)

In [73]:
# Balance the dataset by duplicating minority class images (for demonstration purposes)
unique, counts = np.unique(y, return_counts=True)
if len(unique) == 1:
    minority_class = unique[0]
    minority_count = counts[0]
    X_minority = X[y == minority_class]
    y_minority = y[y == minority_class]
    X = np.concatenate([X, X_minority[:100]])
    y = np.concatenate([y, np.ones(100, dtype=int)])

In [74]:
# Flatten the images
X_flatten = X.reshape(X.shape[0], -1)

In [75]:
# Check the distribution of classes
unique, counts = np.unique(y, return_counts=True)
print(f'Class distribution before split: {dict(zip(unique, counts))}')

Class distribution before split: {0: 101, 1: 100}


In [76]:
# Split into training and validation sets, ensuring stratification
X_train, X_val, y_train, y_val = train_test_split(X_flatten, y, test_size=0.2, random_state=42, stratify=y)

In [77]:
# Check the distribution of classes after split
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_val, counts_val = np.unique(y_val, return_counts=True)
print(f'Class distribution in training set: {dict(zip(unique_train, counts_train))}')
print(f'Class distribution in validation set: {dict(zip(unique_val, counts_val))}')

Class distribution in training set: {0: 80, 1: 80}
Class distribution in validation set: {0: 21, 1: 20}


In [78]:
# Ensure that both classes are present in the training set
if len(unique_train) < 2:
    raise ValueError("The training set contains only one class. Adjust the dataset or split parameters.")

In [79]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [80]:
# Train the SVM model
svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train, y_train)

In [81]:
# Predict on the validation set
y_pred = svm.predict(X_val)

In [82]:
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

Validation Accuracy: 2.44%


In [83]:
# Load and preprocess test images
X_test, _ = load_images_from_folder(test_folder)
X_test_flatten = X_test.reshape(X_test.shape[0], -1)
X_test = scaler.transform(X_test_flatten)
X_test

array([[ 1.52302589,  1.29055606,  0.95468461, ..., -0.36909338,
        -0.42282908, -0.63823273],
       [-0.75204885, -0.11692343,  1.39283857, ..., -1.07380628,
        -1.24234743, -1.15313294],
       [ 0.26730282, -0.21955214, -0.76771373, ...,  0.0396401 ,
        -0.33805132, -0.81914361],
       ...,
       [ 0.47412779,  0.27893018, -0.08781965, ...,  0.71616448,
        -0.15436617,  0.04366215],
       [-0.50090424, -0.16090716,  0.22946425, ..., -0.31271635,
        -0.1684958 , -0.08158385],
       [ 2.35032579,  2.22887572,  1.96697135, ...,  1.66047976,
         1.54118904,  1.17087612]])

In [84]:
# Predict on the test set
test_predictions = svm.predict(X_test)
test_predictions

array([1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0])