In [1]:
pip install opencv-python



In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Set the file path file_path = '/content/drive/MyDrive/creditcard.csv'

Mounted at /content/drive


In [3]:
import cv2
import numpy as np
import os

In [4]:
# Define the directory where your images are located
sunflower = '/content/drive/MyDrive/sunflowers'
not_sunflower = '/content/drive/MyDrive/not_sunflowers'

In [5]:
# Define the target size for resizing
target_size = (224, 224)  # Example target size, adjust as needed

In [6]:
# Function to preprocess images
def preprocess_image(img_path, target_size):
    # Read image using OpenCV
    img = cv2.imread(img_path)
    # Resize image to the target size
    img_resized = cv2.resize(img, target_size)
    # Convert image to float32
    img_resized = img_resized.astype(np.float32)
    # Normalize pixel values to be between 0 and 1
    img_normalized = img_resized / 255.0
    return img_normalized

# Define lists to store data and labels
data = []
labels = []

# Iterate through each image in the "sunflowers" directory
for filename in os.listdir(sunflower):
    if filename.endswith(".jpg") or filename.endswith(".png"):  # Assuming images are in jpg or png format
        img_path = os.path.join(sunflower, filename)
        # Preprocess the image
        preprocessed_img = preprocess_image(img_path, target_size)
        # Append the preprocessed image to data
        data.append(preprocessed_img)
        # Append the label for sunflower to labels
        labels.append(1)  # Sunflower label is 1

# Iterate through each image in the "not_sunflowers" directory
for filename in os.listdir(not_sunflower):
    if filename.endswith(".jpg") or filename.endswith(".png"): # Assuming images are in jpg or png format
        img_path = os.path.join(not_sunflower, filename)
        # Preprocess the image
        preprocessed_img = preprocess_image(img_path, target_size)
        # Append the preprocessed image to data
        data.append(preprocessed_img)
        # Append the label for not sunflower to labels
        labels.append(0)  # Not sunflower label is 0


In [7]:
# Convert lists to NumPy arrays
data = np.array(data)
labels = np.array(labels)

# Print the shapes of data and labels
print("Data shape:", data.shape)
print("Labels shape:", labels.shape)

# Print the number of unique labels/classes
num_classes = len(np.unique(labels))
print("Number of classes:", num_classes)

# Print the unique classes and their counts
unique_classes, class_counts = np.unique(labels, return_counts=True)
for i, class_label in enumerate(unique_classes):
    print("Class", class_label, ":", class_counts[i])

# Print a sample of data and corresponding label
print("Sample data:", data[0])  # Print the first sample data
print("Sample label:", labels[0])  # Print the corresponding label of the first sample


Data shape: (223, 224, 224, 3)
Labels shape: (223,)
Number of classes: 2
Class 0 : 86
Class 1 : 137
Sample data: [[[0.34117648 0.23529412 0.19607843]
  [0.34117648 0.23529412 0.19607843]
  [0.34509805 0.23921569 0.2       ]
  ...
  [0.35686275 0.26666668 0.23529412]
  [0.35686275 0.26666668 0.23529412]
  [0.3529412  0.2627451  0.23137255]]

 [[0.34117648 0.23529412 0.19607843]
  [0.34117648 0.23529412 0.19607843]
  [0.34509805 0.23921569 0.2       ]
  ...
  [0.35686275 0.26666668 0.23529412]
  [0.35686275 0.26666668 0.23529412]
  [0.3529412  0.2627451  0.23137255]]

 [[0.34509805 0.23921569 0.2       ]
  [0.34509805 0.23921569 0.2       ]
  [0.34901962 0.24313726 0.20392157]
  ...
  [0.36078432 0.27058825 0.23921569]
  [0.36078432 0.27058825 0.23921569]
  [0.35686275 0.26666668 0.23529412]]

 ...

 [[0.02352941 0.04313726 0.03529412]
  [0.02352941 0.04313726 0.03529412]
  [0.02745098 0.04705882 0.03921569]
  ...
  [0.03137255 0.05098039 0.04705882]
  [0.03137255 0.05098039 0.04705882]


In [8]:
from sklearn.model_selection import train_test_split

In [9]:

# Split the dataset into training, validation, and testing sets
# Here, we'll split the data into 80% training, 10% validation, and 10% testing sets
# You can adjust the test_size and validation_size parameters as needed
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.125, random_state=42)

# Optionally, you can print the shapes of the data splits to verify the sizes
print("Train data shape:", train_data.shape)
print("Train labels shape:", train_labels.shape)
print("Validation data shape:", val_data.shape)
print("Validation labels shape:", val_labels.shape)
print("Test data shape:", test_data.shape)
print("Test labels shape:", test_labels.shape)


Train data shape: (155, 224, 224, 3)
Train labels shape: (155,)
Validation data shape: (23, 224, 224, 3)
Validation labels shape: (23,)
Test data shape: (45, 224, 224, 3)
Test labels shape: (45,)


In [10]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [11]:
# Define and train models
svm_model = SVC(kernel='linear')  # Support Vector Machine with linear kernel
svm_model.fit(train_data.reshape(train_data.shape[0], -1), train_labels)

svm_pred = svm_model.predict(val_data.reshape(val_data.shape[0], -1))
svm_accuracy = accuracy_score(val_labels, svm_pred)
print("SVM Accuracy:", svm_accuracy)


SVM Accuracy: 0.8695652173913043


In [12]:
rf_model = RandomForestClassifier(n_estimators=100)  # Random Forest Classifier
rf_model.fit(train_data.reshape(train_data.shape[0], -1), train_labels)

rf_pred = rf_model.predict(val_data.reshape(val_data.shape[0], -1))
rf_accuracy = accuracy_score(val_labels, rf_pred)
print("Random Forest Accuracy:", rf_accuracy)

Random Forest Accuracy: 0.782608695652174


In [13]:
knn_model = KNeighborsClassifier(n_neighbors=5)  # K-Nearest Neighbors Classifier
knn_model.fit(train_data.reshape(train_data.shape[0], -1), train_labels)

knn_pred = knn_model.predict(val_data.reshape(val_data.shape[0], -1))
knn_accuracy = accuracy_score(val_labels, knn_pred)
print("KNN Accuracy:", knn_accuracy)

KNN Accuracy: 0.6956521739130435


In [14]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [15]:
# Define the CNN architecture
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])


In [16]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
history = model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_data=(val_data, val_labels))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_data, test_labels)
print("Test Accuracy:", test_accuracy)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)       0

In [18]:
# Flatten the image data
num_samples = train_data.shape[0]
num_features = np.prod(train_data.shape[1:])
train_data_flat = train_data.reshape(num_samples, num_features)

# Create the ensemble model
ensemble_clf = VotingClassifier(estimators=[
    ('rf', rf_clf),
    ('knn', knn_clf),
    ('svc', svc_clf)
], voting='soft')  # Using soft voting for probabilities

# Train the ensemble model
ensemble_clf.fit(train_data_flat, train_labels)

# Flatten the test data as well
num_samples_test = test_data.shape[0]
test_data_flat = test_data.reshape(num_samples_test, num_features)

# Evaluate the ensemble model
ensemble_preds = ensemble_clf.predict(test_data_flat)
ensemble_accuracy = accuracy_score(test_labels, ensemble_preds)
print("Ensemble Accuracy:", ensemble_accuracy)


Ensemble Accuracy: 0.7333333333333333
