<a href="https://colab.research.google.com/github/liissiigur/IDS-project/blob/main/project_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Connecting to GitHub
!git clone https://github.com/liissiigur/IDS-project.git
%cd /content/IDS-project/IDS_data/
!ls

fatal: destination path 'IDS-project' already exists and is not an empty directory.


In [2]:
#installs
!pip install tensorflow



In [75]:
#imports
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import cv2
from google.colab.patches import cv2_imshow
import numpy as np
from sklearn.preprocessing import LabelEncoder
import random

In [43]:
#Specifying folders
train_folder = "train"
val_folder = "valid"
test_folder = "test"

In [65]:
#A function to read in the picture data and corresponding labels for the training and validation data
#The training and validation data are in separate folders, which both have subfolders of different classes
#Each subfolder contains images of this class and the folder name represetns the class (e.g "Apple___healthy" has images of healthy apple leaves)
#Actually the function is a generator that yields data in in batches, because there are a lot of pictures and otherwise the system ran out of RAM

def image_data(folder, batch_size=32):
    # Collect all image paths and labels
    image_paths = []
    labels = []

    class_labels = []  # All unique class labels

    # The class names (folder names) must be encoded to numerical values
    for imfolder in os.listdir(folder):
        folder_path = os.path.join(folder, imfolder)
        if os.path.isdir(folder_path):
            class_labels.append(imfolder)
    label_encoder = LabelEncoder()
    label_encoder.fit(class_labels)  # Assign a number to each class

    # Collect image paths and labels from all subfolders
    for imfolder in os.listdir(folder): #Go through all folders in the train folder (each folder represents a category)
        folder_path = os.path.join(folder, imfolder)
        if os.path.isdir(folder_path):
            for imfile in os.listdir(folder_path): #Go through all of the files (photos) in the subfolder
                image_path = os.path.join(folder_path, imfile)
                if os.path.isfile(image_path):
                    image_paths.append(image_path)
                    labels.append(imfolder)


    # Shuffle the data by shuffling the indexes
    indices = list(range(len(image_paths)))
    random.shuffle(indices)

    # Yield data in batches
    X_batch = []
    y_batch = []

    for i in indices:
        image_path = image_paths[i]
        label = labels[i]

        img = cv2.imread(image_path) #Load in the image data (256x256x3 matrix) with cv2
        if img is not None:
            if img.shape != (256, 256, 3):
                img = cv2.resize(img, (256, 256)) #If an image doesnt have the regular size, resize it
            img = img / 255.0  # Normalize the image to the range [0, 1], because neural networks work better with numbers in this range
            X_batch.append(img)
            y_batch.append(label)

        # Full batch
        if len(X_batch) >= batch_size:
            X_batch = np.array(X_batch) #Convert to numpy array
            y_batch = label_encoder.transform(y_batch)  #Convert string labels to integers
            y_batch = np.array(y_batch)
            yield X_batch, y_batch
            X_batch = []
            y_batch = []

    #All remaining images that aren't part of a full batch
    if X_batch:
        X_batch = np.array(X_batch)
        y_batch = label_encoder.transform(y_batch) #Convert again
        y_batch = np.array(y_batch)
        yield X_batch, y_batch

# Using the function for training and validation data
train_data = image_data(train_folder)
val_data = image_data(val_folder)



In [59]:
#Defining a convolutional neural network model - need kihid on siin üsna suvaliselt valitud, tuleks testida eri variante, et kas miski on parem
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(os.listdir(train_folder)), activation='softmax') #Output layer is softmax with as many neurons as there are classes. For each class, the probability of the image belonging to this class is given.
])

#Compiling the model - jällegi suvaliselt mingi optimiseerija jms valitud, otsida parimat
model.compile(optimizer=optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Estimate number of batches per epoch
train_steps = len(os.listdir(train_folder)) // 32
val_steps = len(os.listdir(val_folder)) // 32

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [72]:
history = model.fit(
    train_data,
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=val_data,
    validation_steps=val_steps
)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455ms/step - accuracy: 0.0312 - loss: 4.1759 - val_accuracy: 0.0312 - val_loss: 3.9785
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.0000e+00 - loss: 4.0120 - val_accuracy: 0.0312 - val_loss: 3.5083
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - accuracy: 0.0000e+00 - loss: 3.8243 - val_accuracy: 0.0625 - val_loss: 3.5131
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step - accuracy: 0.0000e+00 - loss: 3.5294 - val_accuracy: 0.0312 - val_loss: 3.5075
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step - accuracy: 0.0938 - loss: 3.5307 - val_accuracy: 0.0000e+00 - val_loss: 3.4978
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - accuracy: 0.0625 - loss: 3.4970 - val_accuracy: 0.0312 - val_loss: 3.4986
Epoch 7/100
[1m1/

KeyboardInterrupt: 

In [None]:
loss, accuracy = model.evaluate(val_data, steps=val_steps)