In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator

In [10]:
BATCH_SIZE = 32
IMAGE_SIZE = 256
CHANNELS=3
EPOCHS=2

In [11]:
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "Grape",
    seed=123,
    shuffle=True,
    image_size=(IMAGE_SIZE,IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

Found 5241 files belonging to 5 classes.


In [12]:
len(dataset)

164

In [13]:
class_name = dataset.class_names
class_name

['Grape___Black_rot',
 'Grape___Esca_(Black_Measles)',
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
 'Grape___healthy',
 'yelllow']

In [14]:
import os

In [15]:
# isfile() is used to check whether it is a file or directory
# os.scandir() is a function used to get names of both files and directory in a directory

In [16]:
count = 0
dir_path = r'E:\MIT\Sem-2\Mini Project\data set\Grape\Grape___Leaf_blight_(Isariopsis_Leaf_Spot)'
for path in os.scandir(dir_path):
    if path.is_file(): 
        count += 1
print('file count for leaf blight disease:', count)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'E:\\MIT\\Sem-2\\Mini Project\\data set\\Grape\\Grape___Leaf_blight_(Isariopsis_Leaf_Spot)'

In [None]:
count = 0
dir_path = r'E:\MIT\Sem-2\Mini Project\data set\Grape\Grape___Esca_(Black_Measles)'
for path in os.scandir(dir_path):
    if path.is_file():
        count += 1
print('file count for Esca disease:', count)

In [None]:
count = 0
dir_path = r'E:\MIT\Sem-2\Mini Project\data set\Grape\Grape___Black_rot'
for path in os.scandir(dir_path):
    if path.is_file():
        count += 1
print('file countfor black rot disease:', count)

In [None]:
count = 0
dir_path = r'E:\MIT\Sem-2\Mini Project\data set\Grape\Grape___healthy'
for path in os.scandir(dir_path):
    if path.is_file():
        count += 1
print('file countfor healthy leaves:', count)

In [None]:
count = 0
dir_path = r'E:\MIT\Sem-2\Mini Project\data set\Grape\yelllow'
for path in os.scandir(dir_path):
    if path.is_file():
        count += 1
print('file count for sunlight deficiency:', count)


    

In [None]:
#80% -> training
#20% ->  10% validation and 10% test

In [None]:
# spliting data for training
train_size = 0.8
len(dataset)*train_size

In [None]:
# spliting data for training
train_ds = dataset.take(126)
len(train_ds)

In [None]:
# Skiping training data that will give test data

test_ds = dataset.skip(126)
len(test_ds)

In [None]:
# validation size is 0.1(10%)
val_size = 0.1
len(dataset)*val_size
# no. of photos for validation per Batch or EPOCH

In [None]:
val_ds = test_ds.take(15)
len(val_ds)

In [None]:
test_ds = test_ds.skip(15)
len(test_ds)

In [None]:
# this function performs above task ( splits dataset into training , testing and validation)
def get_dataset_partitions_tf(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    assert (train_split + test_split + val_split) == 1
    
    ds_size = len(ds)
    
    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=12)
    
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds, val_ds, test_ds

In [None]:
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)

In [None]:
len(train_ds)

In [None]:
len(val_ds)

In [None]:

len(test_ds)

## Cache, Shuffle, and Prefetch the Dataset

In [None]:
# cashing -> it will read the image form the disk and then for the next iteration when you need the same image it will keep that image in memory
            #this improves the performance of pipeine
# shuffle -> shuffle the images
# prefatch -> if you are using gpu and cpu if gpu is busy in training prefatch will load the next set of batch from the disk that will improve the performance
# Autotune -> tensorflow determines how many batches to load while GPU is training

# due to this training will Fast   



In [None]:
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)


# Model Builiding

In [None]:
# converting image from RGB to gray scale and then changing the resolution of the image to 256 * 256

resize_and_rescale = tf.keras.Sequential([
  layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
  layers.experimental.preprocessing.Rescaling(1.0/255),
])

## Data Augmentation
### Data Augmentation is needed when we have less data, this boosts the accuracy of our model by augmenting the data.

In [None]:
# if the image is rotated or different in contrast then your model will not perfrom better so we using data augmentation

# data augmentation => to identify  unknown entry like rotated image , different conrast, verticalllyn rotated, horizontally flippped

data_augmentation = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
  layers.experimental.preprocessing.RandomRotation(0.2),
])

In [None]:
#applying data augmentation to the training data and prefetching the augmented data to improve training performance.
train_ds = train_ds.map(
    lambda x, y: (data_augmentation(x, training=True), y)
).prefetch(buffer_size=tf.data.AUTOTUNE)

In [18]:
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 3

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(Dense(5, activation='softmax'))


In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 62, 62, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 30, 30, 128)      0

In [20]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [21]:
history = model.fit(
    train_ds,
    batch_size=BATCH_SIZE,
    validation_data=val_ds,
    verbose=1,
    epochs=EPOCHS,
)


NameError: name 'train_ds' is not defined

In [None]:
scores=model.evaluate(test_ds)

In [None]:
scores

In [None]:
history

In [None]:
history.params

In [None]:
history.history['accuracy']

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(range(EPOCHS), acc, label='Training Accuracy')
plt.plot(range(EPOCHS), val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(range(EPOCHS), loss, label='Training Loss')
plt.plot(range(EPOCHS), val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
#predicting class of image
#we are doing shuffling hence image is/are changing
for images_batch,labels_batch in test_ds.take(1):
    
    first_image = images_batch[0].numpy().astype('uint8')
    first_label = labels_batch[0].numpy()
    
    
    print("first image to predict")
    plt.imshow(first_image)
    print("first images's actual label:",class_name[first_label])
    
    batch_prediction= model.predict(images_batch)
    print("predicted label:",class_name[np.argmax(batch_prediction[0])])

In [None]:
#for batch data predictions
def predict(model,img):
    img_array = tf.keras.preprocessing.image.img_to_array(images[i].numpy())
    img_array = tf.expand_dims(img_array,0) #to create batch
    
    predictions = model.predict(img_array)
    
    
    predicted_class = class_name[np.argmax(predictions[0])]
    confidence = round(100 * (np.max(predictions[0])), 2)
    return predicted_class, confidence

In [None]:
plt.figure(figsize=(15,15))
for images,labels in test_ds.take(1):
    for i in range(9):
        ax=plt.subplot(3,3,i+1)
        plt.imshow(images[i].numpy().astype("uint8"))
        
        predicted_class,confidence=predict(model,images[i].numpy())
        actual_class = class_name[labels[i]]
        plt.title(f"Actual :{actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")
        
        plt.axis("off")