In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Import Libraries**

In [None]:
import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model, model_from_json
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Dropout, Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from sklearn.metrics import confusion_matrix, average_precision_score, recall_score, precision_score, accuracy_score, classification_report
from tensorflow.keras.layers import GlobalAveragePooling2D
import itertools
import shutil
import matplotlib.pyplot as plt
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import time
import pandas as pd
# The following line is specific to Jupyter notebooks
%matplotlib inline

# ## Split Data Into Train And Test

In [None]:
# Define paths
source_dir = '/kaggle/input/satellite-image-classification/data/'
dest_dir = 'dataset/'
categories = ['cloudy', 'desert', 'green_area', 'water']
split_ratio = 0.2  # 80% for training, 20% for testing
split_seed = 42    # fixed seed so the same files land in the same split on every run

# Keep seeding NumPy's global RNG, as the original cell did.
np.random.seed(split_seed)

for category in categories:
    source_category_dir = os.path.join(source_dir, category)

    # os.listdir returns entries in arbitrary order, so sort them: together
    # with the explicit random_state below this makes the split repeatable.
    # A repeatable split also means re-running this cell copies the same files
    # to the same places instead of leaking images between train and test.
    # Sub-directories are skipped because shutil.copy only handles files.
    files = sorted(
        entry for entry in os.listdir(source_category_dir)
        if os.path.isfile(os.path.join(source_category_dir, entry))
    )

    # train_test_split shuffles internally using random_state
    train_files, test_files = train_test_split(
        files, test_size=split_ratio, random_state=split_seed
    )

    # Create the destination folder for each subset and copy its files over
    for subset, subset_files in (('train', train_files), ('test', test_files)):
        subset_dir = os.path.join(dest_dir, subset, category)
        os.makedirs(subset_dir, exist_ok=True)
        for file_name in subset_files:
            shutil.copy(
                os.path.join(source_category_dir, file_name),
                os.path.join(subset_dir, file_name),
            )

print("Data split and copied successfully.")

# ## Display Sample from Data

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directory holding the training images
train_data_dir = 'dataset/train/'

# Only rescale pixel values into [0, 1] here; no augmentation is configured
train_datagen = ImageDataGenerator(rescale=1./255)

# Stream shuffled batches of 32 images, resized to 224x224, with one-hot labels
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    shuffle=True,
    class_mode='categorical',
    batch_size=32,
    target_size=(224, 224),
)

# Pull a single batch of images together with their one-hot labels
images, labels = next(train_generator)

# Position of the largest entry in each one-hot row is the class index
class_indices = labels.argmax(axis=1)

# Class names, in the order the generator stores them
class_labels = [class_name for class_name in train_generator.class_indices]

# Show the first image of the batch, titled with its class name
first_label = class_labels[class_indices[0]]
plt.figure(figsize=(6, 6))
plt.imshow(images[0])
plt.title(f"Sample Image: {first_label}")
plt.axis('off')  # no ticks or frame around the picture
plt.show()


In [None]:
# Derive the split directories from `dest_dir`, the folder the split step
# wrote to, instead of hard-coding the Kaggle working directory. The paths
# then stay correct whatever the current working directory is.
train_path = os.path.abspath(os.path.join(dest_dir, 'train'))
test_path = os.path.abspath(os.path.join(dest_dir, 'test'))

# Sanity check: each listing should show one sub-folder per class
print(os.listdir(train_path))
print(os.listdir(test_path))

In [None]:
# Random geometric augmentation settings handed to the image generator
augmentation_options = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'fill_mode': 'nearest',
}

# validation_split=0.2 reserves a fraction of the directory so that
# flow_from_directory can serve subset='training' / subset='validation'
train_datagen = ImageDataGenerator(validation_split=0.2, **augmentation_options)

In [None]:
# Explicit class order passed to flow_from_directory(classes=...); the
# per-class count printout indexes this list by class index.
classes = [
    'green_area',
    'water',
    'desert',
    'cloudy',
]

In [None]:
batchSize = 32

# Validation images must not go through random augmentation, otherwise
# val_loss (which drives checkpointing, early stopping and LR scheduling)
# is measured on randomly distorted data. A second, non-augmenting generator
# with the SAME validation_split as train_datagen (0.2) selects the same
# held-out files, because the subset split is a deterministic slice of each
# class's file list.
validation_datagen = ImageDataGenerator(validation_split=0.2)

# Augmented training subset
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=batchSize,
    classes=classes,
    subset='training')

# Un-augmented validation subset, carved out of the same directory
validation_generator = validation_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=batchSize,
    classes=classes,
    subset='validation')

# Test data: no augmentation, and no shuffling so that the prediction order
# lines up with test_generator.classes
test_generator = ImageDataGenerator().flow_from_directory(
    test_path,
    target_size=(224, 224),
    classes=classes,
    shuffle=False,
    batch_size=batchSize)

In [None]:
# Report how many images of each class every generator will serve.
# The third tuple element says whether a blank separator line follows.
for heading, generator, blank_line_after in (
    ("In train_generator ", train_generator, True),
    ("In validation_generator ", validation_generator, True),
    ("In test_generator ", test_generator, False),
):
    print(heading)
    assigned_indices = list(generator.classes)
    for class_index in range(len(generator.class_indices)):
        print(classes[class_index], ":\t", assigned_indices.count(class_index))
    if blank_line_after:
        print("")

In [None]:
def plots(ims, figsize = (22,22), rows=4, interp=False, titles=None, maxNum = 9):
    """Draw up to ``maxNum`` images from ``ims`` on a grid of subplots.

    Args:
        ims: Batch of images (array or sequence of arrays). Values are cast to
            ``uint8``; a 4-D batch whose last axis is not of size 3 is treated
            as channel-first and transposed to channel-last.
        figsize: Size passed to ``plt.figure``.
        rows: Number of grid rows.
        interp: If true, use matplotlib's default interpolation; otherwise
            interpolation is disabled (``'none'``).
        titles: Optional sequence of per-image titles; images without a
            matching title are left untitled.
        maxNum: Maximum number of images to draw.
    """
    # The original guard `type(ims[0] is np.ndarray)` was always truthy, so
    # the conversion always ran; keep that behaviour without the bogus check.
    ims = np.asarray(ims).astype(np.uint8)
    if ims.ndim == 4 and ims.shape[-1] != 3:
        ims = ims.transpose((0,2,3,1))

    # Never index past the end of the batch.
    count = min(maxNum, len(ims))

    f = plt.figure(figsize=figsize)
    # Ceiling division: the grid must hold every drawn image. The previous
    # parity-based formula could allocate too few cells (e.g. 10 images, 4 rows).
    cols = max(1, -(-count // rows))
    for i in range(count):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None and i < len(titles):
            sp.set_title(titles[i], fontsize=20)
        # Draw on the subplot that was just created
        sp.imshow(ims[i], interpolation = None if interp else 'none')
        
 #===================================================================================       
train_generator.reset()
# Use the built-in next(): the iterator's own .next() method is not available
# in newer Keras releases, and next(...) is already used for the sample batch.
imgs, labels = next(train_generator)

# Labels are one-hot rows, so the arg-max of each row is its class index
labelIndices = np.argmax(labels, axis=1).tolist()

# Invert the generator's name -> index mapping once, then look every index up
index_to_name = {index: name for name, index in train_generator.class_indices.items()}
labelNames = [index_to_name[index] for index in labelIndices]

In [None]:
# Show the first 8 images of the sampled batch on a 4-row grid,
# each titled with its class name.
plots(
    imgs,
    rows=4,
    titles=labelNames,
    maxNum=8,
)

In [None]:
input_shape = (224, 224, 3)

# Load the ImageNet-pretrained convolutional base without its classifier
# head and freeze it so only the new head is trained.
base_model = InceptionV3(weights='imagenet',
                         include_top=False,
                         input_shape=input_shape)
base_model.trainable = False

# The data generators deliver raw 0-255 pixel values, but the InceptionV3
# ImageNet weights expect inputs scaled to [-1, 1]. Doing the scaling inside
# the model keeps training, evaluation and prediction consistent.
inputs = tf.keras.Input(shape=input_shape)
x = preprocess_input(inputs)

# training=False keeps the frozen base in inference mode
x = base_model(x, training=False)

# New classification head: pooled features -> one softmax unit per class
x = GlobalAveragePooling2D()(x)
predictions = Dense(len(classes), activation='softmax')(x)

# Create the model
model = Model(inputs=inputs, outputs=predictions)

# Print the model summary
model.summary()

In [None]:
# `lr` is a deprecated alias that newer Keras optimizers ignore or reject;
# `learning_rate` is the supported argument name.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Save the model to disk whenever the validation loss improves
checkpoint = ModelCheckpoint('best_model.h5', verbose=1, monitor='val_loss', save_best_only=True, mode='auto')

# EarlyStopping to stop training when the validation loss has not improved after 5 epochs.
# restore_best_weights=True puts the weights of the best epoch back into the
# model, so later evaluation does not silently use a worse, final-epoch state.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto',
                               restore_best_weights=True)

# ReduceLROnPlateau to reduce the learning rate when the validation loss has stopped improving
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, mode='auto', min_lr=0.00001)

In [None]:
# Rewind both generators before training starts
for generator in (train_generator, validation_generator):
    generator.reset()

# Callbacks run during training, in this order
training_callbacks = [checkpoint, early_stopping, reduce_lr]

# Train for at most 10 epochs, validating after each one
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    callbacks=training_callbacks,
)

In [None]:
validation_generator.reset()
# Ceiling division so the final, partially filled batch is evaluated too
steps = -(-validation_generator.samples // batchSize)
score = model.evaluate(validation_generator, steps=steps)

# score holds the loss followed by the compiled metrics
val_loss, val_accuracy = score[0], score[1]
print(f"For validation  data set; Loss: {val_loss}, Accuracy: {val_accuracy}")


In [None]:
test_generator.reset()
# Number of batches needed to cover every test sample (rounded up)
steps = -(-test_generator.samples // batchSize)
score = model.evaluate(test_generator, steps=steps)

# score holds the loss followed by the compiled metrics
test_loss, test_accuracy = score[0], score[1]
print(f"For test data set; Loss: {test_loss}, Accuracy: {test_accuracy}")


In [None]:
# Start from the first test batch so predictions line up with test_generator.classes
test_generator.reset()

# Batches required to see every test sample once (ceiling division)
testStep = -(-test_generator.samples // batchSize)
print("testStep: ", testStep)

predictions = model.predict(
    test_generator,
    steps=testStep,
    verbose=1,
)


In [None]:
# Number of prediction rows returned by model.predict
predictions.shape[0]

In [None]:
# Index of the highest-scoring class in every prediction row
predicted_class_indices = predictions.argmax(axis=1)
print(predicted_class_indices)
predicted_class_indices.shape[0]



In [None]:
# Class-name -> class-index mapping held by the test generator
labels = test_generator.class_indices
print(labels)

In [None]:
# Build the index -> name lookup straight from the generator instead of
# inverting `labels` in place: re-running this cell can then never flip the
# mapping back to name -> index and break the lookups that follow.
labels = {index: name for name, index in test_generator.class_indices.items()}
print(labels)

In [None]:
# Translate every predicted class index into its class name
predictedLables = []
for class_index in predicted_class_indices:
    predictedLables.append(labels[class_index])

print(predictedLables)
len(predictedLables)

In [None]:
# Translate the ground-truth class index of every test image into its class name
actualLables = []
for class_index in test_generator.classes:
    actualLables.append(labels[class_index])

print(actualLables)
len(actualLables)


In [None]:
# Fraction of test images whose predicted class name equals the true one
accuracy_score(y_true=actualLables, y_pred=predictedLables)

In [None]:
# Text report comparing true and predicted class names
report = classification_report(y_true=actualLables, y_pred=predictedLables)
print(report)