In [2]:
import os
import rasterio
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE
from rasterio.plot import show
from rasterio.features import geometry_mask
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Function to normalize bands
def normalize_band(band):
    return band / np.max(band)

# Function to calculate NDVI
def calculate_ndvi(red_band, nir_band):
    ndvi = (nir_band - red_band) / (nir_band + red_band)
    return ndvi

# Function to load all valid TIFF files from a folder
def load_tiff_files(folder_path):
    tiff_files = []
    for file in os.listdir(folder_path):
        if file.endswith('.tif') or file.endswith('.tiff'):
            tiff_files.append(os.path.join(folder_path, file))
    return tiff_files

# Function to read Sentinel-1 data
def read_sentinel1_data(folder_path):
    sentinel1_files = load_tiff_files(folder_path)
    sentinel1_data = []
    sentinel1_meta = None
    for file in sentinel1_files:
        try:
            with rasterio.open(file) as src:
                sentinel1_data.append(src.read())  # Read all bands
                if sentinel1_meta is None:
                    sentinel1_meta = src.meta
        except rasterio.errors.RasterioIOError as e:
            print(f"Error reading {file}: {e}")
    sentinel1_data = np.concatenate(sentinel1_data, axis=0)
    return sentinel1_data, sentinel1_meta

# Function to read Sentinel-2 data
def read_sentinel2_data(folder_path):
    sentinel2_files = load_tiff_files(folder_path)
    sentinel2_data = []
    sentinel2_meta = None
    for file in sentinel2_files:
        try:
            with rasterio.open(file) as src:
                sentinel2_data.append(src.read())
                if sentinel2_meta is None:
                    sentinel2_meta = src.meta
        except rasterio.errors.RasterioIOError as e:
            print(f"Error reading {file}: {e}")
    return sentinel2_data, sentinel2_meta

# Paths to Sentinel data folders and shapefile
sentinel1_folder_path = r"C:\Users\rishi\OneDrive\Desktop\sentinel_1"
sentinel2_folder_path = r"C:\Users\rishi\OneDrive\Desktop\sentinel_2"
shapefile_path = r"C:\Users\rishi\OneDrive\Desktop\merged for training\crop_data_merged.shp"

# Read and process Sentinel data
sentinel1_data, sentinel1_meta = read_sentinel1_data(sentinel1_folder_path)
sentinel2_data, sentinel2_meta = read_sentinel2_data(sentinel2_folder_path)

# Extract individual bands from Sentinel-2 data and combine them
sentinel2_red = []
sentinel2_nir = []
for data in sentinel2_data:
    sentinel2_red.append(data[3].astype(float))  # Red band (Band 4)
    sentinel2_nir.append(data[7].astype(float))  # Near Infrared band (Band 8))
sentinel2_red = np.concatenate(sentinel2_red, axis=0)
sentinel2_nir = np.concatenate(sentinel2_nir, axis=0)

# Normalize Sentinel-2 bands
sentinel2_red_norm = normalize_band(sentinel2_red)
sentinel2_nir_norm = normalize_band(sentinel2_nir)

# Calculate NDVI using Sentinel-2 bands
ndvi = calculate_ndvi(sentinel2_red_norm, sentinel2_nir_norm)

# Load the shapefile using GeoPandas
gdf = gpd.read_file(shapefile_path)

# Ensure the coordinate reference systems match
gdf = gdf.to_crs(sentinel1_meta['crs'])

# Extract features from each polygon in the shapefile
patch_size = 64  # Size of patches to extract
features = []
labels = []

for idx, row in gdf.iterrows():
    geom = row['geometry']
    label = row['layer']  
    
    # Create a mask for the polygon
    mask = geometry_mask([geom], transform=sentinel1_meta['transform'], invert=True, out_shape=(sentinel1_meta['height'], sentinel1_meta['width']))
    
    # Extract patches of Sentinel-1 and Sentinel-2 data
    for i in range(0, sentinel1_meta['height'], patch_size):
        for j in range(0, sentinel1_meta['width'], patch_size):
            if mask[i:i+patch_size, j:j+patch_size].sum() > 0:  # Ensure there is some data in the patch
                sentinel1_patch = sentinel1_data[:, i:i+patch_size, j:j+patch_size]
                sentinel2_patch = np.array([sentinel2_red[i:i+patch_size, j:j+patch_size], sentinel2_nir[i:i+patch_size, j:j+patch_size]])
                ndvi_patch = ndvi[i:i+patch_size, j:j+patch_size]
                
                if sentinel1_patch.shape[1] == patch_size and sentinel1_patch.shape[2] == patch_size:
                    combined_patch = np.concatenate((sentinel1_patch, sentinel2_patch, np.expand_dims(ndvi_patch, axis=0)), axis=0)
                    features.append(combined_patch)
                    labels.append(label)

features = np.array(features)
labels = np.array(labels)

# Encode labels to integers
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Standardize features
scaler = StandardScaler()
for i in range(features.shape[1]):
    features[:, i, :, :] = scaler.fit_transform(features[:, i, :, :].reshape(-1, features[:, i, :, :].shape[-1])).reshape(features[:, i, :, :].shape)

# Handle class imbalance with SMOTE
original_shape = features.shape
features_flat = features.reshape(features.shape[0], -1)
smote = SMOTE(random_state=42)
features_resampled, labels_resampled = smote.fit_resample(features_flat, labels_encoded)
features_resampled = features_resampled.reshape(-1, original_shape[1], original_shape[2], original_shape[3])

# Convert labels to categorical
labels_resampled = to_categorical(labels_resampled)

# Reshape features for RNN input (batch_size, timesteps, input_dim)
features_resampled_rnn = features_resampled.reshape(features_resampled.shape[0], patch_size, -1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_resampled_rnn, labels_resampled, test_size=0.2, random_state=42)

# Define the RNN model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(labels_resampled.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Print classification report
print(classification_report(y_true_classes, y_pred_classes, target_names=label_encoder.classes_))



  super().__init__(**kwargs)


Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 342ms/step - accuracy: 0.3305 - loss: 1.3721 - val_accuracy: 0.4808 - val_loss: 1.2509
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 105ms/step - accuracy: 0.5365 - loss: 1.2061 - val_accuracy: 0.4615 - val_loss: 1.2059
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 110ms/step - accuracy: 0.5116 - loss: 1.1334 - val_accuracy: 0.4808 - val_loss: 1.2265
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 105ms/step - accuracy: 0.5519 - loss: 1.0569 - val_accuracy: 0.4808 - val_loss: 1.2225
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 108ms/step - accuracy: 0.5547 - loss: 1.0101 - val_accuracy: 0.5000 - val_loss: 1.2118
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 116ms/step - accuracy: 0.6232 - loss: 0.9360 - val_accuracy: 0.4615 - val_loss: 1.2746
Epoch 7/20
[1m7/7[0m [32m━━━━━━━━━━━