# **Convert the Image into CSV**

## **Import Required Libraries**

In [1]:
# !pip install rasterio

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rasterio
import pickle
import warnings
warnings.filterwarnings("ignore")

## **Prepare the Image for Classification**

In [3]:
# Open the multi-band raster with Rasterio; the dataset handle is reused by the cells below
image_path = "/content/drive/MyDrive/ML & DL/Flood Data/Maldah_Flood_Parameters.tif"
image = rasterio.open(image_path)

In [4]:
# Capture the raster's metadata in plain variables for use in later cells
bandNum, height, width = image.count, image.height, image.width
crs, transform = image.crs, image.transform
shape = (height, width)  # (rows, columns) of a single band

In [5]:
# Print each metadata value next to its label (print's default separator is one space)
for label, value in (
    ("Band Number:", bandNum),
    ("Image Height:", height),
    ("Image Width:", width),
    ("CRS:", crs),
    ("Transform:\n", transform),
    ("Shape:", shape),
):
    print(label, value)

Band Number: 18
Image Height: 3267
Image Width: 2351
CRS: EPSG:32645
Transform:
 | 30.00, 0.00, 576780.00|
| 0.00,-30.00, 2825010.00|
| 0.00, 0.00, 1.00|
Shape: (3267, 2351)


In [6]:
# Create an empty pandas DataFrame that will hold the pixel values,
# one column per raster band
image_bands = pd.DataFrame()

In [7]:
# Read every band as a flat (row-major) pixel vector and assemble the table in
# a single DataFrame construction. Growing the frame with one DataFrame.join
# per band repeats a full-table join for each band and is not idempotent when
# the cell is re-run; building the frame once avoids both problems.
#
# Column layout is deliberately unchanged from the join-based loop: columns are
# labelled with the integer band indexes and run from the last band down to
# band 1, so the later `bandNames[::-1]` rename still lines up.
image_bands = pd.DataFrame(
    {
        band_index: image.read(band_index).ravel()
        for band_index in list(image.indexes)[::-1]
    }
)

In [8]:
# Inspect the assembled table: one row per pixel, one column per band index
image_bands

Unnamed: 0,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
0,,,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7680712,,,,,,,,,,,,,,,,,,
7680713,,,,,,,,,,,,,,,,,,
7680714,,,,,,,,,,,,,,,,,,
7680715,,,,,,,,,,,,,,,,,,


In [9]:
# Band names in raster band order: bandNames[0] is applied to band 1 and
# bandNames[-1] to the last band when the columns are renamed
bandNames = [
    "Elevation",
    "Slope",
    "Dist_to_River",
    "Drainage_Density",
    "Geomorphology",
    "Lithology",
    "Relief_Amplitude",
    "Rainfall",
    "MFI",
    "NDVI",
    "MNDWI",
    "SPI",
    "STI",
    "TPI",
    "TRI",
    "TWI",
    "LULC",
    "Clay_Content",
]

In [10]:
# The table's columns run from the last band to the first, so the band names
# are applied in reverse order
image_bands.columns = list(reversed(bandNames))
image_bands.head()

Unnamed: 0,Clay_Content,LULC,TWI,TRI,TPI,STI,SPI,MNDWI,NDVI,MFI,Rainfall,Relief_Amplitude,Lithology,Geomorphology,Drainage_Density,Dist_to_River,Slope,Elevation
0,,,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,


## **Data Preprocessing**

In [11]:
# Replace null values with 0 in every column of the table
# (the fill is applied to the whole DataFrame, not only to Clay_Content)
image_bands = image_bands.fillna(0)

In [12]:
# Inspect the table after the null values have been filled
image_bands

Unnamed: 0,Clay_Content,LULC,TWI,TRI,TPI,STI,SPI,MNDWI,NDVI,MFI,Rainfall,Relief_Amplitude,Lithology,Geomorphology,Drainage_Density,Dist_to_River,Slope,Elevation
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7680712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7680713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7680714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7680715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Lookup tables that translate the integer class codes of the categorical
# bands into readable labels. Codes are consecutive and start at 1, so each
# table is built by numbering its label list.

# Geomorphology classes (codes 1-8)
geomorpholoy_dict = dict(enumerate(
    [
        "Active_Flood_Plain",
        "Embankment",
        "Older_Alluvial_Plain",
        "Older_Flood_Plain",
        "Pond",
        "River",
        "WatBod_Lake",
        "Younger_Alluvial_Plain",
    ],
    start=1,
))

# Lithology classes (codes 1-5)
lithology_dict = dict(enumerate(
    [
        "Cl_wi_S_Si_Ir_N",
        "Fe_Ox_S_Si_Cl",
        "S_Si_Gr",
        "S_Si_Cl",
        "S_Si_Cl_wi_Cal_Co",
    ],
    start=1,
))

# Land use / land cover classes (codes 1-5)
lulc_dict = dict(enumerate(
    [
        "Waterbodies",
        "Natural_Vegetation",
        "Agricultural_Field",
        "Bare_Ground",
        "Built_UP_Area",
    ],
    start=1,
))

In [14]:
# Swap the numeric class codes in the three categorical columns for their
# labels; values without an entry in a lookup table are left unchanged
category_labels = {
    "Geomorphology": geomorpholoy_dict,
    "Lithology": lithology_dict,
    "LULC": lulc_dict,
}
image_bands = image_bands.replace(category_labels)

In [15]:
# One-hot encode the 'Geomorphology', 'Lithology' and 'LULC' columns, then
# list the resulting columns and dtypes
categorical_columns = ["Geomorphology", "Lithology", "LULC"]
image_bands = pd.get_dummies(data=image_bands, columns=categorical_columns)
image_bands.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7680717 entries, 0 to 7680716
Data columns (total 36 columns):
 #   Column                                Dtype  
---  ------                                -----  
 0   Clay_Content                          float64
 1   TWI                                   float64
 2   TRI                                   float64
 3   TPI                                   float64
 4   STI                                   float64
 5   SPI                                   float64
 6   MNDWI                                 float64
 7   NDVI                                  float64
 8   MFI                                   float64
 9   Rainfall                              float64
 10  Relief_Amplitude                      float64
 11  Drainage_Density                      float64
 12  Dist_to_River                         float64
 13  Slope                                 float64
 14  Elevation                             float64
 15  Geomorphology_0

## **Select the Best Features**

In [16]:
# Best features, grouped by kind. The final list keeps the continuous
# variables first, followed by the one-hot Geomorphology, Lithology and LULC
# columns, each group in the order written here.
continuous_features = [
    'Dist_to_River', 'TWI', 'Rainfall', 'Clay_Content', 'TRI',
    'NDVI', 'MFI', 'Elevation', 'MNDWI', 'Drainage_Density',
]
geomorphology_features = [
    'Geomorphology_Active_Flood_Plain',
    'Geomorphology_Older_Alluvial_Plain',
    'Geomorphology_Older_Flood_Plain',
]
lithology_features = [
    'Lithology_Cl_wi_S_Si_Ir_N',
    'Lithology_Fe_Ox_S_Si_Cl',
    'Lithology_S_Si_Cl',
    'Lithology_S_Si_Cl_wi_Cal_Co',
]
lulc_features = [
    'LULC_Agricultural_Field',
    'LULC_Built_UP_Area',
    'LULC_Natural_Vegetation',
]
selected_features = [
    *continuous_features,
    *geomorphology_features,
    *lithology_features,
    *lulc_features,
]
len(selected_features)

20

In [17]:
# Keep only the selected feature columns, in the order given by the list
image_bands = image_bands.loc[:, selected_features]
image_bands

Unnamed: 0,Dist_to_River,TWI,Rainfall,Clay_Content,TRI,NDVI,MFI,Elevation,MNDWI,Drainage_Density,Geomorphology_Active_Flood_Plain,Geomorphology_Older_Alluvial_Plain,Geomorphology_Older_Flood_Plain,Lithology_Cl_wi_S_Si_Ir_N,Lithology_Fe_Ox_S_Si_Cl,Lithology_S_Si_Cl,Lithology_S_Si_Cl_wi_Cal_Co,LULC_Agricultural_Field,LULC_Built_UP_Area,LULC_Natural_Vegetation
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7680712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
7680713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
7680714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0
7680715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0


## **Export the Data as CSV**

In [19]:
# Name of the exported CSV file
file_name = "Image_CSV.csv"
# Destination folder; the trailing slash matters because the export path is
# formed by concatenating folder and file name
output_folder = "/content/drive/MyDrive/ML & DL/"

In [21]:
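# The export is disabled; uncomment the line below to write the table
# (one row per pixel) to output_folder + file_name.
# image_bands.to_csv(output_folder+file_name)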