### libraries

In [17]:
import rasterio
import random
import csv
import geopandas as gpd
from shapely.geometry import Point


### Sampling the coordinates within the given boundary defined by shapefile
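#### We sample `num_total_samples` points split equally across the four classes (200 points, i.e. 50 per class, in the run below). This can be increased (e.g. to 1000 points, 250 per class) depending on the requirements and the available computational resources.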

In [19]:
#sampling the points

def sample_points_within_shapefile(tif_file, shapefile, num_total_samples=200, max_attempts=1000):
    """Draw class-balanced random points that lie inside a boundary polygon.

    Random (x, y) coordinates are drawn from the bounding box of the
    shapefile's merged geometry. A coordinate is kept when it is contained in
    that geometry, falls on the raster grid, and the band-1 value of the
    raster cell under it equals the class currently being searched for.

    Args:
        tif_file: Path of the raster whose band 1 holds the class values.
            The classes searched for are hard-coded as 0, 1, 2 and 3.
        shapefile: Path of the vector file defining the sampling boundary.
            NOTE(review): the boundary coordinates are used directly as raster
            coordinates, so both files are assumed to share one CRS; this is
            not checked here.
        num_total_samples: Total number of points wanted; each class gets a
            quota of ``num_total_samples / 4``.
        max_attempts: Maximum number of random draws spent looking for ONE
            sample of a class. If they all fail, the class is abandoned so
            the function cannot loop forever on a class that is absent
            inside the boundary.

    Returns:
        A list of ``(x, y, pixel_value)`` tuples. It contains fewer than
        ``num_total_samples`` entries when at least one class was abandoned
        (a ``UserWarning`` is emitted for each abandoned class).
    """
    import warnings  # function-scope import: keeps the notebook's import cell unchanged

    # Merge every geometry of the shapefile into one boundary shape
    boundary = gpd.read_file(shapefile).unary_union
    minx, miny, maxx, maxy = boundary.bounds

    class_values = (0, 1, 2, 3)
    quota = num_total_samples / len(class_values)
    samples_per_class = {value: 0 for value in class_values}
    abandoned = set()  # classes for which a full round of draws found nothing
    sampled_points = []

    with rasterio.open(tif_file) as src:
        while len(sampled_points) < num_total_samples:
            # Only classes that still need samples and have not been given up on
            candidates = [
                value for value in class_values
                if value not in abandoned and samples_per_class[value] < quota
            ]
            if not candidates:
                break
            value = random.choice(candidates)

            for _ in range(max_attempts):
                # Random coordinates within the boundary's bounding box
                x = random.uniform(minx, maxx)
                y = random.uniform(miny, maxy)
                if not boundary.contains(Point(x, y)):
                    continue

                # rasterio's index() returns (row, col) -- in that order
                row, col = src.index(x, y)
                if not (0 <= row < src.height and 0 <= col < src.width):
                    continue  # the point is not on the raster grid

                pixel_value = src.read(1, window=((row, row + 1), (col, col + 1)))[0][0]
                if pixel_value == value:
                    sampled_points.append((x, y, pixel_value))
                    samples_per_class[value] += 1
                    break  # one sample found; pick the next class at random
            else:
                # No break: every draw failed, so stop searching for this class
                abandoned.add(value)
                warnings.warn(
                    f"No pixel of class {value} found in {max_attempts} attempts; "
                    f"giving up on this class (increase max_attempts for rare classes)."
                )

    return sampled_points

if __name__ == "__main__":
    # Inputs: class raster and boundary shapefile; output: CSV of sampled points
    tif_file = "/home/sushil/data_f2/lct_para_tmf.tif"  # TIF file path
    shapefile = "/home/sushil/data_f2/bound/para_boundary2252.shp"  # shapefile path
    output_file = "/home/sushil/data_f2/output/sampled_points_within_boundary.csv"

    # Total number of points requested from the sampler
    num_total_samples = 200
    sampled_points = sample_points_within_shapefile(
        tif_file, shapefile, num_total_samples
    )

    # Header row first, then one (X, Y, Class) row per sampled point
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['X', 'Y', 'Class'])
        writer.writerows(sampled_points)

    print(f"{len(sampled_points)} points sampled within the boundary defined by the shapefile and saved to {output_file}.")


200 points sampled within the boundary defined by the shapefile and saved to /home/sushil/data_f2/output/sampled_points_within_boundary.csv.


### getting the values of our features
#### The sampled points are now used to extract the values of the different features, i.e. elevation, land cover, population density, etc.

In [20]:
# Function to read elevation from GeoTIFF file based on coordinates
def get_elevation_from_tif(tif_file, x, y):
    """Read the band-1 value of the raster cell that contains point (x, y).

    Args:
        tif_file: Path of the raster to open with rasterio.
        x: X coordinate of the point, used directly as a raster coordinate.
        y: Y coordinate of the point, used directly as a raster coordinate.

    Returns:
        The single row of the 1x1 window read at the point, i.e. a
        one-element sequence holding the cell value (index it with ``[0]``
        to obtain the scalar).

    Raises:
        IndexError: If the point does not fall on the raster grid.
    """
    with rasterio.open(tif_file) as src:
        # rasterio's index() returns (row, col) -- in that order
        row, col = src.index(x, y)
        if not (0 <= row < src.height and 0 <= col < src.width):
            raise IndexError(f"Coordinates ({x}, {y}) fall outside the raster extent")
        # A window is ((row_start, row_stop), (col_start, col_stop))
        elevation = src.read(1, window=((row, row + 1), (col, col + 1)))
    return elevation[0]

# Path to the CSV file containing coordinates
csv_file = '/home/sushil/data_f2/output/sampled_points_within_boundary.csv'
# Path to the GeoTIFF file containing elevation data
tif_file = '/home/sushil/data_f2/elevation_new.tif'
# Path to save the output CSV file
output_csv_file = '/home/sushil/data_f2/output/elevation_output.csv'

# Read the sampled coordinates and write one (X, Y, Elevation) row per point
with open(csv_file, 'r') as input_file, open(output_csv_file, 'w', newline='') as output_file:
    reader = csv.DictReader(input_file)
    writer = csv.writer(output_file)
    # Write header for the output CSV file
    writer.writerow(['X', 'Y', 'Elevation'])

    for row in reader:
        # Extracting coordinates from the CSV file (same handling as the
        # land cover and population cells)
        try:
            x = float(row['X'])
            y = float(row['Y'])
        except ValueError:
            print("Invalid coordinates found in CSV.")
            continue

        # Getting elevation based on coordinates; a point without a raster
        # cell is reported and skipped instead of aborting the whole export
        try:
            elevation = get_elevation_from_tif(tif_file, x, y)
        except IndexError:
            print(f"No elevation data available at coordinates ({x}, {y})")
            continue
        if len(elevation) == 0:
            print(f"No elevation data available at coordinates ({x}, {y})")
            continue

        # The helper returns a one-element array; write the scalar so the CSV
        # cell holds a plain number rather than an array repr such as "[123]"
        writer.writerow([x, y, elevation[0]])

print("Output CSV file saved successfully.")


Output CSV file saved successfully.


In [21]:
# Function to read land cover from GeoTIFF file based on coordinates
def get_land_cover_from_tif(tif_file, x, y):
    """Read the band-1 value of the raster cell that contains point (x, y).

    Args:
        tif_file: Path of the raster to open with rasterio.
        x: X coordinate of the point, used directly as a raster coordinate.
        y: Y coordinate of the point, used directly as a raster coordinate.

    Returns:
        The single row of the 1x1 window read at the point (a one-element
        sequence holding the cell value), or ``None`` when the point is off
        the raster grid or the read fails. A message is printed in both
        failure cases.
    """
    try:
        with rasterio.open(tif_file) as src:
            # rasterio's index() returns (row, col) -- in that order
            row, col = src.index(x, y)
            if not (0 <= row < src.height and 0 <= col < src.width):
                raise IndexError("point outside the raster extent")
            # A window is ((row_start, row_stop), (col_start, col_stop))
            land_cover = src.read(1, window=((row, row + 1), (col, col + 1)))
        return land_cover[0]
    except IndexError:
        print(f"No land cover data available at coordinates ({x}, {y})")
        return None
    except Exception as e:
        # Deliberate best-effort: report any other read problem and move on
        print(f"Error reading land cover: {e}")
        return None

# Path to the CSV file containing coordinates
csv_file = '/home/sushil/data_f2/output/sampled_points_within_boundary.csv'
# Path to the GeoTIFF file containing land cover data
tif_file = '/home/sushil/data_f2/land_repo.tif'
# Path to save the output CSV file
output_csv_file = '/home/sushil/data_f2/output/land_cover_output.csv'

# Read the sampled coordinates and write one (X, Y, Land Cover) row per point
with open(csv_file, 'r') as input_file, open(output_csv_file, 'w', newline='') as output_file:
    reader = csv.DictReader(input_file)
    writer = csv.writer(output_file)
    # Write header for the output CSV file
    writer.writerow(['X', 'Y', 'Land Cover'])

    for row in reader:
        # Extracting coordinates from the CSV file
        try:
            x = float(row['X'])
            y = float(row['Y'])
        except ValueError:
            print("Invalid coordinates found in CSV.")
            continue

        # Getting land cover based on coordinates; None means the helper
        # already reported a problem, an empty result means nothing was read
        land_cover = get_land_cover_from_tif(tif_file, x, y)
        if land_cover is None or len(land_cover) == 0:
            continue

        # The helper returns a one-element array; write the scalar so the CSV
        # cell holds a plain value rather than an array repr such as "[10]"
        writer.writerow([x, y, land_cover[0]])

print("Output CSV file saved successfully.")


Output CSV file saved successfully.


In [22]:
# Function to read population from GeoTIFF file based on coordinates
def get_population_from_tif(tif_file, x, y):
    """Read the band-1 value of the raster cell that contains point (x, y).

    Args:
        tif_file: Path of the raster to open with rasterio.
        x: X coordinate of the point, used directly as a raster coordinate.
        y: Y coordinate of the point, used directly as a raster coordinate.

    Returns:
        The single row of the 1x1 window read at the point (a one-element
        sequence holding the cell value), or ``None`` when the point is off
        the raster grid or the read fails. A message is printed in both
        failure cases.
    """
    try:
        with rasterio.open(tif_file) as src:
            # rasterio's index() returns (row, col) -- in that order
            row, col = src.index(x, y)
            if not (0 <= row < src.height and 0 <= col < src.width):
                raise IndexError("point outside the raster extent")
            # A window is ((row_start, row_stop), (col_start, col_stop))
            population = src.read(1, window=((row, row + 1), (col, col + 1)))
        return population[0]
    except IndexError:
        print(f"No population data available at coordinates ({x}, {y})")
        return None
    except Exception as e:
        # Deliberate best-effort: report any other read problem and move on
        print(f"Error reading population: {e}")
        return None

# Path to the CSV file containing coordinates
csv_file = '/home/sushil/data_f2/output/sampled_points_within_boundary.csv'
# Path to the GeoTIFF file containing population data
tif_file = '/home/sushil/data_f2/pop_repo.tif'
# Path to save the output CSV file
output_csv_file = '/home/sushil/data_f2/output/population_output.csv'

# Read the sampled coordinates and write one (X, Y, Population) row per point
with open(csv_file, 'r') as input_file, open(output_csv_file, 'w', newline='') as output_file:
    reader = csv.DictReader(input_file)
    writer = csv.writer(output_file)
    # Write header for the output CSV file
    writer.writerow(['X', 'Y', 'Population'])

    for row in reader:
        # Extracting coordinates from the CSV file
        try:
            x = float(row['X'])
            y = float(row['Y'])
        except ValueError:
            print("Invalid coordinates found in CSV.")
            continue

        # Getting population based on coordinates; None means the helper
        # already reported a problem, an empty result means nothing was read
        population = get_population_from_tif(tif_file, x, y)
        if population is None or len(population) == 0:
            continue

        # The helper returns a one-element array; write the scalar so the CSV
        # cell holds a plain number rather than an array repr such as "[12.5]"
        writer.writerow([x, y, population[0]])

print("Output CSV file saved successfully.")


Output CSV file saved successfully.


In [None]:
#Now we have the required data to train the model. We will first arrange this in a csv file named Book.csv and then proceed with model training and testing


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Concatenate

# Load the data
data = pd.read_csv('Book.csv')

# Separate features and labels
X_land_cover = data['Land Cover']
X_numeric = data[['Population', 'Elevation', 'HubDist']]
y = data['class']

# Convert 'Land Cover' feature to categorical
X_land_cover = X_land_cover.astype('category')

# One-hot encode 'Land Cover' feature.
# The encoder is left at its default (sparse) output and densified with
# .toarray(): the `sparse=` keyword was renamed to `sparse_output=` in
# scikit-learn 1.2 and removed in 1.4, so this form runs on old and new
# versions alike. to_numpy() hands the encoder a plain 2-D ndarray.
encoder = OneHotEncoder()
X_land_cover_encoded = encoder.fit_transform(
    X_land_cover.to_numpy().reshape(-1, 1)
).toarray()

# Min-Max scaling for numeric features
# NOTE(review): the scaler and the encoder are fitted on the full data set
# before the train/test split, so the test rows influence the preprocessing.
# Fit them on the training rows only if an unbiased test score matters.
scaler = MinMaxScaler()
X_numeric_scaled = scaler.fit_transform(X_numeric)

# Concatenate encoded 'Land Cover' feature with scaled numeric features
X = np.concatenate([X_numeric_scaled, X_land_cover_encoded], axis=1)

# Map the labels to consecutive integers 0..n_classes-1 so the same code
# works for a binary target and for a multi-class target
class_labels, y_encoded = np.unique(y.to_numpy(), return_inverse=True)
n_classes = len(class_labels)

# Split the data into training and testing sets (y_train / y_test hold the
# integer-encoded labels; class_labels maps them back to the original values)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# A single sigmoid unit with binary cross-entropy is only valid for two
# classes; with more classes use one softmax unit per class and the sparse
# categorical loss, which accepts integer labels directly
if n_classes > 2:
    output_layer = Dense(n_classes, activation='softmax')
    loss = 'sparse_categorical_crossentropy'
else:
    output_layer = Dense(1, activation='sigmoid')
    loss = 'binary_crossentropy'

# Building the ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    output_layer
])

# Compiling the model
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


### extra
#### The following cell reprojects a raster from one coordinate reference system to another. QGIS can also be used for this.

In [23]:
#for projection:
#import rasterio
#from rasterio.warp import calculate_default_transform, reproject, Resampling
#from rasterio.crs import CRS

# Define input and output file paths
#input_file = '/home/sushil/data_2/elevation_clip.tif'
#output_file = '/home/sushil/data/elevation_new.tif'

# Define the target CRS (EPSG:32722)
#target_crs = CRS.from_epsg(32722)

# Open the input GeoTIFF file
#with rasterio.open(input_file) as src:
    # Retrieve metadata
    #transform, width, height = calculate_default_transform(
        #src.crs, target_crs, src.width, src.height, *src.bounds)
    #kwargs = src.meta.copy()
    #kwargs.update({
        #'crs': target_crs,
        #'transform': transform,
        #'width': width,
        #'height': height
   # })

    # Create output file
   # with rasterio.open(output_file, 'w', **kwargs) as dst:
        # Reproject the raster data
       # for i in range(1, src.count + 1):
            #reproject(
               # source=rasterio.band(src, i),
               # destination=rasterio.band(dst, i),
               # src_transform=src.transform,
               # src_crs=src.crs,
               # dst_transform=transform,
               # dst_crs=target_crs,
               # resampling=Resampling.nearest)


In [24]:
# extra libraries
#import os
#from shutil import copy2
#import urllib.request
#from zipfile import ZipFile
#import forestatrisk as far