## GPS

### Extract GPS coordinates from .jpg

In [10]:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS

def get_exif(filename):
    exif = Image.open(filename)._getexif()

    if exif is not None:
        # Create a copy of the keys before iterating
        keys = list(exif.keys())
        for key in keys:
            name = TAGS.get(key, key)
            exif[name] = exif.pop(key)

        if 'GPSInfo' in exif:
            # Create a copy of the GPSInfo keys before iterating
            gps_keys = list(exif['GPSInfo'].keys())
            for key in gps_keys:
                name = GPSTAGS.get(key, key)
                exif['GPSInfo'][name] = exif['GPSInfo'].pop(key)

    return exif

exif = get_exif('D:/Yehmh/test_py/DJI_0021_for_test.JPG')
print(exif)


{'GPSInfo': {'GPSVersionID': b'\x02\x03\x00\x00', 'GPSLatitudeRef': 'N', 'GPSLatitude': (23.0, 46.0, 40.6655), 'GPSLongitudeRef': 'E', 'GPSLongitude': (121.0, 29.0, 15.6631), 'GPSAltitudeRef': b'\x00', 'GPSAltitude': 110.397}, 'ResolutionUnit': 2, 'ExifOffset': 182, 'ImageDescription': 'DCIM\\100MEDIA\\DJI_0021.JPG', 'Make': 'DJI\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'Model': 'FC6310\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'Software': 'v01.07.1641\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'Orientation': 1, 'DateTime': '2023:02:02 09:16:41', 'YCbCrPositioning': 1, 'XResolution': 72.0, 'YResolution': 72.0, 'XPComment': b'T\x00y\x00p\x00e\x00=\x00N\x00,\x00 \x00M\x00o\x00d\x00e\x00=\x00P\x00,\x00 \x00D\x00E\x00=\x00N\x00o\x00n\x00e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00

### traps GPS + which species

In [3]:
import pandas as pd
import twd97

# Read the CSV files into pandas DataFrames
seedtrapGPS_df = pd.read_csv("D:/Yehmh/test_py/seedtrapGPS.csv")  # Assuming the file name is "traps.csv"
# PlantsAroundTraps_df = pd.read_csv("D:/Yehmh/test_py/PlantsAroundTraps_ZF.csv")  # Assuming the file name is "species.csv"
PlantsAroundTraps_df = pd.read_csv("D:/Yehmh/test_py/PlantsAroundTraps_ZF.csv", encoding='latin1')


# Group species data by trap location and find the most common species in each trap
main_species_df = PlantsAroundTraps_df.groupby('Plot')['SpeciesID'].agg(lambda x: x.value_counts().idxmax()).reset_index()
# print(seedtrapGPS_df)

# Merge the main species data with trap locations
merged_df = pd.merge(seedtrapGPS_df, main_species_df, on='Plot', how='left')
# print(merged_df)

# Apply conversion function to each row of the DataFrame
merged_df['TWD97_X'], merged_df['TWD97_Y'] = zip(*merged_df.apply(lambda row: twd97.fromwgs84(row['latitude'], row['longitude']), axis=1))

# Display the merged DataFrame
print("\nMerged DataFrame:")
print(merged_df)

# Now, you can save the merged DataFrame to a new CSV file if needed
# merged_df.to_csv("traps_main_species_twd97.csv", index=False)



Merged DataFrame:
     Plot   latitude   longitude  SpeciesID        TWD97_X       TWD97_Y
0    T1S1  23.796614  121.491990        NaN  300132.554107  2.632586e+06
1    T1S2  23.796413  121.492345        5.0  300168.802512  2.632564e+06
2    T1S3  23.796279  121.492650        2.0  300199.920424  2.632549e+06
3    T1S4  23.796107  121.493068        1.0  300242.587235  2.632530e+06
4    T1S5  23.795898  121.493360        2.0  300272.450657  2.632507e+06
5    T1S6  23.795762  121.493709        5.0  300308.062925  2.632492e+06
6    T1S7  23.795618  121.494023        2.0  300340.102146  2.632476e+06
7    T1S8  23.795506  121.494466        2.0  300385.303754  2.632464e+06
8    T1S9  23.795326  121.494791        5.0  300418.406486  2.632444e+06
9   T1S10  23.795189  121.495153        7.0  300455.354170  2.632429e+06
10   T2S1  23.792507  121.492907        6.0  300227.566492  2.632131e+06
11   T2S2  23.792311  121.493176        6.0  300255.051336  2.632110e+06
12   T2S3  23.792181  121.493561

In [2]:
import pandas as pd
import twd97

# Read the CSV files into pandas DataFrames
seedtrapGPS_df = pd.read_csv("D:/Yehmh/test_py/seedtrapGPS.csv")
PlantsAroundTraps_df = pd.read_csv("D:/Yehmh/test_py/PlantsAroundTraps_ZF.csv", encoding='latin1')

# Group species data by trap location and find the most common species in each trap
main_species_df = PlantsAroundTraps_df.groupby('Plot')['SpeciesID'].agg(lambda x: x.value_counts().idxmax()).reset_index()

# Calculate the counts of each species in each plot
species_counts = PlantsAroundTraps_df.groupby(['Plot', 'SpeciesID']).size().reset_index(name='Count')
# print(species_counts)

# Calculate the total count of species in each plot
total_counts = species_counts.groupby('Plot')['Count'].sum().reset_index(name='TotalCount')
# print(total_counts)

# Merge total counts with species counts
species_counts = pd.merge(species_counts, total_counts, on='Plot', how='left')

# Calculate the percentage of the main species in each plot
species_counts['Percentage'] = (species_counts['Count'] / species_counts['TotalCount']) * 100
print(species_counts[species_counts['Percentage'] >= 80])

# Now, you can save the merged DataFrame to a new CSV file if needed
species_counts.to_csv("species_percentage.csv", index=False)

     Plot  SpeciesID  Count  TotalCount  Percentage
4    T1S3          2      1           1  100.000000
5    T1S4          1      4           4  100.000000
6    T1S5          2      1           1  100.000000
8    T1S6          5     11          13   84.615385
13   T1S8          2      8           9   88.888889
14   T1S9          5      2           2  100.000000
15   T2S1          6     17          17  100.000000
16  T2S10          5      9          11   81.818182
18   T2S2          6     20          20  100.000000
19   T2S3          6     22          22  100.000000
20   T2S4          6     19          19  100.000000
21   T2S5          6     25          25  100.000000
22   T2S6          6     21          21  100.000000
23   T2S7          6     20          20  100.000000
24   T2S8          6     13          13  100.000000
28   T3S1          4     30          30  100.000000
29  T3S10          6      6           6  100.000000
30   T3S2          4     29          29  100.000000
31   T3S3   

### GPS (10 meters around traps)

In [29]:
def points_around_center(center, distance):
    """
    Generate four points around the given center point, forming a square with sides aligned with the cardinal directions.

    Args:
        center (tuple): Tuple containing the latitude and longitude of the center point.
        distance (float): Distance in meters from the center point to each of the four points.

    Returns:
        List of tuples, each containing latitude and longitude of a point around the center.
    """
    lat, lon = center
    
    # Calculate latitude and longitude adjustments for north, south, east, and west points
    lat_adj = distance / 111111  # 1 degree of latitude is approximately 111111 meters
    lon_adj = distance / (111111 * (1 / abs((lat))))  # Longitude adjustment depends on latitude
    
    # Generate the four points
    north_point = (lat + lat_adj, lon)
    south_point = (lat - lat_adj, lon)
    east_point = (lat, lon + lon_adj)
    west_point = (lat, lon - lon_adj)
    
    return [north_point, south_point, east_point, west_point]

# Example usage
center_point = (40.7128, -74.0060)  # New York City
distance_meters = 10
points = points_around_center(center_point, distance_meters)

# Print the generated points
for i, point in enumerate(points):
    print(f"Point {i+1}: Latitude = {point[0]}, Longitude = {point[1]}")


Point 1: Latitude = 40.71289000009, Longitude = -74.006
Point 2: Latitude = 40.712709999910004, Longitude = -74.006
Point 3: Latitude = 40.7128, Longitude = -74.00233584433585
Point 4: Latitude = 40.7128, Longitude = -74.00966415566415


## Dataset

In [4]:
import os

base_folder = "D:/Yehmh/test_py/202301/P00073_transect_234/5m_5m/known/"

# Create folders T1S1 to T1S10
for j in range(2, 5):
    for i in range(1, 11):
        folder_name = f"T{j}S{i}"
        folder_path = os.path.join(base_folder, folder_name)
        os.makedirs(folder_path)

print("Folders created successfully!")



Folders created successfully!


In [6]:
import os
import numpy as np
import pandas as pd
from osgeo import gdal
from osgeo import osr
import math

# Open the original TIFF file
folder_path = "D:/Yehmh/test_py/202301/P00073_transect_234/"
file_name = "202301P00073_RGB_transparent_mosaic_group1.tif"
tif_file = gdal.Open(os.path.join(folder_path, file_name))

# Read the CSV file
traps_info = pd.read_csv("traps_main_species_twd97.csv")
# Filter the DataFrame to include only rows where Plot starts with "T1S"
# traps_info = traps_info[traps_info['Plot'].str.startswith("T1S")]

# Get the geotransform information
geotransform = tif_file.GetGeoTransform()
xmin = geotransform[0] # TM2 (m)
ymax = geotransform[3] # TM2 (m)
res = geotransform[1] # meters/pixel

# Get the raster size
x_size = tif_file.RasterXSize # pixels
y_size = tif_file.RasterYSize # pixels
xlen = res * x_size # meters
ylen = res * y_size # meters

# Define the size of the blocks
block_size = 5 # meters by meters

test_1 = 0
test_2 = 0

# Iterate over the bounding boxes of each 1m x 1m block
for y in np.arange(ymax, ymax - y_size * res, -block_size):
    for x in np.arange(xmin, xmin + x_size * res, block_size):
        # Calculate the coordinates of the block
        x_min_block, y_max_block = x, y
        x_max_block, y_min_block = x + block_size, y - block_size
        block_coord = [x + block_size/2, y - block_size/2]

        # Read the pixel values for the block
        pixel_values = tif_file.ReadAsArray(
            int((x - xmin) / res), int((ymax - y) / res), int(block_size / res), int(block_size / res))
        
        if pixel_values is None:
            # print("Error: Failed to read pixel values.", test_1)            
            # print(int(block_coord[0]), int(block_coord[1]), test_1)
            # test_1 = test_1 + 1
            continue  # Skip processing this block and continue to the next one

        # plt.figure()
        # plt.imshow(pixel_values[0], cmap='gray')

        # Find the closest plot
        min_distance = float('inf')
        closest_t1s = None
        for index, row in traps_info.iterrows():
            plot_name = row['Plot']
            plot_coord = [row['TWD97_X'], row['TWD97_Y']]
            distance = math.dist(plot_coord, block_coord)
            if distance < min_distance:
                min_distance = distance
                closest_plot = plot_name

        # Move the file to the appropriate folder
        if min_distance < 10:  # If distance is less than 10 meters
            folder = os.path.join("known", closest_plot)
            print(closest_plot, min_distance, int(block_coord[0]), int(block_coord[1]))
        else:
            folder = "unknown"
            # print(min_distance)
            # continue
        
        # Create a new dataset for the 1m x 1m area
        driver = gdal.GetDriverByName('GTiff')

        if driver is None:
            print("Error: Failed to get GDAL driver.")
            exit(1)        
        
        out_tif = driver.Create(
            os.path.join(folder_path, f"{block_size}m_{block_size}m", folder, f"{int(block_coord[0])}_{int(block_coord[1])}.tif"),
            int(block_size/res), int(block_size/res), tif_file.RasterCount, gdal.GDT_Byte
        )

        if out_tif is None:
            # print("Error: Failed to create output TIFF dataset.")
            # exit(1)
            print("Error: Failed to create output TIFF dataset.", test_2)
            test_2 = test_2 + 1
            continue  # Skip processing this block and continue to the next one
        
        # Set the geotransform
        out_tif.SetGeoTransform((x_min_block, res, 0, y_max_block, 0, -res))
        srs = osr.SpatialReference()            # establish encoding
        srs.ImportFromEPSG(3826)                # TWD97 lat/long
        out_tif.SetProjection(srs.ExportToWkt()) # export coords to file

        # Print the dimensions of the array
        # print("Array dimensions:", pixel_values.shape)

        # Write the pixel values to the new dataset
        for band_num in range(tif_file.RasterCount):
            band_values = pixel_values[band_num]
            out_tif.GetRasterBand(band_num + 1).WriteArray(band_values)
        
        # write to disk
        out_tif.FlushCache()

        # Close the new dataset
        out_tif = None

# Close the original TIFF file
tif_file = None

T2S1 9.369823786108052 300225 2632140
T2S1 9.65234675002896 300230 2632140
T2S1 8.111695123671273 300220 2632135
T2S1 4.601499514231284 300225 2632135
T2S1 5.152474917896171 300230 2632135
T2S1 9.051088209692113 300235 2632135
T2S1 7.012830938796889 300220 2632130
T2S1 2.134009788221522 300225 2632130
T2S1 3.1509042791401463 300230 2632130
T2S1 8.080989900772188 300235 2632130
T2S1 9.08625323072725 300220 2632125
T2S1 6.1590744250236 300225 2632125
T2S1 6.580911621732829 300230 2632125
T2S1 9.93391150419293 300235 2632125
T2S2 7.287419773051078 300250 2632115
T2S2 5.799369530299731 300255 2632115
T2S2 8.00992427862349 300260 2632115
T2S2 9.478949675188806 300245 2632110
T2S2 4.514054379942167 300250 2632110
T2S2 0.9502036336429026 300255 2632110
T2S2 5.606165083700379 300260 2632110
T2S2 6.135705904090769 300250 2632105
T2S2 4.2629903755236676 300255 2632105
T2S2 6.978487439418412 300260 2632105
T2S2 9.243553804583314 300255 2632100
T2S3 6.263313696880593 300290 2632100
T2S3 5.19252332

In [3]:
# delete unused files

from PIL import Image
import os

def count_pixels(image):
    """
    Count white and black pixels in the image.
    """
    width, height = image.size
    white_count = 0
    black_count = 0
    for y in range(height):
        for x in range(width):
            pixel = image.getpixel((x, y))
            if pixel == (255, 255, 255, 255):  # Assuming white pixels are (255, 255, 255)
                white_count += 1
            elif pixel == (0, 0, 0, 0):       # Assuming black pixels are (0, 0, 0)
                black_count += 1
    return white_count, black_count

def delete_if_mostly_white_or_black(image_path):
    """
    Delete the image if it contains more than half white or black pixels.
    """
    image = Image.open(image_path)
    white_count, black_count = count_pixels(image)
    total_pixels = image.width * image.height
    if white_count > total_pixels / 2 or black_count > total_pixels / 2:
        os.remove(image_path)
        print(f"Deleted {image_path}")

def main(folder_path):
    """
    Main function to iterate through TIFF files in the folder.
    """
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".tif") or file_name.endswith(".tiff"):
            file_path = os.path.join(folder_path, file_name)
            delete_if_mostly_white_or_black(file_path)

if __name__ == "__main__":
    folder_path = "D:/Yehmh/test_py/202301/P00069/5m_5m"
    main(folder_path)


Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630428.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630433.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630438.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630443.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630448.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630453.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630458.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630463.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630468.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630473.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630478.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630483.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630488.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630493.tif
Deleted D:/Yehmh/test_py/202301/P00069/5m_5m\298614_2630498.tif
Deleted D:/Yehmh/test_py/202301/P00069/5

## Dataset (pure unknown)

In [2]:
import os
import numpy as np
import pandas as pd
from osgeo import gdal
from osgeo import osr
import math

# Open the original TIFF file
folder_path = "D:/Yehmh/test_py/202301/P00070/"
file_name = "202301P00070_RGB_transparent_mosaic_group1.tif"
tif_file = gdal.Open(os.path.join(folder_path, file_name))

# Get the geotransform information
geotransform = tif_file.GetGeoTransform()
xmin = geotransform[0] # TM2 (m)
ymax = geotransform[3] # TM2 (m)
res = geotransform[1] # meters/pixel

# Get the raster size
x_size = tif_file.RasterXSize # pixels
y_size = tif_file.RasterYSize # pixels
xlen = res * x_size # meters
ylen = res * y_size # meters

# Define the size of the blocks
block_size = 5 # meters by meters

test_1 = 0
test_2 = 0

# Iterate over the bounding boxes of each 1m x 1m block
for y in np.arange(ymax, ymax - y_size * res, -block_size):
    for x in np.arange(xmin, xmin + x_size * res, block_size):
        # Calculate the coordinates of the block
        x_min_block, y_max_block = x, y
        x_max_block, y_min_block = x + block_size, y - block_size
        block_coord = [x + block_size/2, y - block_size/2]

        # Read the pixel values for the block
        pixel_values = tif_file.ReadAsArray(
            int((x - xmin) / res), int((ymax - y) / res), int(block_size / res), int(block_size / res))
        
        if pixel_values is None:
            # print("Error: Failed to read pixel values.", test_1)            
            # print(int(block_coord[0]), int(block_coord[1]), test_1)
            # test_1 = test_1 + 1
            continue  # Skip processing this block and continue to the next one
        
        # Create a new dataset for the 1m x 1m area
        driver = gdal.GetDriverByName('GTiff')

        if driver is None:
            print("Error: Failed to get GDAL driver.")
            exit(1)        
        
        out_tif = driver.Create(
            os.path.join(folder_path, f"{block_size}m_{block_size}m", f"{int(block_coord[0])}_{int(block_coord[1])}.tif"),
            int(block_size/res), int(block_size/res), tif_file.RasterCount, gdal.GDT_Byte
        )

        if out_tif is None:
            # print("Error: Failed to create output TIFF dataset.")
            # exit(1)
            print("Error: Failed to create output TIFF dataset.", test_2)
            test_2 = test_2 + 1
            continue  # Skip processing this block and continue to the next one
        
        # Set the geotransform
        out_tif.SetGeoTransform((x_min_block, res, 0, y_max_block, 0, -res))
        srs = osr.SpatialReference()            # establish encoding
        srs.ImportFromEPSG(3826)                # TWD97 lat/long
        out_tif.SetProjection(srs.ExportToWkt()) # export coords to file

        # Write the pixel values to the new dataset
        for band_num in range(tif_file.RasterCount):
            band_values = pixel_values[band_num]
            out_tif.GetRasterBand(band_num + 1).WriteArray(band_values)
        
        # write to disk
        out_tif.FlushCache()

        # Close the new dataset
        out_tif = None

# Close the original TIFF file
tif_file = None