In [1]:
import numpy as np
import rasterio
from pathlib import Path
import joblib
import warnings

warnings.filterwarnings("ignore")

In [4]:
# load interpolated data for gfs and eumet 
eligibleCI_file = "GFS/eligibleCI_list.joblib"
cloudfiltered_list = "EUMET/cloudfiltered_list.joblib"

eligibleCI_list = joblib.load(eligibleCI_file)
cloudfiltered_list = joblib.load(cloudfiltered_list)

print("eligible CI : ", len(eligibleCI_list), eligibleCI_list[0].shape)
print("cloud filtered : ", len(cloudfiltered_list), cloudfiltered_list[0].shape)

eligible CI :  21 (3207, 3062)
cloud filtered :  240 (3207, 3062)


In [5]:
from datetime import datetime, timedelta

def extract_timestamp_from_filename(filename):
    parts = filename.split('_')
    date_str = parts[1]  # Extract date part (e.g., "10JUL2024")
    time_str = parts[2][:4]  # Extract time part (e.g., "2215")
    full_datetime = f"{date_str}_{time_str}"
    return datetime.strptime(full_datetime, "%d%b%Y_%H%M")

def convert_ist_to_utc(ist_time):
    ist_offset = timedelta(hours=5, minutes=30)
    return ist_time - ist_offset

def find_nearest_gfs_time(utc_time):
    gfs_hours = [0, 6, 12, 18]
    current_day = utc_time.replace(hour=0, minute=0, second=0, microsecond=0)
    next_day = current_day + timedelta(days=1)
    
    gfs_times = [current_day.replace(hour=h) for h in gfs_hours] + [next_day.replace(hour=0)]
    
    return min(gfs_times, key=lambda x: abs(x - utc_time))

def generate_gfs_filename(gfs_time):
    return f"gfs.0p25.{gfs_time.strftime('%Y%m%d%H')}.f000.grib2.nc"

def map_insats_to_gfs(insat_filename):
    insat_time = extract_timestamp_from_filename(insat_filename)
    utc_time = convert_ist_to_utc(insat_time)
    nearest_gfs_time = find_nearest_gfs_time(utc_time)
    return generate_gfs_filename(nearest_gfs_time)

# Test the mapping function
# insat_filename = "3RIMG_11JUL2024_2315_L1C_SGP_V01R00_IMG_TIR1.tif"
# print(map_insats_to_gfs(insat_filename))

In [None]:
import os

def get_file_index(folder_path, file_name):
    try:
        # List all files in the folder
        files = sorted(os.listdir(folder_path))  
        
        # Find the index of the specified file
        if file_name in files and file_name.startswith("gfs"):
            return files.index(file_name)
        else:
            return -1  # File not found
    except Exception as e:
        print(f"Error: {e}")
        return -1

In [10]:
folder = Path('INSAT')
gfs_path = Path('GFS/GFS_RHT')

# Get all file paths in the folder
insat_files = [file for file in folder.iterdir() if file.is_file() and file.name.startswith("3RIMG")]
print(f"Total files: {len(insat_files)}")
insat_files.sort()

first_filter_files =[]

for insat_filepath in insat_files:
    print(f"Opened: {insat_filepath.name}")
    print(f"Resp GFS file: {gfs_path}/{map_insats_to_gfs(insat_filepath.name)}")
    mapped_gfs_file = get_file_index(gfs_path, map_insats_to_gfs(insat_filepath.name))
    print(f"Index: {mapped_gfs_file}")

    eligibleCI = eligibleCI_list[mapped_gfs_file]
    
    with rasterio.open(insat_filepath) as src:
        insat_data = src.read(1)  # Read the first band
        
        result_array = np.zeros_like(insat_data)

        # Assign the values from insat_data where eligibleCI equals 2, i.e., unstable regions
        result_array[eligibleCI==2] = insat_data[eligibleCI == 2]
        first_filter_files.append(result_array)

Total files: 720
Opened: 3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_TIR1.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_TIR2.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_WV.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_TIR1.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_TIR2.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_WV.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0115_L1C_SGP_V01R00_IMG_TIR1.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.2024070918.f000.grib2.nc
Index: 0
Opened: 3RIMG_10JUL2024_0115_L1C_SGP_V01R00_IMG_TIR2.tif
Resp GFS file: GFS/GFS_RHT/gfs.0p25.20240

In [11]:
# save files after first filter in a joblib file
joblib.dump(first_filter_files, "bin/first_filter_files.joblib")

['bin/first_filter_files.joblib']

In [12]:
print("First filter files: ", len(first_filter_files))
second_filter_files = []

for i in range(0, len(first_filter_files), 3):
    cloud_filter = cloudfiltered_list[i // 3]
    for j in range(3):
        if i + j < len(first_filter_files):
            applied_filter = first_filter_files[i + j].copy()
            applied_filter[~np.isfinite(cloud_filter)] = 0
            second_filter_files.append(applied_filter)

print("Second filter files: ", len(second_filter_files))

First filter files:  720
Second filter files:  720


In [13]:
# save files after applying the second filter in a joblib file
joblib.dump(second_filter_files, "bin/second_filter_files.joblib")

['bin/second_filter_files.joblib']

In [14]:
import os

# Path to the folder containing the files
folder_path = 'INSAT'
# Get a list of all the files in the folder
files = [file for file in os.listdir(folder_path) if file.startswith("3RIMG")]
print(f"Number of files: {len(files)}")
files.sort()

batch_size = 3

BT_list = [] # TIR1
BTD1_list = [] # WV - TIR1 
BTD2_list = [] # TIR2 - TIR1

# dispConstant = 2.8977719e-3

files.sort()
# Iterate over the files in batches of 3
for i in range(0, len(files), batch_size):
    batch = files[i:i + batch_size]

    file1 = os.path.join(folder_path, batch[0])
    file2 = os.path.join(folder_path, batch[1])
    file3 = os.path.join(folder_path, batch[2])
    print("\n", file1, file2, file3)

    with rasterio.open(file1) as src:
        TIR1 = src.read(1)
    with rasterio.open(file2) as src:
        TIR2 = src.read(1)
    with rasterio.open(file3) as src:
        WV = src.read(1)
    
    # T = b / λ_max
    # BT = (dispConstant / TIR1) - 273.15
    TIR1[second_filter_files[i] == 0] = 0

    BTD1 = WV - TIR1
    # BTD1 = (dispConstant/btd1) -273.15
    BTD1[second_filter_files[i] == 0] = 0

    BTD2 = TIR2 - TIR1
    # BTD2 = (dispConstant/btd2) -273.15
    BTD2[second_filter_files[i] == 0] = 0

    BT_list.append(TIR1)
    BTD1_list.append(BTD1)
    BTD2_list.append(BTD2)
    '''
        The minimum value of 179.85948 for all bands is quite low. This might indicate some issues with data quality or calibration.
        The large range in BTD2 (-26.89212 to 21.873734) suggests significant temperature differences between TIR1 and TIR2, which is plausible but might warrant further investigation.
        The BTD1 range extending to negative values indicates that some pixels are colder than others, which is expected in mixed scenes.
    '''

Number of files: 720

 INSAT/3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0015_L1C_SGP_V01R00_IMG_WV.tif

 INSAT/3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0045_L1C_SGP_V01R00_IMG_WV.tif

 INSAT/3RIMG_10JUL2024_0115_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0115_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0115_L1C_SGP_V01R00_IMG_WV.tif

 INSAT/3RIMG_10JUL2024_0145_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0145_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0145_L1C_SGP_V01R00_IMG_WV.tif

 INSAT/3RIMG_10JUL2024_0215_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0215_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0215_L1C_SGP_V01R00_IMG_WV.tif

 INSAT/3RIMG_10JUL2024_0245_L1C_SGP_V01R00_IMG_TIR1.tif INSAT/3RIMG_10JUL2024_0245_L1C_SGP_V01R00_IMG_TIR2.tif INSAT/3RIMG_10JUL2024_0245_L1C_SGP_V01R00_

In [15]:
print("BT", len(BT_list), BT_list[0].shape, BT_list[0])
print("\n\nBTD1", len(BTD1_list), BTD1_list[0].shape, BTD1_list[0])
print("\n\nBTD2", len(BTD2_list), BTD2_list[0].shape, BTD2_list[0])

BT 240 (3207, 3062) [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


BTD1 240 (3207, 3062) [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


BTD2 240 (3207, 3062) [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [16]:
BTD1_trend = []
BTD1_trend.append(np.zeros_like(BTD1))

for i in range(1, len(BTD1_list)):
    BTD1_diff = BTD1_list[i] - BTD1_list[i-1]
    
    # print(f"Shape of BTD1_diff: {BTD1_diff.shape}", BTD1_diff.min(), BTD1_diff.max())
    BTD1_trend.append(BTD1_diff)

print(len(BTD1_trend))

240


In [17]:
BTD2_trend = []
BTD2_trend.append(np.zeros_like(BTD2))

for i in range(1, len(BTD2_list)):
    BTD2_diff = BTD2_list[i] - BTD2_list[i-1]

    # print(f"Shape of BTD2_diff: {BTD2_diff.shape}", BTD2_diff.min(), BTD2_diff.max())
    BTD2_trend.append(BTD2_diff)

print(len(BTD2_trend))

240


In [18]:
BT_trend = []
BT_trend.append(np.zeros_like(TIR1))

for i in range(1, len(BT_list)):
    BT_diff = BT_list[i] - BT_list[i-1]

    # print(f"Shape of BT_diff: {BT_diff.shape}", BT_diff.min(), BT_diff.max())
    BT_trend.append(BT_diff)

print(len(BT_trend))

240


In [4]:
from collections import Counter

def calculate_score(index, BT_list, BTD1_list, BTD2_list, BTD1_trend, BTD2_trend, BT_trend):
    scores = np.zeros_like(BT_list[index])
    
    condition1 = (BT_list[index] >= 248.15) & (BT_list[index] <= 253.15)
    condition2 = (BTD1_list[index] >= -10) & (BTD1_list[index] <= 0)
    condition3 = (BTD2_list[index] >= -3) & (BTD2_list[index] <= 0)

    condition4 = abs(BTD1_trend[index]) >= 3
    condition5 = abs(BTD2_trend[index]) >= 3
    condition6 = (BT_trend[index] >= -50) & (BT_trend[index] <= -4)

    scores += condition1 * 5
    scores += condition2 * 5
    scores += condition3 * 5
    scores += condition4 * 5
    scores += condition5 * 5
    scores += condition6 * 5

    print("Maximum score: ", np.max(scores))
    print("Minimum score: ", np.min(scores))

    PreCI_mask = np.where((scores<=15) & (scores>=10), 0,       # Mid Probability → 0
                np.where((scores<=20) & (scores>=15), 2,        # High Probability → 2
                1))                                             # low Probability → 1

    print(f"PreCI_mask data shape {Counter(PreCI_mask.flatten())}")
    return PreCI_mask

# calculate_score(239)

In [20]:
# dump BT_list, BTD1_list, BTD2_list, BTD1_trend, BTD2_trend, BT_trend in joblib files
joblib.dump(BT_list, "bin/attr/BT_list.joblib")
joblib.dump(BTD1_list, "bin/attr/BTD1_list.joblib")
joblib.dump(BTD2_list, "bin/attr/BTD2_list.joblib")
joblib.dump(BTD1_trend, "bin/attr/BTD1_trend.joblib")
joblib.dump(BTD2_trend, "bin/attr/BTD2_trend.joblib")
joblib.dump(BT_trend, "bin/attr/BT_trend.joblib")

['bin/attr/BT_trend.joblib']

In [2]:
def flatten_and_combine(arr_list):
    # Flatten each numpy array and combine them
    return np.concatenate([arr.flatten() for arr in arr_list])

folder = Path('bin/attr')
# Get all file paths in the folder
attr_files = [file for file in folder.iterdir() if file.is_file() and file.name.startswith("B") and file.name.endswith(".joblib")]

for file in attr_files:
    data = joblib.load(file)
    print(f"\nLoaded: {file.name}")
    print(f"Shape: {data[0].shape}")

    # Flatten the data and dump it into a new file
    flat_data = flatten_and_combine(data)
    print(f"Flattened shape: {flat_data.shape}")
    print(f"Saving to: flat_{file.name}")
    joblib.dump(flat_data, f"bin/attr/flat_{file.name}")


Loaded: BT_list.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BT_list.joblib

Loaded: BTD2_trend.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BTD2_trend.joblib

Loaded: BT_trend.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BT_trend.joblib

Loaded: BTD2_list.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BTD2_list.joblib

Loaded: BTD1_trend.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BTD1_trend.joblib

Loaded: BTD1_list.joblib
Shape: (3207, 3062)
Flattened shape: (2356760160,)
Saving to: flat_BTD1_list.joblib


In [5]:
PreCI_mask_list = []

BT_list = joblib.load("bin/attr/BT_list.joblib")
print("Loaded BT_list: ", len(BT_list), BT_list[0].shape)
BTD1_list = joblib.load("bin/attr/BTD1_list.joblib")
print("Loaded BTD1_list: ", len(BTD1_list), BTD1_list[0].shape)
BTD2_list = joblib.load("bin/attr/BTD2_list.joblib")
print("Loaded BTD2_list: ", len(BTD2_list), BTD2_list[0].shape)
BTD1_trend = joblib.load("bin/attr/BTD1_trend.joblib")
print("Loaded BTD1_trend: ", len(BTD1_trend), BTD1_trend[0].shape)
BTD2_trend = joblib.load("bin/attr/BTD2_trend.joblib")
print("Loaded BTD2_trend: ", len(BTD2_trend), BTD2_trend[0].shape)
BT_trend = joblib.load("bin/attr/BT_trend.joblib")
print("Loaded BT_trend: ", len(BT_trend), BT_trend[0].shape)

for i in range(len(BT_list)):
    PreCI_mask = calculate_score(i, BT_list, BTD1_list, BTD2_list, BTD1_trend, BTD2_trend, BT_trend)
    PreCI_mask_list.append(PreCI_mask)

print("PreCI_mask_list: ", len(PreCI_mask_list), PreCI_mask_list[0].shape)

Loaded BT_list:  240 (3207, 3062)
Loaded BTD1_list:  240 (3207, 3062)
Loaded BTD2_list:  240 (3207, 3062)
Loaded BTD1_trend:  240 (3207, 3062)
Loaded BTD2_trend:  240 (3207, 3062)
Loaded BT_trend:  240 (3207, 3062)
Maximum score:  10.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9017466, 1: 802368})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9227881, 1: 409528, 2: 182425})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9234848, 1: 471198, 2: 113788})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9232521, 1: 473754, 2: 113559})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9225981, 1: 477912, 2: 115941})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9216913, 1: 424173, 2: 178748})
Maximum score:  25.0
Minimum score:  0.0
PreCI_mask data shape Counter({0: 9198253, 1: 484423, 2: 137158})
Maximum score:  25.0
Minimum score:  0.0
PreCI_

In [6]:
def flatten_and_combine(arr_list):
    # Flatten each numpy array and combine them
    return np.concatenate([arr.flatten() for arr in arr_list])

joblib.dump(PreCI_mask_list, "bin/attr/PreCI_mask_list.joblib")
print("Saved PreCI_mask_list")

# flatten PreCI_mask_list and save it in a joblib file
flat_PreCI_mask_list = flatten_and_combine(PreCI_mask_list)
joblib.dump(flat_PreCI_mask_list, "bin/attr/flat_PreCI_mask_list.joblib")
print("Saved flat_PreCI_mask_list")

Saved PreCI_mask_list
Saved flat_PreCI_mask_list
