In [2]:
import pandas as pd
import os
from datetime import datetime
import rasterio
import pyproj
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [6]:
# PREPROCESSING FUNCTION 

def get_data(path, date):
    with rasterio.open(path) as src:
        image_array = src.read()
        df = pd.DataFrame(image_array.reshape(-1, src.count), columns=[f'band_{i+1}' for i in range(src.count)])
        df['x'] = [src.xy(row, col)[0] for row in range(src.height) for col in range(src.width)]
        df['y'] = [src.xy(row, col)[1] for row in range(src.height) for col in range(src.width)]
        
    projected_proj = pyproj.Proj(init='epsg:5070')  
    wgs84_proj = pyproj.Proj(init='epsg:4326')
    lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
    df['latitude'] = lat
    df['longitude'] = lon    
    df['lat_long'] = df['latitude'].astype(str) + ',' + df['longitude'].astype(str)
    df.set_index('lat_long', inplace=True)
    df = df[['band_2']]
    df.rename(columns={"band_2":date}, inplace=True)
    return df


def dt_dataframe(local_dir):
    date_dataframe = pd.DataFrame()
    files = os.listdir(local_dir)
    for x in files:
        dates = x.split('-')[0] +'/'+ x.split('-')[1] +'/'+ x.split("-")[2].split('.')[0]
        date_obj = datetime.strptime(dates, "%d/%m/%Y").date()
        data = {'filename':[x],
              'date':[date_obj]}
        temp_date_dataframe = pd.DataFrame(data)
        date_dataframe = pd.concat([date_dataframe,temp_date_dataframe], axis=0)

    date_dataframe['date'] = pd.to_datetime(date_dataframe['date'])
    date_dataframe.sort_values(by='date', ascending=True, inplace=True)
    return date_dataframe

In [3]:
local_dir = 'tiff_files'

In [7]:
date_dataframe = dt_dataframe(local_dir)
date_dataframe

Unnamed: 0,filename,date
0,18-02-2020.tif,2020-02-18
0,05-03-2020.tif,2020-03-05
0,21-03-2020.tif,2020-03-21
0,06-04-2020.tif,2020-04-06
0,22-04-2020.tif,2020-04-22
...,...,...
0,06-04-2024.tif,2024-04-06
0,22-04-2024.tif,2024-04-22
0,08-05-2024.tif,2024-05-08
0,24-05-2024.tif,2024-05-24


In [8]:
final_df = pd.DataFrame()
for index, row in tqdm(date_dataframe.iterrows(), total=date_dataframe.shape[0], desc="Processing files"):
    path = os.path.join(local_dir, row['filename'])
    date = row['date']
    temp = get_data(path, date)
    final_df = pd.concat([final_df, temp], axis=1)

final_df.replace(-32768, 0, inplace=True)
final_df.reset_index(inplace=True)
final_df

Processing files: 100%|██████████| 100/100 [00:14<00:00,  6.73it/s]


Unnamed: 0,lat_long,2020-02-18 00:00:00,2020-03-05 00:00:00,2020-03-21 00:00:00,2020-04-06 00:00:00,2020-04-22 00:00:00,2020-05-08 00:00:00,2020-05-24 00:00:00,2020-06-09 00:00:00,2020-06-25 00:00:00,...,2024-01-17 00:00:00,2024-02-02 00:00:00,2024-02-18 00:00:00,2024-03-05 00:00:00,2024-03-21 00:00:00,2024-04-06 00:00:00,2024-04-22 00:00:00,2024-05-08 00:00:00,2024-05-24 00:00:00,2024-06-09 00:00:00
0,"0.60177487312766,35.82284080806805",7222,7478,7224,7038,7190,7623,7850,8349,8324,...,7336,7403,5833,6524,5817,6487,6901,7647,7590,8363
1,"0.5992047872300739,35.823195587022916",7443,7205,7295,6885,7883,7789,7615,7784,7659,...,7779,7190,6585,6968,5598,6459,7466,7752,7776,8665
2,"0.5966346490738244,35.823550351718566",7183,7224,7455,7560,7946,8283,8032,7945,8583,...,6621,7090,6104,7454,5835,7651,8055,8315,8245,7966
3,"0.5940644586539345,35.823905102155834",5610,5830,6419,6771,6595,7308,6920,7084,6539,...,5901,5707,5231,6444,5031,6902,7091,7555,7105,6548
4,"0.5914942159654276,35.8242598383356",5656,5431,5590,4897,5973,5748,5667,6248,5882,...,5738,5068,4644,5034,3742,5550,6524,6873,7185,6001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2803,"0.39727868724947596,35.7819605648886",1,0,0,1,3,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2804,"0.3947047785569723,35.78231559808637",1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2805,"0.39213081720925885,35.78267061703858",0,0,0,1,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,1
2806,"0.38955680320123814,35.78302562174612",0,0,0,1,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,1


In [9]:
final_df.to_csv('Processed_data.csv')