In [54]:
import pandas as pd
import boto3
import os
from datetime import datetime
import rasterio
import pyproj
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [48]:
s3 = boto3.client('s3')
bucket_name = 'modis13q1-ndvi'
directory = 'Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/'
objects = s3.list_objects_v2(Bucket=bucket_name, Prefix=directory)

In [14]:
for obj in objects['Contents']:
    key = obj['Key']
    local_file_path = os.path.join('downloaded_modis_tiff_files/', key)
    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
    s3.download_file(bucket_name, key, local_file_path)
    print(f'Downloaded {key} to {local_file_path}')

Downloaded Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2001.tif to downloaded_modis_tiff_files/Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2001.tif
Downloaded Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2002.tif to downloaded_modis_tiff_files/Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2002.tif
Downloaded Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2003.tif to downloaded_modis_tiff_files/Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2003.tif
Downloaded Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2004.tif to downloaded_modis_tiff_files/Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa/01-01-2004.tif
Downloaded Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa

In [49]:
# PREPROCESSING FUNCTION 

def get_data(path, date):
    with rasterio.open(path) as src:
        image_array = src.read()
        df = pd.DataFrame(image_array.reshape(-1, src.count), columns=[f'band_{i+1}' for i in range(src.count)])
        df['x'] = [src.xy(row, col)[0] for row in range(src.height) for col in range(src.width)]
        df['y'] = [src.xy(row, col)[1] for row in range(src.height) for col in range(src.width)]
        
    projected_proj = pyproj.Proj(init='epsg:5070')  
    wgs84_proj = pyproj.Proj(init='epsg:4326')
    lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
    df['latitude'] = lat
    df['longitude'] = lon    
    df['lat_long'] = df['latitude'].astype(str) + ',' + df['longitude'].astype(str)
    df.set_index('lat_long', inplace=True)
    df = df[['band_2']]
    df.rename(columns={"band_2":date}, inplace=True)
    return df


def dt_dataframe(local_dir):
    date_dataframe = pd.DataFrame()
    files = os.listdir(local_dir)
    for x in files:
        dates = x.split('-')[0] +'/'+ x.split('-')[1] +'/'+ x.split("-")[2].split('.')[0]
        date_obj = datetime.strptime(dates, "%d/%m/%Y").date()
        data = {'filename':[x],
              'date':[date_obj]}
        temp_date_dataframe = pd.DataFrame(data)
        date_dataframe = pd.concat([date_dataframe,temp_date_dataframe], axis=0)

    date_dataframe['date'] = pd.to_datetime(date_dataframe['date'])
    date_dataframe.sort_values(by='date', ascending=True, inplace=True)
    return date_dataframe

In [50]:
local_dir = 'downloaded_modis_tiff_files/Modis data - Kenya/tiff files - Kenya/Abakaile Ward|Dadaab Sub County|Garissa'

In [51]:
date_dataframe = dt_dataframe(local_dir)
date_dataframe

Unnamed: 0,filename,date
0,18-02-2000.tif,2000-02-18
0,05-03-2000.tif,2000-03-05
0,21-03-2000.tif,2000-03-21
0,06-04-2000.tif,2000-04-06
0,22-04-2000.tif,2000-04-22
...,...,...
0,21-03-2024.tif,2024-03-21
0,06-04-2024.tif,2024-04-06
0,22-04-2024.tif,2024-04-22
0,08-05-2024.tif,2024-05-08


In [52]:
final_df = pd.DataFrame()
for index, row in tqdm(date_dataframe.iterrows(), total=date_dataframe.shape[0], desc="Processing files"):
    path = os.path.join(local_dir, row['filename'])
    date = row['date']
    temp = get_data(path, date)
    final_df = pd.concat([final_df, temp], axis=1)

final_df.replace(-32768, 0, inplace=True)
final_df.reset_index(inplace=True)
final_df

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = pyproj.transform(projected_proj, wgs84_proj, df['x'].values, df['y'].values)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = pyproj.transform(projected_proj, wgs8

Unnamed: 0,lat_long,2000-02-18 00:00:00,2000-03-05 00:00:00,2000-03-21 00:00:00,2000-04-06 00:00:00,2000-04-22 00:00:00,2000-05-08 00:00:00,2000-05-24 00:00:00,2000-06-09 00:00:00,2000-06-25 00:00:00,...,2024-01-01 00:00:00,2024-01-17 00:00:00,2024-02-02 00:00:00,2024-02-18 00:00:00,2024-03-05 00:00:00,2024-03-21 00:00:00,2024-04-06 00:00:00,2024-04-22 00:00:00,2024-05-08 00:00:00,2024-05-24 00:00:00
0,"0.45703193185870356,40.197005160631605",2734,2668,2760,2385,540,3342,2819,2931,2575,...,4524,3645,3339,3072,2560,2602,4564,5061,4797,3677
1,"0.4544397303738519,40.197271389099406",2603,2539,2430,1971,2737,2546,2336,1714,2079,...,3607,2811,2635,2602,2421,2505,3292,4402,3623,2786
2,"0.45184747636337713,40.197537606799045",3135,3132,3204,5097,5192,4158,3646,3595,3093,...,4896,4056,3420,2820,3020,2617,4711,5467,5335,3836
3,"0.44925516982216074,40.197803813731184",3165,3040,2945,5121,2703,4794,3336,3339,2749,...,4189,3392,3208,2851,2766,2956,3863,4086,4038,3782
4,"0.4466628107451197,40.19807000989648",2930,2781,2735,4320,4371,3452,3230,3098,2747,...,4309,3367,3096,2817,2880,2927,4557,4864,4155,4001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45811,"-0.09850324485499101,39.76985525248099",0,0,0,0,1,1,0,1,0,...,0,0,0,0,0,3,0,0,0,0
45812,"-0.10110493421799299,39.77012893321677",0,0,0,3,1,1,0,1,0,...,0,0,0,0,1,3,0,1,0,0
45813,"-0.1037066772132266,39.77040260293766",0,0,0,1,3,0,0,1,0,...,0,0,3,3,3,3,0,1,0,1
45814,"-0.10630847384591233,39.770676261644255",0,0,0,1,3,1,0,1,1,...,0,0,1,0,0,3,1,1,0,1


In [35]:
final_df

Unnamed: 0,lat_long,2000-02-18 00:00:00,2000-03-05 00:00:00,2000-03-21 00:00:00,2000-04-06 00:00:00,2000-04-22 00:00:00,2000-05-08 00:00:00,2000-05-24 00:00:00,2000-06-09 00:00:00,2000-06-25 00:00:00,...,2024-01-01 00:00:00,2024-01-17 00:00:00,2024-02-02 00:00:00,2024-02-18 00:00:00,2024-03-05 00:00:00,2024-03-21 00:00:00,2024-04-06 00:00:00,2024-04-22 00:00:00,2024-05-08 00:00:00,2024-05-24 00:00:00
0,"0.45703193185870356,40.197005160631605",2734,2668,2760,2385,540,3342,2819,2931,2575,...,4524,3645,3339,3072,2560,2602,4564,5061,4797,3677
1,"0.4544397303738519,40.197271389099406",2603,2539,2430,1971,2737,2546,2336,1714,2079,...,3607,2811,2635,2602,2421,2505,3292,4402,3623,2786
2,"0.45184747636337713,40.197537606799045",3135,3132,3204,5097,5192,4158,3646,3595,3093,...,4896,4056,3420,2820,3020,2617,4711,5467,5335,3836
3,"0.44925516982216074,40.197803813731184",3165,3040,2945,5121,2703,4794,3336,3339,2749,...,4189,3392,3208,2851,2766,2956,3863,4086,4038,3782
4,"0.4466628107451197,40.19807000989648",2930,2781,2735,4320,4371,3452,3230,3098,2747,...,4309,3367,3096,2817,2880,2927,4557,4864,4155,4001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45811,"-0.09850324485499101,39.76985525248099",0,0,0,0,1,1,0,1,0,...,0,0,0,0,0,3,0,0,0,0
45812,"-0.10110493421799299,39.77012893321677",0,0,0,3,1,1,0,1,0,...,0,0,0,0,1,3,0,1,0,0
45813,"-0.1037066772132266,39.77040260293766",0,0,0,1,3,0,0,1,0,...,0,0,3,3,3,3,0,1,0,1
45814,"-0.10630847384591233,39.770676261644255",0,0,0,1,3,1,0,1,1,...,0,0,1,0,0,3,1,1,0,1
