# Data Importation

In [None]:
# %pip install python-dotenv
# %pip install seaborn
# %pip install rasterio --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import time

import ee 
import geemap
import os 
from dotenv import load_dotenv
import seaborn as sns

In [27]:
# Populate os.environ from a local .env file when one exists. load_dotenv is
# imported at the top of the notebook but was never called, so on a fresh
# kernel API_KEY was only visible if it had been exported in the shell.
# Variables that are already set in the environment are left untouched.
load_dotenv()

# Report whether the key is available; the value itself is never printed.
api_key = os.environ.get('API_KEY')
if api_key is None:
    print("Error: API key environment variable is not set.")
else:
    print("API Key successfully loaded.")

API Key successfully loaded.


In [28]:
# Run the Earth Engine authentication flow; the recorded output of this cell
# reports that an authorization token was saved.
ee.Authenticate()


Successfully saved authorization token.


In [2]:
# Initialize the Earth Engine client against the 'ruwa-bench' project.
# Every later `ee.*` call in this notebook depends on this having run.
ee.Initialize(project='ruwa-bench')

In [3]:
# Handle to the CHIRPS daily image collection ('UCSB-CHG/CHIRPS/DAILY').
# get_daily_timeseries reads this module-level `chirps` name directly, so this
# cell must be executed before that function is called.
chirps = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY')

In [4]:
# Date-range strings. The export cells visible in this notebook build their
# own per-year ranges and do not reference these two names.
start_date, end_date = '2020-01-01', '2023-12-31'

# Bounding boxes handed to ee.Geometry.Rectangle in the export cells:
# `north` is used for the Kano area and `south` for the Lagos area.
# NOTE(review): presumably ordered [west, south, east, north] in degrees —
# confirm against the ee.Geometry.Rectangle documentation.
north, south = [7.67, 10.50, 10.58, 13.00], [2.70, 6.37, 4.33, 6.70]

In [8]:
def get_daily_timeseries(region, start_date, end_date, scale=5500):
    """Build one feature per CHIRPS image holding the regional mean rainfall.

    Reads the module-level ``chirps`` image collection, so the cell that
    defines it must have been run first.

    Args:
        region: Earth Engine geometry used both to filter the collection
            (``filterBounds``) and as the area each image is averaged over.
        start_date: Lower bound handed to ``filterDate``.
        end_date: Upper bound handed to ``filterDate``. Earth Engine treats
            this bound as exclusive, so pass the day *after* the last day
            that should be included.
        scale: Nominal scale, in metres, passed to ``reduceRegion``.
            Defaults to 5500, the value that used to be hard-coded.

    Returns:
        The filtered collection mapped through the per-image extractor. Each
        element is an ``ee.Feature`` without geometry carrying two
        properties: ``'date'`` (formatted ``'YYYY-MM-dd'``) and
        ``'rainfall'`` (mean of the ``'precipitation'`` band over ``region``).
    """
    daily_images = chirps.filterDate(start_date, end_date).filterBounds(region)

    def extract_value(image):
        # Spatial mean of the 'precipitation' band over the region.
        value = image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=region,
            scale=scale,
            maxPixels=1e9,
        ).get('precipitation')

        # Geometry is None: only the date/rainfall properties are exported.
        return ee.Feature(None, {
            'date': image.date().format('YYYY-MM-dd'),
            'rainfall': value,
        })

    return daily_images.map(extract_value)
      

In [9]:
# Export one CSV of daily mean rainfall per year for the Kano bounding box.
years = [2020, 2021, 2022]
kano_area = ee.Geometry.Rectangle(north)
for year in years:
    start_date_value = f'{year}-01-01'
    # filterDate excludes its end bound, so the range must end on 1 January
    # of the following year; ending on '<year>-12-31' dropped 31 December.
    end_date_value = f'{year + 1}-01-01'
    features = get_daily_timeseries(kano_area, start_date_value, end_date_value)

    task = ee.batch.Export.table.toDrive(
        collection=ee.FeatureCollection(features),
        description=f'kano_chirps_daily_timeseries_{year}',
        fileFormat='CSV'
    )

    task.start()

    print(f'Task Status: {task.status()}')

    # Monitor until the task leaves its active states.
    while task.active():
        print(f'Processing... Status: {task.status()["state"]}')
        time.sleep(10)  # Check every 10 seconds

    # A task also stops being active when it fails or is cancelled, so only
    # announce completion when the final state really is COMPLETED.
    final_status = task.status()
    if final_status.get('state') == 'COMPLETED':
        print('Task completed!')
    else:
        print(
            f"Task did not complete. State: {final_status.get('state')}, "
            f"error: {final_status.get('error_message')}"
        )
    print(f'Final status: {final_status}')
    

Task Status: {'state': 'READY', 'description': 'kano_chirps_daily_timeseries_2020', 'priority': 100, 'creation_timestamp_ms': 1766377306056, 'update_timestamp_ms': 1766377306056, 'start_timestamp_ms': 0, 'task_type': 'EXPORT_FEATURES', 'id': '5NSXTHQWMJWZ4B6O5QWDASSM', 'name': 'projects/ruwa-bench/operations/5NSXTHQWMJWZ4B6O5QWDASSM'}
Processing... Status: READY
Processing... Status: RUNNING
Task completed!
Final status: {'state': 'COMPLETED', 'description': 'kano_chirps_daily_timeseries_2020', 'priority': 100, 'creation_timestamp_ms': 1766377306056, 'update_timestamp_ms': 1766377318281, 'start_timestamp_ms': 1766377313815, 'task_type': 'EXPORT_FEATURES', 'destination_uris': ['https://drive.google.com/'], 'attempt': 1, 'batch_eecu_usage_seconds': 1.7230812311172485, 'id': '5NSXTHQWMJWZ4B6O5QWDASSM', 'name': 'projects/ruwa-bench/operations/5NSXTHQWMJWZ4B6O5QWDASSM'}
Task Status: {'state': 'READY', 'description': 'kano_chirps_daily_timeseries_2021', 'priority': 100, 'creation_timestamp_m

In [10]:
# Export one CSV of daily mean rainfall per year for the Lagos bounding box.
years = [2020, 2021, 2022]
lagos_area = ee.Geometry.Rectangle(south)
for year in years:
    start_date_value = f'{year}-01-01'
    # filterDate excludes its end bound, so the range must end on 1 January
    # of the following year; ending on '<year>-12-31' dropped 31 December.
    end_date_value = f'{year + 1}-01-01'
    features = get_daily_timeseries(lagos_area, start_date_value, end_date_value)

    task = ee.batch.Export.table.toDrive(
        collection=ee.FeatureCollection(features),
        description=f'lagos_chirps_daily_timeseries_{year}',
        fileFormat='CSV'
    )

    task.start()

    print(f'Task Status: {task.status()}')

    # Monitor until the task leaves its active states.
    while task.active():
        print(f'Processing... Status: {task.status()["state"]}')
        time.sleep(10)  # Check every 10 seconds

    # A task also stops being active when it fails or is cancelled, so only
    # announce completion when the final state really is COMPLETED.
    final_status = task.status()
    if final_status.get('state') == 'COMPLETED':
        print('Task completed!')
    else:
        print(
            f"Task did not complete. State: {final_status.get('state')}, "
            f"error: {final_status.get('error_message')}"
        )
    print(f'Final status: {final_status}')

Task Status: {'state': 'READY', 'description': 'lagos_chirps_daily_timeseries_2020', 'priority': 100, 'creation_timestamp_ms': 1766377559178, 'update_timestamp_ms': 1766377559178, 'start_timestamp_ms': 0, 'task_type': 'EXPORT_FEATURES', 'id': 'DLWRABATKSSSVUJ5B4DTQWVB', 'name': 'projects/ruwa-bench/operations/DLWRABATKSSSVUJ5B4DTQWVB'}
Processing... Status: READY
Task completed!
Final status: {'state': 'COMPLETED', 'description': 'lagos_chirps_daily_timeseries_2020', 'priority': 100, 'creation_timestamp_ms': 1766377559178, 'update_timestamp_ms': 1766377567527, 'start_timestamp_ms': 1766377562642, 'task_type': 'EXPORT_FEATURES', 'destination_uris': ['https://drive.google.com/'], 'attempt': 1, 'batch_eecu_usage_seconds': 0.7187332510948181, 'id': 'DLWRABATKSSSVUJ5B4DTQWVB', 'name': 'projects/ruwa-bench/operations/DLWRABATKSSSVUJ5B4DTQWVB'}
Task Status: {'state': 'READY', 'description': 'lagos_chirps_daily_timeseries_2021', 'priority': 100, 'creation_timestamp_ms': 1766377570067, 'update_

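The exported data will be saved to Google Drive. Note that the export cells above use `fileFormat='CSV'`, so the daily time series are written as CSV tables; GeoTIFF, described below, is the format used when raster images rather than tables are exported.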

### GeoTIFF
A GeoTIFF is an image file with geographic information embedded in it. It is an industry-standard format for Earth Observation data and includes:
- Rainfall data stored as pixel values
- Coordinate system information (latitude and longitude)
- Spatial reference and projection data