<a href="https://colab.research.google.com/github/tatendakasirori/AI_4_ALL_project/blob/main/tif_to_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.3/22.3 MB[0m [31m87.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1.2 cligj-0.7.2 rasterio-1.4.3


In [9]:
import rasterio
import pandas as pd
import numpy as np
from pathlib import Path
from google.colab import files
import re

# ---------------------------------------------------------------------------
# Convert uploaded VIIRS GeoTIFFs into per-pixel CSVs (one combined + one per file).
# ---------------------------------------------------------------------------

# Output column for each raster band, in band order (band 1 first).
# NOTE(review): this order is carried over from the original hard-coded mapping —
# confirm it matches the script that exported the GeoTIFFs.
BAND_COLUMNS = [
    'raw_radiance',
    'lunar_irradiance',
    'gapfilled_ntl',
    'latest_high_quality_retrieval',
    'quality_flag',
    'cloud_mask',
    'snow_flag',
]

# Accepted GeoTIFF extensions (compared case-insensitively).
TIF_SUFFIXES = {'.tif', '.tiff'}

# Pattern: (Region)_VIIRS_(date)_to_(date) — the greedy first group keeps
# underscores inside region names (e.g. "New_Jersey").
FILENAME_PATTERN = re.compile(r'(.+)_VIIRS_(\d{4}-\d{2}-\d{2})_to_(\d{4}-\d{2}-\d{2})')

COMBINED_CSV = 'all_viirs_data_combined.csv'


def parse_viirs_filename(filename):
    """Extract (region, start_date, end_date) from a VIIRS export filename.

    Returns:
        A 3-tuple of strings, or None when the filename does not match
        FILENAME_PATTERN.
    """
    match = FILENAME_PATTERN.search(filename)
    return match.groups() if match else None


def tif_to_dataframe(filename):
    """Read a GeoTIFF and return a DataFrame with one row per pixel.

    Columns are 'latitude', 'longitude' and one column per entry of
    BAND_COLUMNS. Any bands beyond len(BAND_COLUMNS) are ignored.

    Returns:
        The per-pixel DataFrame, or None (after printing a warning) when the
        raster has fewer bands than BAND_COLUMNS requires.
    """
    with rasterio.open(filename) as src:
        if src.count < len(BAND_COLUMNS):
            print(f"  ⚠️  Warning: expected at least {len(BAND_COLUMNS)} bands, "
                  f"found {src.count} - skipping file")
            return None

        # Read all bands: array of shape (bands, height, width)
        data = src.read()

        # Pixel-centre coordinates for every (row, col), in row-major order so
        # they line up with the flattened band arrays below.
        # NOTE(review): x/y are in the raster's own CRS; labelling them
        # longitude/latitude assumes a geographic CRS (e.g. EPSG:4326) — confirm.
        rows, cols = np.meshgrid(np.arange(src.height), np.arange(src.width), indexing='ij')
        xs, ys = rasterio.transform.xy(src.transform, rows.ravel(), cols.ravel())

    columns = {'latitude': ys, 'longitude': xs}
    for name, band in zip(BAND_COLUMNS, data):
        columns[name] = band.ravel()
    return pd.DataFrame(columns)


# Upload the entire folder (or multiple files)
print("Upload all your .tif files:")
uploaded = files.upload()

# Process each GeoTIFF
all_dataframes = []

for filename in uploaded.keys():
    # Case-insensitive so '.TIF' / '.tiff' uploads are not silently skipped
    if Path(filename).suffix.lower() not in TIF_SUFFIXES:
        continue

    print(f"\n{'='*60}")
    print(f"Processing: {filename}")
    print(f"{'='*60}")

    df = tif_to_dataframe(filename)
    if df is None:
        continue

    parsed = parse_viirs_filename(filename)
    if parsed:
        region, start_date, end_date = parsed
        print(f"  Region: {region}")
        print(f"  Start: {start_date}")
        print(f"  End: {end_date}")
    else:
        print("  ⚠️  Warning: Could not parse filename")
        region = start_date = end_date = "Unknown"

    df['region'] = region
    df['start_date'] = start_date
    df['end_date'] = end_date
    df['filename'] = filename

    all_dataframes.append(df)

    print(f"  ✓ Processed {len(df):,} pixels")

# Fail with an actionable message instead of pandas' "No objects to concatenate"
if not all_dataframes:
    raise ValueError(
        "No GeoTIFF files were processed - upload at least one .tif/.tiff file "
        f"with at least {len(BAND_COLUMNS)} bands."
    )

# Combine all files into one big CSV
print(f"\n{'='*60}")
print("Combining all files...")
print(f"{'='*60}")

combined_df = pd.concat(all_dataframes, ignore_index=True)

# Save combined CSV
combined_df.to_csv(COMBINED_CSV, index=False)

print(f"\n✅ ALL DONE!")
print(f"   Total files processed: {len(all_dataframes)}")
print(f"   Total rows: {len(combined_df):,}")
print(f"   File saved: {COMBINED_CSV}")

# Also save individual CSVs. Names are built from region + dates only, so two
# uploads of the same period (or several unparseable filenames) would collide;
# a numeric suffix keeps later files from overwriting earlier ones.
used_csv_names = set()
for df in all_dataframes:
    region = df['region'].iloc[0]
    start = df['start_date'].iloc[0]
    end = df['end_date'].iloc[0]
    base_name = f"{region}_{start}_to_{end}"
    csv_name = f"{base_name}.csv"
    duplicate_index = 2
    while csv_name in used_csv_names:
        csv_name = f"{base_name}_{duplicate_index}.csv"
        duplicate_index += 1
    used_csv_names.add(csv_name)
    df.to_csv(csv_name, index=False)
    print(f"   Saved: {csv_name}")

# Download the combined file
print("\nDownloading combined CSV...")
files.download(COMBINED_CSV)

Upload all your .tif files:


Saving New_Jersey_VIIRS_2021-10-01_to_2021-10-14.tif to New_Jersey_VIIRS_2021-10-01_to_2021-10-14 (2).tif
Saving New_Jersey_VIIRS_2022-11-01_to_2022-11-24.tif to New_Jersey_VIIRS_2022-11-01_to_2022-11-24 (1).tif
Saving New_Jersey_VIIRS_2023-03-01_to_2023-06-14.tif to New_Jersey_VIIRS_2023-03-01_to_2023-06-14 (1).tif
Saving New_Jersey_VIIRS_2024-03-01_to_2024-06-14.tif to New_Jersey_VIIRS_2024-03-01_to_2024-06-14 (1).tif
Saving New_Jersey_VIIRS_2024-08-01_to_2024-11-14.tif to New_Jersey_VIIRS_2024-08-01_to_2024-11-14 (1).tif
Saving New_Jersey_VIIRS_2025-08-01_to_2025-10-13.tif to New_Jersey_VIIRS_2025-08-01_to_2025-10-13 (1).tif
Saving New_Jersey_VIIRS_2023-08-01_to_2023-11-14.tif to New_Jersey_VIIRS_2023-08-01_to_2023-11-14 (1).tif
Saving New_Jersey_VIIRS_2025-03-01_to_2025-06-14.tif to New_Jersey_VIIRS_2025-03-01_to_2025-06-14 (1).tif

Processing: New_Jersey_VIIRS_2021-10-01_to_2021-10-14 (2).tif
  Region: New_Jersey
  Start: 2021-10-01
  End: 2021-10-14
  ✓ Processed 202,539 pixels



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>