# **<span style="color: #2e86de;">S5P TROPOMI Data Access Hub API Test - Pipeline 1</span>**

## **<span style="color: #27ae60;">Load Toolkit Functions</span>**


In [1]:
import sys
import subprocess
import pkg_resources
# Path to the Jupyter notebook
notebook_path = "E:/SentinelNO2-Detection/NB_2_Sentinal_Tools.ipynb"
script_path = "E:/SentinelNO2-Detection/NB_2_Sentinal_Tools.py"
# Convert Jupyter notebook to Python script
def convert_notebook_to_script(notebook, script):
    try:
        result = subprocess.run(["jupyter", "nbconvert", "--to", "script", notebook], capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✅ Successfully converted {notebook} to {script}")
        else:
            print(f"❌ Error converting {notebook}: {result.stderr}")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error converting {notebook}: {e}")
# List of libraries to check/install/upgrade
libraries = [
    'pandas', 
    'geopandas', 
    'shapely'
]
# Function to install a package with a progress bar
def install_package(package):
    try:
        result = subprocess.run([sys.executable, "-m", "pip", "install", package], capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✅ Successfully installed {package}")
        else:
            print(f"❌ Error installing {package}: {result.stderr}")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error installing {package}: {e}")
# Function to upgrade a package with a progress bar
def upgrade_package(package):
    try:
        result = subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", package], capture_output=True, text=True)
        if result.returncode == 0:
            print(f"⬆️ Successfully upgraded {package}")
        else:
            print(f"❌ Error upgrading {package}: {result.stderr}")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error upgrading {package}: {e}")
# Function to check if a package is installed and install/upgrade it
def check_and_install(library):
    try:
        pkg_resources.get_distribution(library)
        print(f"✅ {library} is already installed.")
        upgrade_package(library)
    except pkg_resources.DistributionNotFound:
        print(f"📦 {library} is not installed. Installing now...")
        install_package(library)
# Function to log the status
def log_status(library, status):
    with open('library_installation_log.txt', 'a') as log_file:
        log_file.write(f"{library}: {status}\n")
# Enhanced check and install/upgrade packages with logging
for library in libraries:
    check_and_install(library)
    log_status(library, "Checked and processed")
# Convert the Jupyter notebook to a Python script
convert_notebook_to_script(notebook_path, script_path)
# Import all the necessary libraries
def import_libraries():
    globals().update(locals())
    import pandas as pd
    import geopandas as gpd
    from shapely import wkt
    from NB_2_Sentinal_Tools import sentinel_api_query, date_from_week, add_days, geometry_to_wkt, filter_swath_set, get_place_boundingbox, show_colormap
# Try to import all libraries and handle any potential import errors
try:
    import_libraries()
    print("✅ All libraries are imported successfully!")
except ImportError as e:
    print(f"❌ Error importing libraries: {e}")
    log_status('import', f"Error: {e}")
print("✅ All specified libraries are checked, installed, upgraded, and imported successfully!")

  import pkg_resources


✅ pandas is already installed.
⬆️ Successfully upgraded pandas
✅ geopandas is already installed.
⬆️ Successfully upgraded geopandas
✅ shapely is already installed.
⬆️ Successfully upgraded shapely
✅ Successfully converted E:/SentinelNO2-Detection/NB_2_Sentinal_Tools.ipynb to E:/SentinelNO2-Detection/NB_2_Sentinal_Tools.py
✅ pandas is already installed.
⬆️ Successfully upgraded pandas
✅ geopandas is already installed.
⬆️ Successfully upgraded geopandas
✅ netCDF4 is already installed.
⬆️ Successfully upgraded netCDF4
✅ numpy is already installed.
⬆️ Successfully upgraded numpy
✅ matplotlib is already installed.
⬆️ Successfully upgraded matplotlib
✅ requests is already installed.
⬆️ Successfully upgraded requests
✅ xmltodict is already installed.
⬆️ Successfully upgraded xmltodict
✅ shapely is already installed.
⬆️ Successfully upgraded shapely
🌟 All libraries are imported successfully!
✅ All libraries are checked, installed, upgraded, and imported successfully!
✅ All libraries are import

## **<span style="color: #8e44ad;">Obtain Target Country GeoDataFrame</span>**

In [6]:
import os
import sys
import logging
import requests
import subprocess
from pathlib import Path
# Setup logging
logging.basicConfig(
    filename="setup_log.log",
    filemode="w",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
# Define the base directory path
base_path = Path("E:/SentinelNO2-Detection/Geo_Data_Downloading")
# Project structure and dependencies
folders = ["data"]
files = {
    "requirements.txt": "flask\nrequests\n",
}
# Function to create folders and files
def create_project_structure():
    try:
        # Ensure the base directory exists
        base_path.mkdir(parents=True, exist_ok=True)       
        # Create folders
        for folder in folders:
            folder_path = base_path / folder
            folder_path.mkdir(exist_ok=True)
            logging.info(f"Created folder: {folder_path}")
        # Create and write files
        for file, content in files.items():
            file_path = base_path / file
            with open(file_path, "w") as f:
                f.write(content)
            logging.info(f"Created file: {file_path}")
    except Exception as e:
        logging.error(f"Error creating project structure: {e}")
# Function to check and install Node.js and npm if not present
def check_and_install_node():
    try:
        node_installed = subprocess.run(["node", "-v"], capture_output=True)
        npm_installed = subprocess.run(["npm", "-v"], capture_output=True)
        if node_installed.returncode != 0 or npm_installed.returncode != 0:
            logging.info("Node.js or npm not detected. Downloading installer.")
            # Download Node.js installer (Windows example)
            installer_url = "https://nodejs.org/dist/v18.12.1/node-v18.12.1-x64.msi"
            installer_path = base_path / "node_installer.msi"
            response = requests.get(installer_url)
            with open(installer_path, "wb") as file:
                file.write(response.content)
            subprocess.run(["msiexec", "/i", str(installer_path), "/quiet"])
            logging.info("Node.js and npm installed successfully.")
        else:
            logging.info("Node.js and npm are already installed.")
    except Exception as e:
        logging.error(f"Error checking/installing Node.js: {e}")
# Function to install Python packages from requirements.txt
def install_python_packages():
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", str(base_path / "requirements.txt")])
        logging.info("Python packages installed successfully.")
    except Exception as e:
        logging.error(f"Error installing Python packages: {e}")
# Function to upgrade outdated Python packages
def upgrade_packages():
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
        outdated_packages = subprocess.check_output([sys.executable, "-m", "pip", "list", "--outdated", "--format=freeze"]).decode().splitlines()
        for package in outdated_packages:
            package_name = package.split("==")[0]
            subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", package_name])
            logging.info(f"Upgraded package: {package_name}")
    except Exception as e:
        logging.error(f"Error upgrading packages: {e}")
# Function to update requirements.txt with installed packages not already listed
def update_requirements_file():
    try:
        # Read existing requirements
        req_file_path = base_path / "requirements.txt"
        with open(req_file_path, "r") as f:
            existing_packages = set(line.strip() for line in f if line.strip())
        # Get all installed packages
        installed_packages = subprocess.check_output([sys.executable, "-m", "pip", "freeze"]).decode().splitlines()
        installed_packages_set = {pkg.split("==")[0] for pkg in installed_packages}
        # Identify new packages not in requirements.txt
        new_packages = installed_packages_set - existing_packages
        # Append new packages to requirements.txt
        with open(req_file_path, "a") as f:
            for pkg in installed_packages:
                pkg_name = pkg.split("==")[0]
                if pkg_name in new_packages:
                    f.write(pkg + "\n")
                    logging.info(f"Added {pkg} to requirements.txt")
    except Exception as e:
        logging.error(f"Error updating requirements.txt: {e}")
# Main setup function
def main_setup():
    logging.info("Starting project setup...")
    create_project_structure()
    check_and_install_node()
    install_python_packages()
    upgrade_packages()
    update_requirements_file()
    logging.info("Project setup completed successfully.")
    print("Project setup completed successfully! Check setup_log.log for details.")
# Call main setup function directly
main_setup()

Project setup completed successfully! Check setup_log.log for details.


In [9]:
import subprocess
import sys
import os
import time
import logging
import requests
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
# Configure logging
logging.basicConfig(
    filename="download_log.log",
    filemode="w",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
# Create a separate error log
error_log_file = "error_log.log"
logging.getLogger().addHandler(logging.FileHandler(error_log_file))
# URLs to download
urls = {
    "osm_pbf": [
        "https://download.geofabrik.de/asia/india/central-zone-latest.osm.pbf",
        "https://download.geofabrik.de/asia/india/eastern-zone-latest.osm.pbf",
        "https://download.geofabrik.de/asia/india/north-eastern-zone-latest.osm.pbf",
        "https://download.geofabrik.de/asia/india/northern-zone-latest.osm.pbf",
        "https://download.geofabrik.de/asia/india/southern-zone-latest.osm.pbf",
        "https://download.geofabrik.de/asia/india/western-zone-latest.osm.pbf"
    ],
    "shp_zip": [
        "https://download.geofabrik.de/asia/india/central-zone-latest-free.shp.zip",
        "https://download.geofabrik.de/asia/india/eastern-zone-latest-free.shp.zip",
        "https://download.geofabrik.de/asia/india/north-eastern-zone-latest-free.shp.zip",
        "https://download.geofabrik.de/asia/india/northern-zone-latest-free.shp.zip",
        "https://download.geofabrik.de/asia/india/southern-zone-latest-free.shp.zip",
        "https://download.geofabrik.de/asia/india/western-zone-latest-free.shp.zip"
    ],
    "osm_bz2": [
        "https://download.geofabrik.de/asia/india/central-zone-latest.osm.bz2",
        "https://download.geofabrik.de/asia/india/eastern-zone-latest.osm.bz2",
        "https://download.geofabrik.de/asia/india/north-eastern-zone-latest.osm.bz2",
        "https://download.geofabrik.de/asia/india/northern-zone-latest.osm.bz2",
        "https://download.geofabrik.de/asia/india/southern-zone-latest.osm.bz2",
        "https://download.geofabrik.de/asia/india/western-zone-latest.osm.bz2"
    ]
}
# Download path configuration
download_directory = "E:/SentinelNO2-Detection/Geo_Data_Downloading/data"
os.makedirs(download_directory, exist_ok=True)
# Setup Chrome WebDriver with configured download path
def setup_browser(download_directory):
    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")
    chrome_options.add_argument("--no-sandbox")
    prefs = {"download.default_directory": download_directory}
    chrome_options.add_experimental_option("prefs", prefs)
    # Initialize WebDriver with ChromeDriverManager
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver
# Download file with progress indicator
def download_with_progress(url, download_directory):
    filename = url.split("/")[-1]
    file_path = os.path.join(download_directory, filename)
    if os.path.exists(file_path):
        logging.info(f"{filename} already exists. Skipping download.")
        return True
    try:
        # Start the download process
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise an error for bad responses
        file_size = int(response.headers.get('Content-Length', 0))
        logging.info(f"Downloading {filename} ({file_size / (1024 ** 2):.2f} MB)...")
        # Display progress
        with open(file_path, "wb") as file, tqdm(
                desc=filename,
                total=file_size,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
        ) as progress:
            for chunk in response.iter_content(chunk_size=4096):
                if chunk:
                    file.write(chunk)
                    progress.update(len(chunk))
        logging.info(f"{filename} downloaded successfully.")
        update_progress_file(filename)
        return True
    except Exception as e:
        logging.error(f"Error downloading {filename}: {e}")
        return False
# Update the progress file with downloaded filenames
def update_progress_file(filename):
    with open("progress.txt", "a") as progress_file:
        progress_file.write(f"{filename}\n")
# Verify all required files are downloaded
def verify_downloads():
    downloaded_files = set()
    if os.path.exists("progress.txt"):
        with open("progress.txt", "r") as progress_file:
            downloaded_files = {line.strip() for line in progress_file}
    all_required_files = {url.split("/")[-1] for category in urls.values() for url in category}
    missing_files = all_required_files - downloaded_files
    if missing_files:
        logging.warning("Missing files: " + ", ".join(missing_files))
        print("Missing files that need to be re-downloaded:", missing_files)
    else:
        print("All required files have been successfully downloaded.")
# Main function for managing downloads
def initiate_downloads():
    driver = setup_browser(download_directory)
    try:
        # Download each file URL
        for category, links in urls.items():
            for url in links:
                driver.get(url)  # Open URL in Chrome for visibility
                download_with_progress(url, download_directory)
                time.sleep(1)
    finally:
        driver.quit()  # Close Chrome WebDriver
        verify_downloads()
# Run the download process
if __name__ == "__main__":
    initiate_downloads()

All required files have been successfully downloaded.


In [None]:
import os
import zipfile
import logging
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import HeatMap, MarkerCluster
from sklearn.cluster import KMeans
import numpy as np
from scipy import stats
from datetime import datetime

# Configure paths and logging
data_dir = r"E:\SentinelNO2-Detection\Geo_Data_Downloading\data"
log_file_path = os.path.join(data_dir, "file_log.txt")

logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize log file
def initialize_log():
    """Initialize the log file."""
    try:
        if os.path.exists(log_file_path):
            os.remove(log_file_path)
        logging.info("Log initialized. Starting directory scan and file processing...")
    except Exception as e:
        print(f"Error initializing log: {e}")

# Scan and extract files
def scan_and_extract_files():
    """Scan the directory and extract ZIP files if needed."""
    files_in_directory = os.listdir(data_dir)
    used_files, unused_files = [], []
    
    for file in files_in_directory:
        file_path = os.path.join(data_dir, file)
        
        # Extract ZIP files
        try:
            if zipfile.is_zipfile(file_path):
                with zipfile.ZipFile(file_path, 'r') as zip_ref:
                    if not all(os.path.exists(os.path.join(data_dir, name)) for name in zip_ref.namelist()):
                        zip_ref.extractall(data_dir)
                        logging.info(f"Extracted ZIP file: {file}")
                    else:
                        logging.info(f"ZIP file already extracted: {file}")
        
            # Identify usable files
            if file.endswith(('.pickle', '.shp', '.geojson', '.csv')):
                used_files.append(file)
            else:
                unused_files.append(file)
        except Exception as e:
            logging.error(f"Failed to process file {file}: {e}")
    
    logging.info(f"Used files: {used_files}")
    logging.info(f"Unused files: {unused_files}")
    return used_files

# Load geospatial data
def load_geospatial_data(files):
    """Load geospatial data from supported file types into GeoDataFrames."""
    geodataframes = []
    for file in files:
        file_path = os.path.join(data_dir, file)
        try:
            if file.endswith('.pickle'):
                gdf = gpd.GeoDataFrame(pd.read_pickle(file_path))
            elif file.endswith(('.geojson', '.shp')):
                gdf = gpd.read_file(file_path)
            elif file.endswith('.csv'):
                df = pd.read_csv(file_path)
                if 'longitude' in df.columns and 'latitude' in df.columns:
                    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
                else:
                    logging.warning(f"CSV file {file} lacks latitude/longitude columns.")
                    continue
            else:
                continue
            geodataframes.append(gdf)
            logging.info(f"Loaded file: {file}")
        except Exception as e:
            logging.error(f"Error loading file {file}: {e}")
    return geodataframes

# Advanced analysis
def advanced_analysis(gdfs):
    """Perform advanced analysis including spatial joins, heatmaps, clustering, and statistics."""
    for i, gdf in enumerate(gdfs):
        try:
            # Spatial join with previous GDFs if available
            if i > 0:
                gdfs[0] = gpd.sjoin(gdfs[0], gdf, how="inner", op="intersects")
                logging.info("Spatial join completed.")
            
            # Calculate area and centroid
            gdf["centroid"] = gdf.centroid
            gdf["area"] = gdf.geometry.area
            logging.info("Centroids and areas calculated.")
            
            # Generate density heatmap
            m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)
            heat_data = [[point.xy[1][0], point.xy[0][0]] for point in gdf["centroid"]]
            HeatMap(heat_data).add_to(m)
            m.save(os.path.join(data_dir, f"heatmap_{i}.html"))
            logging.info("Density heatmap saved.")
            
            # Clustering and K-Means Clustering
            marker_cluster = MarkerCluster()
            for idx, row in gdf.iterrows():
                folium.Marker(location=[row["centroid"].y, row["centroid"].x]).add_to(marker_cluster)
            marker_cluster.add_to(m)
            m.save(os.path.join(data_dir, f"cluster_map_{i}.html"))
            logging.info("Cluster map saved.")
            
            # Perform K-Means clustering if sufficient data points exist
            if len(gdf) >= 3:
                coords = np.array([[point.xy[0][0], point.xy[1][0]] for point in gdf["centroid"]])
                kmeans = KMeans(n_clusters=3).fit(coords)
                gdf['cluster'] = kmeans.labels_
                logging.info("K-Means clustering completed.")
            
            # Zonal Statistics
            if "zone_column" in gdf.columns:
                zonal_stats = gdf.groupby("zone_column").agg({'area': 'sum'}).rename(columns={'area': 'total_area'})
                logging.info(f"Zonal statistics computed: {zonal_stats}")

            # Spatial Autocorrelation (Moran's I)
            try:
                morans_i = stats.moran(gdf["area"], gdf["centroid"])
                logging.info(f"Spatial Autocorrelation (Moran's I): {morans_i}")
            except Exception as e:
                logging.warning("Moran’s I calculation failed: " + str(e))

        except Exception as e:
            logging.error(f"Error in advanced analysis for GDF {i}: {e}")

# Save visualizations and reports
def save_visualizations_and_reports(gdfs):
    """Generate and save visualizations, including geospatial plots and interactive maps."""
    for i, gdf in enumerate(gdfs):
        try:
            plt.figure(figsize=(10, 8))
            gdf.plot(marker='o', color='red', markersize=5)
            plt.title("Geospatial Data Plot")
            plt.xlabel("Longitude")
            plt.ylabel("Latitude")
            plt.savefig(os.path.join(data_dir, f"plot_{i}.png"))
            logging.info(f"Geospatial plot for GDF {i} saved.")
            
            # Interactive Map with Layer Control
            m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)
            for _, row in gdf.iterrows():
                folium.CircleMarker(
                    location=[row["centroid"].y, row["centroid"].x],
                    radius=5,
                    popup=row.get("name", "Location"),
                    color="blue"
                ).add_to(m)
            folium.LayerControl().add_to(m)
            m.save(os.path.join(data_dir, f"interactive_map_{i}.html"))
            logging.info("Interactive map with layer control saved.")
        
        except Exception as e:
            logging.error(f"Error saving visualizations for GDF {i}: {e}")

# Time series analysis
def time_series_analysis(gdfs):
    """Analyze data over time if temporal data is available."""
    for gdf in gdfs:
        if "timestamp" in gdf.columns:
            try:
                gdf["timestamp"] = pd.to_datetime(gdf["timestamp"])
                time_series = gdf.set_index("timestamp").resample("M").mean()
                time_series.plot(title="Temporal Trends", xlabel="Time", ylabel="Value")
                plt.savefig(os.path.join(data_dir, "time_series_analysis.png"))
                logging.info("Time series analysis plot saved.")
            except Exception as e:
                logging.error("Error in time series analysis: " + str(e))

# Cleanup log file if empty
def cleanup_log():
    try:
        if os.path.exists(log_file_path) and os.path.getsize(log_file_path) == 0:
            os.remove(log_file_path)
            logging.info("Log file deleted as no issues were found.")
    except Exception as e:
        logging.error("Error cleaning up log file: " + str(e))

# Main execution
initialize_log()
files = scan_and_extract_files()
gdfs = load_geospatial_data(files)
advanced_analysis(gdfs)
save_visualizations_and_reports(gdfs)
time_series_analysis(gdfs)
cleanup_log()


Error initializing log: [WinError 32] The process cannot access the file because it is being used by another process: 'E:\\SentinelNO2-Detection\\Geo_Data_Downloading\\data\\file_log.txt'
