Monthly NO2 Concentrations in Atmosphere	cit.018	https://avdc.gsfc.nasa.gov/pub/data/satellite/Aura/OMI/V03/L3/OMNO2D_HR/OMNO2D_HRM/

In [6]:
import numpy as np
import pandas as pd
import rasterio as rio
from netCDF4 import Dataset
pd.options.display.max_rows = 10000
pd.options.display.max_colwidth = 10000

import boto3
import requests
from urllib.request import urlopen
import shutil
from contextlib import closing

from matplotlib import pyplot as plt
%matplotlib inline
import os
import sys
import threading
from glob import glob
from datetime import datetime

Remote server files

In [4]:
# Remote directory whose HTML index lists the available OMI NO2 files.
online_folder = "https://avdc.gsfc.nasa.gov/pub/data/satellite/Aura/OMI/V03/L3/OMNO2D_HR/OMNO2D_HRM/"

# Fetch the index page. The timeout keeps the cell from hanging forever and
# raise_for_status() stops an HTTP error page from being parsed as a listing.
response = requests.get(online_folder, timeout=60)
response.raise_for_status()

# One entry per line of the HTML index, trailing whitespace removed.
res = response.text.split("\n")
listing = [line.rstrip() for line in res]

# Single-column frame (column label 0) holding the raw HTML lines.
df1 = pd.DataFrame(listing)

# Keep only the lines that mention the 0.10 x 0.10 degree product.
keep = df1[0].str.contains("OMI_trno2_0.10x0.10", regex=False)
df2 = df1.loc[keep]

# Split the remaining lines by file extension (plain substring match).
nc_files = df2[0].str.contains(".nc", regex=False)
dat_files = df2[0].str.contains(".dat", regex=False)
df_nc_long = df2.loc[nc_files]
df_dat_long = df2.loc[dat_files]

def pull_file_name(string, file_type):
    """Extract the linked file name and its date parts from one HTML listing line.

    Parameters
    ----------
    string : str
        A line of the directory index containing an ``a href="..."`` anchor.
    file_type : str
        File extension without the dot (e.g. ``"nc"`` or ``"dat"``); the
        anchor's target must end in this extension.

    Returns
    -------
    tuple of (str, str, str)
        ``(file_name, year, month)`` where the date is taken from the fourth
        underscore-separated field of the file name (``YYYYMM``).

    Raises
    ------
    ValueError
        If the line has no anchor pointing at a file of the requested type.
    """
    # Local import: the notebook's import cell does not bring in ``re``.
    import re

    # Capture the quoted href value instead of relying on fixed character
    # offsets, so a longer/shorter name (e.g. a new version tag) still parses.
    match = re.search(
        r"a href=[\"']([^\"']+\." + re.escape(file_type) + r")[\"']",
        string,
    )
    if match is None:
        raise ValueError(
            "No link to a .%s file found in listing line: %r" % (file_type, string)
        )

    file_name = match.group(1)
    # Names look like <a>_<b>_<c>_<YYYYMM>_<version>.<ext>; field 3 is the date.
    date = file_name.split("_")[3]
    year = date[0:4]
    month = date[4:]
    return (file_name, year, month)

# Parse every listing line into a (file_name, year, month) record and build
# each table in one step. Building from records (rather than unpacking
# zip(*...) into a pre-created empty frame) also yields a valid empty table
# when no file of a given type is listed.
file_columns = ["file_name", "year", "month"]

df_nc = pd.DataFrame(
    [pull_file_name(line, "nc") for line in df_nc_long[0]],
    columns=file_columns,
)
df_dat = pd.DataFrame(
    [pull_file_name(line, "dat") for line in df_dat_long[0]],
    columns=file_columns,
)

In [5]:
# Display the parsed table of .dat files (file_name, year, month).
df_dat

Unnamed: 0,file_name,year,month
0,OMI_trno2_0.10x0.10_200501_V3.dat,2005,1
1,OMI_trno2_0.10x0.10_200502_V3.dat,2005,2
2,OMI_trno2_0.10x0.10_200503_V3.dat,2005,3
3,OMI_trno2_0.10x0.10_200504_V3.dat,2005,4
4,OMI_trno2_0.10x0.10_200505_V3.dat,2005,5
5,OMI_trno2_0.10x0.10_200506_V3.dat,2005,6
6,OMI_trno2_0.10x0.10_200507_V3.dat,2005,7
7,OMI_trno2_0.10x0.10_200508_V3.dat,2005,8
8,OMI_trno2_0.10x0.10_200509_V3.dat,2005,9
9,OMI_trno2_0.10x0.10_200510_V3.dat,2005,10


Download Local Files

In [7]:
# NOTE(review): absolute, user-specific path -- consider moving it to a
# configurable data directory so the notebook runs on other machines.
local_folder = "/Users/nathansuberi/Desktop/RW_Data/Rasters/no2concentrations/"

# makedirs also creates any missing parent directories, which os.mkdir would
# reject with FileNotFoundError.
try:
    os.makedirs(local_folder)
except FileExistsError:
    print("Folder already exists")

# The last row of the listing is taken as the most recent file.
# NOTE(review): this relies on the server listing being in chronological
# order -- confirm.
most_recent = df_dat.iloc[-1]["file_name"]

print(most_recent)

local_orig = os.path.join(local_folder, most_recent)

# Stream the remote file to disk; the response object is itself a context
# manager, so it is closed when the block exits.
with urlopen(online_folder + most_recent) as r:
    with open(local_orig, 'wb') as f:
        shutil.copyfileobj(r, f)

Folder already exists
OMI_trno2_0.10x0.10_201701_V3.dat


In [48]:
# Output path: same name as the download with the 4-character extension
# replaced by "_edit.tif".
local_edit = local_orig[:-4] + "_edit.tif"

# Read the first band of the downloaded file into memory.
with rio.open(local_orig, 'r') as src:
    data = src.read(1)

rows, columns = data.shape

# Geographic extent assigned to the grid: the whole globe.
# NOTE(review): this assumes the array covers -180..180 / -90..90 with the
# first row at the northern edge -- confirm against the product description.
south_lat = -90
north_lat = 90
west_lon = -180
east_lon = 180

print(rows)
print(columns)

# The module is imported as `rio`; the bare name `rasterio` is never bound,
# so referring to it raises NameError on a fresh kernel.
transform = rio.transform.from_bounds(west_lon, south_lat, east_lon, north_lat, columns, rows)

# Profile of the GeoTIFF to write: single float32 band, WGS84, LZW-compressed.
profile = {
    'driver':'GTiff',
    'height':rows,
    'width':columns,
    'count':1,
    'dtype':np.float32,
    'crs':'EPSG:4326',
    'transform':transform,
    'compress':'lzw',
    'nodata': -1
}

with rio.open(local_edit, "w", **profile) as dst:
    dst.write(data.astype(profile["dtype"]), 1)

1800
3600


Define S3 location based on most recent observation

In [34]:
# Name of the last file in the parsed .dat listing.
file_name = df_dat["file_name"].iloc[-1]
# Characters 20-25 of the name hold the YYYYMM stamp.
# NOTE(review): year and month are not used anywhere in the visible cells.
year, month = file_name[20:24], file_name[24:26]

# boto3 handles: a low-level client (used for uploads) and a resource object.
s3_upload = boto3.client("s3")
s3_download = boto3.resource("s3")

# Destination bucket and key parts.
# NOTE(review): the "*" characters are literal and look like placeholders --
# confirm the intended folder and file name before uploading.
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/raster/*"
s3_file = "*.tif"

# Key for the original file, and the same key with "_edit" inserted before
# the ".tif" extension for the converted file.
s3_key_orig = "".join([s3_folder, s3_file])
key_stem = s3_key_orig[:-len(".tif")]
s3_key_edit = key_stem + "_edit.tif"


class ProgressPercentage(object):
    """Callable that prints cumulative transfer progress for one file.

    An instance is passed as the ``Callback`` argument of an upload; each call
    adds the reported byte count to a running total and rewrites a single
    status line on stdout.

    Parameters
    ----------
    filename : str
        Path of the local file being transferred; its size on disk is read
        once at construction time and used as the 100% mark.
    """

    def __init__(self, filename):
        self._filename = filename
        # Stored as float, so the size prints with a trailing ".0".
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Serialises updates in case the callback is invoked from several threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Record ``bytes_amount`` more transferred bytes and reprint the status line."""
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # A zero-byte file is complete by definition; avoid dividing by zero.
                percentage = 100.0
            # "\r" returns to the start of the line so the status overwrites itself.
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

In [52]:
# Upload the original download first, then the edited GeoTIFF, each with its
# own progress reporter.
uploads = (
    (local_orig, s3_key_orig),  # Original
    (local_edit, s3_key_edit),  # Edit
)
for local_path, s3_key in uploads:
    s3_upload.upload_file(
        local_path,
        s3_bucket,
        s3_key,
        Callback=ProgressPercentage(local_path),
    )

/Users/nathansuberi/Desktop/RW_Data/Rasters/no2concentrations/OMI_trno2_0.10x0.10_201701_V3_edit.tif  20434453 / 20434453.0  (100.00%)