In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

import sys

try:
    import cartopy.crs as ccrs
except ImportError:
    # cartopy is optional here; its absence is reported just below
    pass

# A successful import leaves the module registered in sys.modules,
# so membership there is the availability flag.
get_cartopy = 'cartopy.crs' in sys.modules
if not get_cartopy:
    print('You have not imported the {} module'.format('cartopy.crs'))

You have not imported the cartopy.crs module


In [1]:
class Observer:
    """
    Container for methods of collecting satellite measurements and regridding them to match simulation grid.
    """

    def __init__(
        self,
        emulator,
        start_date=(2017, 7, 1),
        end_date=(2017, 7, 1),
        meanTargetName='AOD_550_Dark_Target_Deep_Blue_Combined_Mean',
        sdTargetName='AOD_550_Dark_Target_Deep_Blue_Combined_Standard_Deviation'
    ):
        """
        Collect, tabulate and regrid the measurements for a range of days.

        Arguments

        emulator : Emulator
            Supplies lat1, lat2, lon1 and lon2, the bounds of the region from which measurements are desired.
        start_date : tuple
            (year, month, day) of the first day of measurements to use.
        end_date : tuple
            (year, month, day) of the last day of measurements to use (inclusive).
        meanTargetName : string
            Name of the variable read from each sample as the mean response (AOD measurements).
        sdTargetName : string
            Name of the variable read from each sample as the spread of the response
            (the AOD standard deviation variable by default).


        Value

        None


        Raises

        ValueError
            If no stored file matches the requested dates.
        """

        # Collect useful parameters for defining the region of interest
        self.emulator = emulator

        self.lat1 = emulator.lat1
        self.lat2 = emulator.lat2
        self.lon1 = emulator.lon1
        self.lon2 = emulator.lon2

        self.start_year, self.start_month, self.start_day = start_date
        self.end_year, self.end_month, self.end_day = end_date

        # Keep names of variables to collect
        self.meanTargetName = meanTargetName
        self.sdTargetName = sdTargetName

        # List every file available in the observation store
        self.obs_files = c3.FileSystem.inst().listFiles("azure://modis-daily-level3/").files
        self.file_urls = [file.url for file in self.obs_files]

        # Collect netCDF4 samples
        self.samples, self.urls = self.__get_samples__(*start_date, *end_date)

        # Fail early with a readable message instead of letting pd.concat
        # complain about having nothing to concatenate further down.
        if not self.urls:
            raise ValueError(
                'No observation files found between {} and {}'.format(start_date, end_date)
            )

        # Datasets are keyed by instrument code + day of year, both sliced from the url.
        # NOTE(review): the key carries no year, so files from different years
        # sharing a day of year would overwrite each other - confirm the store
        # holds a single year.
        names = [url[27:30] + url[41:44] for url in self.urls]

        # Produce as pandas Data Frames
        self.datasets = {
            name: self.__get_dataset__(sample, url)
            for name, sample, url in zip(names, self.samples, self.urls)
        }

        self.rgdatasets = {name: self.regrid(name) for name in names}

        # Get max mean for plotting purposes (Series.max skips NaN cells)
        all_means = pd.concat(
            [frame.meanResponse for frame in self.rgdatasets.values()], axis=0
        )
        self.max_mean = all_means.max()

        return


    def regrid(
        self,
        whichSet,
        method="nearest"
    ):
        """
        MOD = Terra, ~10:30am, MYD = Aqua, ~1:30pm local time

        Arguments

        instrument : string
            "Terra" or "Aqua."
        method : string
            Consider "nearest" or "cubic."


        Value

        pandas DataFrame
            Regridded measurements. 
        """

        df = self.datasets[whichSet]

        points = np.array(df.loc[:, 'latitude':'longitude'])
        mean = np.array(df.loc[:, 'meanResponse'])
        sd = np.array(df.loc[:, 'sdResponse'])

        # Define the points of the regrid-onto mesh
        x_range = [
            x
            for x in [0.9375 + (2.8125 - 0.9375)*k for k in range(-24, 21)]
            if x >= self.lon1 and x <= self.lon2
        ]
        y_range = [
            y
            for y in [0.625 + (1.875 - 0.625)*k for k in range(-24, 8)]
            if y >= self.lat1 and y <= self.lat2
        ]

        # Create the target mesh
        xi = np.array([[y, x] for x in x_range for y in y_range])
        dfg = pd.DataFrame(xi, columns=['latitude', 'longitude'])
        dfg['time'] = np.median(df['time']) - (1 + 1/6)

        # Perform the regridding
        dfg['meanResponse'] = griddata(points, mean, xi, method=method)
        dfg['sdResponse'] = griddata(points, sd, xi, method=method)

        return dfg



    """
    Visualization
    """



    def plot_observations(
        self,
        missingness=False,
        folder=None,
        save=False
    ):
        """
        """

        BBox = [self.lon1, self.lon2, self.lat1, self.lat2]

        for whichSet, df in self.rgdatasets.items():

            data = df[
                (df.longitude >= BBox[0]) &
                (df.longitude <= BBox[1]) &
                (df.latitude >= BBox[2]) &
                (df.latitude <= BBox[3])
            ]

            projection = ccrs.PlateCarree(central_longitude=0)
            fig = plt.figure(figsize=(10,10), facecolor='yellow')

            # Draw island
            ax = fig.add_subplot(1, 1, 1, projection=projection)
            ax.coastlines()

            # Produce gridlines, coordinate labels
            ax.set_extent(BBox, ccrs.PlateCarree())
            ax.gridlines(draw_labels=True, crs=projection)

            scatter_sd = ax.scatter(
                data.longitude, data.latitude, zorder=1, alpha=0.1, c='k',
                s=data.sdResponse*10000
            )

            # Add points along flight path and color for altitude
            scatter = ax.scatter(
                data.longitude, data.latitude, zorder=1, alpha=1,
                c=data.meanResponse, cmap="Reds",
                vmax=self.max_mean
            )

            cbar = plt.colorbar(scatter, shrink=0.4)
            cbar.set_label(self.meanTargetName)

            plt.title(whichSet)

            # plt.text(180, -90, url[-16:-3], fontsize='xx-large', ha='right', va='bottom', color='red')

            # folder='deep_blue_combined_mean/'        
            # img_title = folder + variable_name + str(url[-16:-3])
            if save:
                plt.savefig(folder + str(whichSet))
            plt.show()

        return



    """
    Helper functions
    """



    def __get_samples__(
        self,
        start_year=2017,
        start_month=7,
        start_day=1,
        end_year=2017,
        end_month=7,
        end_day=7,
    ):
        """
        Collect netCDF4 samples for the given date.

        Value

        tuple of lists
            First, the selected netCDF4 samples. Second, the urls to find them again.
        """
        # Get day of year
        from datetime import date

        my_start_date = date(start_year, start_month, start_day)
        my_end_date = date(end_year, end_month, end_day)
        days_of_the_year = list(range(
            my_start_date.timetuple().tm_yday,
            my_end_date.timetuple().tm_yday + 1
        ))
        days_of_the_year = [str(day) for day in days_of_the_year]

        # Get sample(s) from that day
        my_urls = [url for url in self.file_urls if url[41:44] in days_of_the_year]
        samples = [c3.NetCDFUtil.openFile(my_url) for my_url in my_urls]

        return (samples, my_urls)


    def __get_dataset__(
        self,
        sample,
        url
    ):
        """
        Value

        pandas DataFrame
        """

        # Design the data frame used to carry measurements
        lat = sample["latitude"][:].data
        lon = sample["longitude"][:].data

        data = pd.DataFrame()
        data["latitude"] = [l for l in lat for n in range(0, len(lon))]
        data["longitude"] = [l for l in lon]*len(lat)

        data["time"] = self.__get_hours_from_url__(url)

        meanResponse = sample[self.meanTargetName][:].data
        sdResponse = sample[self.sdTargetName][:].data

        data["meanResponse"] = meanResponse.flatten()
        data["sdResponse"] = sdResponse.flatten()

        data.loc[data["meanResponse"] < 0, "meanResponse"] = float('NaN')
        data.loc[data["sdResponse"] < 0, "sdResponse"] = float('NaN')

        data = data.loc[
            (data.latitude >= self.lat1) &
            (data.latitude <= self.lat2) &
            (data.longitude >= self.lon1) &
            (data.longitude <= self.lon2)
        ].reset_index(drop=True)
        
        return data


    def __get_hours_from_url__(
        self,
        url
    ):
        day = url[41:44]
        instrument = url[27:30]
        hours = float(day)*24 + (instrument=='MOD')*10.5 + (instrument=='MYD')*13.5
        return hours