In [1]:
import pickle
from glob import glob

import numpy as np
import pandas as pd

In [2]:
import datetime

def extract_date_from_file_name(FILE_NAME):
    """Parse the date embedded in a file name.

    The base name (everything after the last '/') is split on '.'; the second
    field, with its first character removed, is read as a four-digit year
    followed by a day-of-year number (``"%Y%j"``).

    Returns
    -------
    datetime.datetime
        Midnight of the parsed day.
    """
    base_name = FILE_NAME.split('/')[-1]
    date_field = base_name.split('.')[1]
    year_and_day_of_year = date_field[1:]
    return datetime.datetime.strptime(year_and_day_of_year, "%Y%j")

In [3]:
import configparser


# Read the notebook settings from the local INI file.
config = configparser.ConfigParser()
config.read("./config.ini")

# Glob pattern: the configured 2020 HDF location followed by a wildcard,
# expanded into the list of files that the later cells iterate over.
hdf_path_prefix = config['path']['hdf_path_2020']
PATH = f"{hdf_path_prefix}*"
FILE_LIST = glob(PATH)

In [4]:
# Load the Mumbai geodata (bounding-box limits, boundary and pixel mask).
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager closes the file handle as soon as the object is read.
with open("mumbai_geodata.pkl", "rb") as geodata_file:
    pkl_obj = pickle.load(geodata_file)

# Longitude / latitude limits used as the map corners when plotting.
MUMBAI_B_MIN_LON, MUMBAI_B_MAX_LON = pkl_obj['min_lon'], pkl_obj['max_lon']
MUMBAI_B_MIN_LAT, MUMBAI_B_MAX_LAT = pkl_obj['min_lat'], pkl_obj['max_lat']
boundary = pkl_obj['boundary']
# Index applied to the longitude/latitude grids and to the data when plotting.
mask = pkl_obj['mumbai_mask']

# Name of the dataset selected from each HDF file.
SDS_NAME = "Optical_Depth_047"

In [5]:
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm

def plot_mumbai_aod_map(name, scaled_data, date_time, save=False, should_mask=True):
    """Draw a scatter map of AOD values over the Mumbai bounding box.

    Parameters
    ----------
    name : str
        Source file name. Its last four characters are dropped to build the
        plot title and the output file stem.
    scaled_data : array-like
        Values used to colour the points. When ``should_mask`` is true the
        array is indexed with the module-level ``mask``; otherwise it is used
        as-is and has to hold one value per plotted point already.
    date_time : datetime.datetime
        Date printed in the title (formatted as ``%Y-%m-%d``).
    save : bool, default False
        When true the figure is written to ``images/<title><suffix>.png`` and
        then closed.
    should_mask : bool, default True
        ``False`` marks the values as already restricted to the plotted points
        and appends ``_interpolated`` to the output file name.

    Notes
    -----
    Reads the module-level names ``mask``, ``mcd19a2_longitude``,
    ``mcd19a2_latitude``, ``SDS_NAME`` and the ``MUMBAI_B_*`` limits.
    """
    fig = plt.figure(figsize=(20, 10))

    # Margin added on every side of the bounding box.
    pad = 0.01

    m = Basemap(projection='cyl', resolution='l',
                llcrnrlat=MUMBAI_B_MIN_LAT - pad, urcrnrlat=MUMBAI_B_MAX_LAT + pad,
                llcrnrlon=MUMBAI_B_MIN_LON - pad, urcrnrlon=MUMBAI_B_MAX_LON + pad,
                suppress_ticks=True)

    m.readshapefile("../Municipal_Spatial_Data/Mumbai/Mumbai", 'Mumbai', linewidth=1)

    m.drawparallels(np.arange(-90., 120., 0.05), labels=[1, 0, 0, 0], fontsize=16)
    m.drawmeridians(np.arange(-180., 181., 0.05), labels=[0, 0, 0, 1], fontsize=16)
    x, y = m(mcd19a2_longitude[mask], mcd19a2_latitude[mask])

    # Both modes draw the same scatter; only the colour values and the
    # file-name suffix differ.
    if should_mask:
        add_text = ""
        values = scaled_data[mask]
    else:
        add_text = "_interpolated"
        values = scaled_data

    sc = m.scatter(x, y, c=values, s=250, cmap=plt.cm.jet,
                   edgecolors='white', linewidth=0, marker='s')

    # Pass the scatter explicitly so the colour bar never picks up another
    # "current image" by accident.
    cb = m.colorbar(sc)
    cb.set_label('AOD', size=18)
    cb.ax.tick_params(labelsize=16)

    plotTitle = name[:-4]
    plt.title('{0}\n {1}\n Mumbai, India\n {2}'.format(plotTitle, SDS_NAME, date_time.strftime("%Y-%m-%d")), fontsize=24)

    if save:
        pngfile = 'images/{0}{1}.png'.format(plotTitle, add_text)
        try:
            fig.savefig(pngfile, bbox_inches="tight", dpi=400)
        finally:
            # Figures stay registered with pyplot until closed; when this
            # function runs once per file, the unclosed figures pile up in
            # memory. A figure that is not saved is left open for display.
            plt.close(fig)

In [6]:
def hypotenuse_distance(x, y):
    """Return ``sqrt(x**2 + y**2)``.

    Works on scalars as well as NumPy arrays (element-wise).
    """
    squared_sum = x ** 2 + y ** 2
    return np.sqrt(squared_sum)

In [7]:
def model_predict(X, regressor, scaler):
    """Predict with ``regressor`` and pair every prediction with a distance.

    Parameters
    ----------
    X : array-like, shape (n_samples, >= 3)
        Feature rows. Columns 1 and 2 are combined with
        ``hypotenuse_distance`` to give the per-row distance.
    regressor : object
        Anything exposing ``predict(X)``.
    scaler : tuple/list of two objects, single transformer, or falsy
        * ``(sc_X, sc_y)`` - ``X`` goes through ``sc_X.transform`` and the
          prediction through ``sc_y.inverse_transform``.
        * any other truthy object - used as a feature transformer via
          ``scaler.transform(X)``.
        * falsy - ``X`` is passed to the regressor unchanged.

    Returns
    -------
    numpy.ndarray
        One row per sample: prediction in column 0, distance in column 1.

    Notes
    -----
    A blanket ``except Exception`` used to decide between the two scaler
    kinds, so a genuine failure inside the pair branch was replaced by an
    unrelated ``AttributeError`` on the tuple. The kind is now decided up
    front and real errors propagate unchanged.
    """
    if scaler:
        if isinstance(scaler, (tuple, list)) and len(scaler) == 2:
            sc_X, sc_y = scaler
            y_pred = sc_y.inverse_transform(regressor.predict(sc_X.transform(X)))
        else:
            y_pred = regressor.predict(scaler.transform(X))
    else:
        y_pred = regressor.predict(X)

    # Distances come from the untransformed features, computed for all rows
    # at once instead of one Python call per row.
    features = np.asarray(X)
    distances = hypotenuse_distance(features[:, 1], features[:, 2])
    return np.column_stack((y_pred, distances))

In [8]:
def get_weighted_value(y):
    """Inverse-distance-squared weighted average of a set of values.

    Parameters
    ----------
    y : numpy.ndarray, shape (n, 2)
        Column 0 holds the values, column 1 the distances used as weights.

    Returns
    -------
    float
        ``sum(v / d**2) / sum(1 / d**2)``. If one or more distances are
        exactly zero, the mean of the values at those zero distances is
        returned instead.

    Notes
    -----
    Without the zero-distance branch a zero distance produces an infinite
    weight and the quotient evaluates to ``inf / inf = NaN``.
    """
    values, distances = y[:, 0], y[:, 1]

    coincident = distances == 0
    if np.any(coincident):
        return np.mean(values[coincident])

    weights = 1 / np.power(distances, 2)
    return np.sum(weights * values) / np.sum(weights)
    
def mix(y_list):
    """Collapse every entry of ``y_list`` into one weighted value.

    Each element is handed to ``get_weighted_value``; the results are
    returned, in input order, as a NumPy array.
    """
    blended = [get_weighted_value(candidates) for candidates in y_list]
    return np.array(blended)

In [9]:
def get_nearest_3x3_grid(data, x, y):
    """Summarise the 3x3 neighbourhood around ``data[x, y]``.

    The centre is clamped so that the window lies inside ``data``: a point on
    an edge uses the window that touches that edge (and therefore still
    contains the point itself).

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional array; NaN marks missing values.
    x, y : int
        Row and column index of the requested centre.

    Returns
    -------
    dict
        Always contains ``"x"`` and ``"y"`` (the clamped centre). When the
        window holds at least one non-NaN value it also contains ``"data"``
        (the window as float), ``"average"``, ``"std"`` and ``"median"``,
        all computed while ignoring NaN. Callers rely on those keys being
        absent for an all-NaN window.

    Notes
    -----
    The upper-edge adjustment used to subtract 2, which moved the window one
    step too far and dropped the requested last row/column from it; negative
    or oversized indices were not brought back into range either.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]

    # Keep the centre within [1, n - 2]; for arrays shorter than 3 along an
    # axis the centre is 1 and the slice below is clipped by NumPy.
    x = min(max(x, 1), max(n_rows - 2, 1))
    y = min(max(y, 1), max(n_cols - 2, 1))

    three_by_three = data[x - 1:x + 2, y - 1:y + 2].astype(float)

    valid_count = np.count_nonzero(~np.isnan(three_by_three))
    if valid_count == 0:
        return {
            "x": x,
            "y": y,
        }

    return {
        "x": x,
        "y": y,
        "data": three_by_three,
        "average": np.nanmean(three_by_three),
        "std": np.nanstd(three_by_three),
        "median": np.nanmedian(three_by_three),
    }

In [10]:
# Load the Mumbai geodata pickle and pull out the pixel mask and coordinates.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager closes the file handle as soon as the object is read.
with open("./mumbai_geodata.pkl", "rb") as geodata_file:
    MUMBAI_GEO_OBJ = pickle.load(geodata_file)
MUMBAI_MASK = MUMBAI_GEO_OBJ['mumbai_mask']
# perform_task subtracts column 0 from a station latitude and column 1 from a
# station longitude.
MUMBAI_COORDS = MUMBAI_GEO_OBJ['mumbai_coords']

# NOTE(review): the two settings below are not referenced in the cells shown
# here - confirm whether later cells still need them.
DECIMAL_PLACES = 7
PROCESS_ML_DATA = True

# City used to select monitoring stations.
CITY = "Mumbai"

In [11]:
# Station metadata; keep only the station ids whose 'City' matches CITY.
india_stations_df = pd.read_pickle('../2015-2020-pm25/india_stations.pkl')
stations = india_stations_df.loc[india_stations_df['City'] == CITY, 'StationId'].values

# Longitude / latitude grids used for plotting.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager closes the file handle as soon as the object is read.
with open("./mcd19a2.pkl", "rb") as grid_file:
    mcd19a2_obj = pickle.load(grid_file)
mcd19a2_longitude, mcd19a2_latitude = mcd19a2_obj['longitude'], mcd19a2_obj['latitude']

In [12]:
# Directory holding the per-station model pickles ("<station_id>_spt.pkl").
model_dir = "spt_models"

import warnings
# Silence every warning from here on. NOTE(review): this also hides NumPy
# RuntimeWarnings (divide by zero, mean of an all-NaN slice) raised by the
# processing cells below - consider narrowing the filter.
warnings.filterwarnings("ignore")

In [13]:
from pyhdf import SD

def _load_scaled_aod(file_name):
    """Read ``SDS_NAME`` from an HDF file and return the scaled values.

    Fill values are replaced by NaN, the leading axis is averaged away with
    ``np.nanmean`` and the result is multiplied by the dataset's
    ``scale_factor`` attribute. The dataset and file handles are released
    explicitly, even when reading fails.
    """
    hdf = SD.SD(file_name)
    try:
        sds = hdf.select(SDS_NAME)
        try:
            raw = sds.get()
            attributes = sds.attributes()
        finally:
            sds.endaccess()
    finally:
        hdf.end()

    scale_factor = attributes['scale_factor']
    fill_value = attributes['_FillValue']

    data = raw.astype(float)
    data[data == fill_value] = np.nan
    data = np.nanmean(data, axis=0)

    return data * scale_factor


def perform_task(idx):
    """Plot the raw and the station-interpolated AOD map for one HDF file.

    Parameters
    ----------
    idx : int
        Position of the file in the module-level ``FILE_LIST``.

    Steps
    -----
    1. Load and scale the ``SDS_NAME`` dataset and save its map.
    2. For every station in ``stations``, load its model pickle, take the
       3x3 average AOD around the station pixel and predict a value for every
       row of ``MUMBAI_COORDS``. Stations without a valid AOD are skipped.
    3. Blend the per-station predictions with ``mix`` and save the result as
       the ``_interpolated`` map. A failure in this step is reported and
       skipped so that one bad file does not abort the whole batch.

    Returns
    -------
    None
    """
    FILE_NAME = FILE_LIST[idx]
    NAME = FILE_NAME.split('/')[-1]
    date_time = extract_date_from_file_name(FILE_NAME)

    # ------------------ Data Loading -------------------------

    scaled_data = _load_scaled_aod(FILE_NAME)

    plot_mumbai_aod_map(NAME, scaled_data, date_time, save=True)

    # ------------------ Interpolation -------------------------

    n_points = MUMBAI_COORDS.shape[0]
    y_sp_interpolate = []

    for station_id in stations:
        # NOTE: pickle can execute arbitrary code; only load trusted models.
        with open(f"{model_dir}/{station_id}_spt.pkl", "rb") as model_file:
            best = pickle.load(model_file)

        station_info = best['station_id']
        fix_station_lat, fix_station_lon = station_info['nearest_lat'], station_info['nearest_lon']
        x_coord = station_info['x_coord']
        y_coord = station_info['y_coord']

        regressor = best['regressor']
        scaler = best['scaler']

        try:
            # 'average' is absent when the 3x3 window holds no valid value.
            fix_station_aod = get_nearest_3x3_grid(scaled_data, x_coord, y_coord)['average'].round(3)

            # One feature row per grid point: station AOD, latitude offset,
            # longitude offset.
            X_sp_interpolate_sub = np.column_stack((
                np.full(n_points, fix_station_aod),
                fix_station_lat - MUMBAI_COORDS[:, 0],
                fix_station_lon - MUMBAI_COORDS[:, 1],
            ))

            y_sp_interpolate.append(model_predict(X_sp_interpolate_sub, regressor, scaler))
        except KeyError:
            continue

    if not y_sp_interpolate:
        # No station had usable data for this file: nothing to interpolate.
        return

    try:
        # (station, point, 2) -> (point, station, 2) so that every grid point
        # carries the predictions of all stations.
        stacked = np.array(y_sp_interpolate).transpose(1, 0, 2)
        blended = mix(stacked).round(3).reshape(-1, 1)
        plot_mumbai_aod_map(NAME, blended, date_time, save=True, should_mask=False)
    except Exception as exc:
        # Best effort: report and move on instead of hiding the failure.
        print(f"[perform_task] interpolated map skipped for {NAME}: {exc!r}")

In [14]:
import multiprocessing as mp

# Process every file in parallel with one worker per CPU.
pool = mp.Pool(mp.cpu_count())
try:
    # map() blocks until all tasks have finished.
    pool.map(perform_task, range(len(FILE_LIST)))
finally:
    # Shut the workers down even when a task raised, and wait for them to exit.
    pool.close()
    pool.join()