In [11]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import xarray as xr
from collections import Counter



def load_landcover(path):
    '''
    Load a land-cover NetCDF file into a flat pandas DataFrame.

    The class and cell-boundary dimensions are dropped first, so only the
    variables on the remaining dimensions (lat/lon, per the original
    comment) are converted. The index produced by ``to_dataframe`` is then
    moved into ordinary columns and the land-cover columns that are not
    needed downstream are removed.

    Parameters
    ----------
    path : str or path-like
        Location of the land-cover NetCDF file, passed to ``xr.open_dataset``.

    Returns
    -------
    pandas.DataFrame
        One row per index entry of the converted dataset, with a fresh
        integer index. The columns listed in ``useless_cols`` are absent.

    Raises
    ------
    Whatever ``xr.open_dataset`` raises for an unreadable file; xarray/pandas
    errors if one of the hard-coded dimensions or columns is missing.
    '''
    # Dimensions that are removed (together with every variable that uses
    # them) before the dataset is converted to a DataFrame.
    dims_to_drop = ['Num_IGBP_Classes_MOD12C1','Num_UMD_Classes_MOD12C1',
                    'Num_LAI_FPAR_Classes_MOD12C1','latbnd','lonbnd']

    # Columns that are removed from the resulting DataFrame.
    useless_cols = ['Majority_Land_Cover_Type_2','Majority_Land_Cover_Type_2_Assessment','Majority_Land_Cover_Type_3',
                    'Majority_Land_Cover_Type_3_Assessment','Majority_Land_Cover_Type_1_Assessment']

    # The context manager closes the underlying file once the data has been
    # materialised as a DataFrame; the original left the handle open.
    with xr.open_dataset(path) as landcover:
        landcover_df = landcover.drop_dims(dims_to_drop).to_dataframe()

    # reset_index turns the index levels into regular columns.
    landcover_df = landcover_df.reset_index()
    landcover_df = landcover_df.drop(columns = useless_cols)

    return landcover_df



def load_train(H8_MOD04_path,MOD04_L2_path):
    '''
    Build the training table from the predictor file and the response file.

    The H8_MOD04 file is converted wholesale to a DataFrame; the variable
    'AOD_550_Dark_Target_Deep_Blue_Combined' is read from the MOD04_L2 file
    and appended as an extra column.

    Parameters
    ----------
    H8_MOD04_path : str or path-like
        NetCDF file opened with ``xr.open_dataset`` (predictors).
    MOD04_L2_path : str or path-like
        NetCDF file opened with ``netCDF4.Dataset`` (response).

    Returns
    -------
    pandas.DataFrame
        The H8_MOD04 columns plus the response column, joined column-wise.

    Notes
    -----
    ``pd.concat(axis=1)`` aligns on the index. The response Series gets a
    default 0..N-1 index, so this assumes the H8_MOD04 frame is indexed the
    same way and in the same row order -- TODO confirm against the files.
    '''
    response_name = 'AOD_550_Dark_Target_Deep_Blue_Combined'

    # load H8_MOD04; the file is closed as soon as the DataFrame exists
    with xr.open_dataset(H8_MOD04_path) as H8_MOD04:
        H8_MOD04_df = H8_MOD04.to_dataframe()

    # load MOD04_L2; slicing with [:] reads the values into memory, so the
    # dataset can be closed before the Series is built
    with nc.Dataset(MOD04_L2_path) as MOD04_L2:
        response = MOD04_L2[response_name][:]

    AOD_df = pd.Series(response,name = response_name).to_frame()

    # combine the training set
    train_df = pd.concat([H8_MOD04_df,AOD_df],axis=1)

    return train_df


def build_class_file(file, landcover_path):
    '''
    Add grid-cell indices ('lat_class', 'lon_class') to a table of points.

    The cell boundaries are read from the 'latbnd' and 'lonbnd' variables of
    the land-cover file. Each row's 'lat' / 'lon' value is then located
    among those boundaries with ``np.searchsorted``.

    Parameters
    ----------
    file : pandas.DataFrame
        Table with 'lat' and 'lon' columns. It is modified IN PLACE: the two
        class columns are added (or overwritten) on this very object.
    landcover_path : str or path-like
        Land-cover NetCDF file providing 'latbnd' and 'lonbnd'.

    Returns
    -------
    pandas.DataFrame
        The same object as ``file``, now carrying:
        * 'lat_class' -- number of latitude boundaries that are >= lat
        * 'lon_class' -- number of longitude boundaries that are < lon
          (assumes 'lonbnd' is stored in increasing order -- it is not
          sorted here; TODO confirm against the file)
    '''
    # Read both boundary vectors into memory and close the file right away;
    # the original never closed the dataset.
    with nc.Dataset(landcover_path) as landcover:
        # Sorted ascending so searchsorted is valid whatever the stored
        # order is. np.asarray takes the raw values (any mask is ignored).
        latbnd_arr = np.sort(np.asarray(landcover.variables['latbnd'][:]))
        lonbnd_arr = np.asarray(landcover.variables['lonbnd'][:])

    # coordinates of the rows to classify
    lat = np.array(file['lat'])
    lon = np.array(file['lon'])

    # side='left' gives the count of boundaries strictly below each value.
    # Subtracting that count from the total makes the latitude index grow
    # as latitude decreases.
    ny = len(latbnd_arr)
    lat_class = ny - np.searchsorted(latbnd_arr, lat, side='left')
    lon_class = np.searchsorted(lonbnd_arr, lon, side='left')

    file['lat_class'] = lat_class
    file['lon_class'] = lon_class

    return file


def combine_tables_by_class(train,landcover):
    '''
    Attach the land-cover columns to the training rows by grid cell.

    A left join on ('lat_class', 'lon_class') keeps every training row and
    pulls in the matching land-cover row. Afterwards every row that holds a
    missing value in ANY column is discarded, which removes unmatched rows
    as well as rows whose own features are missing.

    Non-key columns present in both tables receive the default pandas
    suffixes ('_x' for train, '_y' for landcover).

    Parameters
    ----------
    train : pandas.DataFrame
        Training table containing 'lat_class' and 'lon_class'.
    landcover : pandas.DataFrame
        Land-cover table containing 'lat_class' and 'lon_class'.

    Returns
    -------
    pandas.DataFrame
        The joined table without rows containing missing values.
    '''
    join_keys = ['lat_class', 'lon_class']
    merged = pd.merge(left=train, right=landcover, how='left', on=join_keys)
    return merged.dropna()

In [12]:
# Location of the land-cover NetCDF file.
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# where this exact file exists.
landcover2019_path = '/Users/sunfuhan/Desktop/capstone/MCD12C1.A2019001.006.2020220162300.nc'
# Flatten the land-cover file into a DataFrame.
landcover2019 = load_landcover(landcover2019_path)
# Bare last expression: the notebook renders the frame below the cell.
landcover2019

Unnamed: 0,lat,lon,Majority_Land_Cover_Type_1
0,-10.025000,110.025002,0.0
1,-10.025000,110.074997,0.0
2,-10.025000,110.125000,0.0
3,-10.025000,110.175003,0.0
4,-10.025000,110.224998,0.0
...,...,...,...
631596,-45.025002,154.824997,0.0
631597,-45.025002,154.875000,0.0
631598,-45.025002,154.925003,0.0
631599,-45.025002,154.975006,0.0


In [13]:
# Load the training set: predictors from the H8_MOD04 file, response from
# the MOD04_L2 file.
# NOTE(review): both paths are absolute and machine-specific.
H8_MOD04_path = '/Users/sunfuhan/Desktop/capstone/H8_MODIS_2019-2020/H8_MOD04_L2/H8_MOD04_L2_Aus_0.05_2019-01-01.nc'
MOD04_L2_path = '/Users/sunfuhan/Desktop/capstone/MODIS_L2_Aus_2019-2020/MOD04_L2_Aus_0.05_2019-01-01.nc'
train_df = load_train(H8_MOD04_path,MOD04_L2_path)

In [14]:
# Assign each training row its lat/lon grid-cell class, using the cell
# boundaries stored in the land-cover file.
# build_class_file adds the columns to the frame it is given and returns
# that same frame, so re-running this cell just overwrites the two columns.
train_df = build_class_file(train_df, landcover2019_path)
print(train_df.shape)
train_df.head()

(19540, 30)


Unnamed: 0,lat,lon,time,solar_azimuth_angle,solar_zenith_angle,channel_0001_brf,channel_0001_scaled_radiance,channel_0002_brf,channel_0002_scaled_radiance,channel_0003_brf,...,channel_0010_brightness_temperature,channel_0011_brightness_temperature,channel_0012_brightness_temperature,channel_0013_brightness_temperature,channel_0014_brightness_temperature,channel_0015_brightness_temperature,channel_0016_brightness_temperature,AOD_550_Dark_Target_Deep_Blue_Combined,lat_class,lon_class
0,-14.753107,131.765823,2019-01-01 01:20:00,111.947266,27.097656,0.130859,0.120117,0.115234,0.105469,0.112305,...,257.25,297.9375,278.1875,300.3125,297.9375,291.9375,276.4375,0.155,96,436
1,-16.210176,141.704361,2019-01-01 01:20:00,115.446289,17.688477,0.133789,0.131836,0.115234,0.113281,0.126953,...,256.5625,294.9375,276.75,297.0,294.3125,288.125,273.9375,0.002,125,635
2,-16.236547,141.939377,2019-01-01 01:20:00,115.623047,17.483398,0.141602,0.139648,0.12793,0.125977,0.12793,...,256.6875,294.125,276.5625,296.3125,293.4375,287.1875,273.5,-0.05,125,639
3,-16.299374,141.688354,2019-01-01 01:20:00,115.15332,17.663086,0.120117,0.119141,0.105469,0.104492,0.103516,...,256.6875,298.125,279.75,300.4375,297.625,290.875,275.6875,-0.016,126,634
4,-16.325727,141.923538,2019-01-01 01:20:00,115.327148,17.458008,0.125977,0.124023,0.112305,0.110352,0.109375,...,256.9375,298.875,280.3125,301.25,298.0,290.875,275.5,-0.05,127,639


In [15]:
# Assign each land-cover row its lat/lon grid-cell class with the same
# boundaries, so both tables share the ('lat_class', 'lon_class') key.
# NOTE: build_class_file modifies its argument in place and returns it, so
# landcover_df and landcover2019 refer to the same DataFrame after this cell.
landcover_df = build_class_file(landcover2019, landcover2019_path)
print(landcover_df.shape)
landcover_df.head()

(631601, 5)


Unnamed: 0,lat,lon,Majority_Land_Cover_Type_1,lat_class,lon_class
0,-10.025,110.025002,0.0,1,1
1,-10.025,110.074997,0.0,1,2
2,-10.025,110.125,0.0,1,3
3,-10.025,110.175003,0.0,1,4
4,-10.025,110.224998,0.0,1,5


In [16]:
# Left-join the land-cover columns onto the training rows by grid-cell class;
# rows containing any missing value are dropped inside the helper.
train = combine_tables_by_class(train_df,landcover_df)
print(train.shape)

(19540, 33)


In [17]:
# Preview the merged table; columns present in both inputs appear with the
# pandas '_x' (train) / '_y' (land cover) suffixes.
train.head()

Unnamed: 0,lat_x,lon_x,time,solar_azimuth_angle,solar_zenith_angle,channel_0001_brf,channel_0001_scaled_radiance,channel_0002_brf,channel_0002_scaled_radiance,channel_0003_brf,...,channel_0013_brightness_temperature,channel_0014_brightness_temperature,channel_0015_brightness_temperature,channel_0016_brightness_temperature,AOD_550_Dark_Target_Deep_Blue_Combined,lat_class,lon_class,lat_y,lon_y,Majority_Land_Cover_Type_1
0,-14.753107,131.765823,2019-01-01 01:20:00,111.947266,27.097656,0.130859,0.120117,0.115234,0.105469,0.112305,...,300.3125,297.9375,291.9375,276.4375,0.155,96,436,-14.775,131.774994,10.0
1,-16.210176,141.704361,2019-01-01 01:20:00,115.446289,17.688477,0.133789,0.131836,0.115234,0.113281,0.126953,...,297.0,294.3125,288.125,273.9375,0.002,125,635,-16.225,141.725006,10.0
2,-16.236547,141.939377,2019-01-01 01:20:00,115.623047,17.483398,0.141602,0.139648,0.12793,0.125977,0.12793,...,296.3125,293.4375,287.1875,273.5,-0.05,125,639,-16.225,141.925003,10.0
3,-16.299374,141.688354,2019-01-01 01:20:00,115.15332,17.663086,0.120117,0.119141,0.105469,0.104492,0.103516,...,300.4375,297.625,290.875,275.6875,-0.016,126,634,-16.275,141.675003,10.0
4,-16.325727,141.923538,2019-01-01 01:20:00,115.327148,17.458008,0.125977,0.124023,0.112305,0.110352,0.109375,...,301.25,298.0,290.875,275.5,-0.05,127,639,-16.325001,141.925003,10.0


In [18]:
# Write the merged table to the current working directory, without the index
# column. The 'utf_8_sig' codec writes a UTF-8 byte-order mark at the start.
train.to_csv('result_train.csv',index = False,encoding='utf_8_sig')