In [1]:
import os
import numpy as np

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from random import randint,shuffle
from glob import glob

import sys 
sys.path.append(os.path.join(os.path.expanduser("~"),'station2grid'))
from tools import CustomKNN,plotMap

home=os.path.expanduser("~")

# grid2code

## train

In [None]:

# Dataset selector: domain name, KNN neighbour count and KNN weighting scheme.
domain,k,weights='air',5,'distance'
# Directory name of the interpolated dataset, built from the selector so the two cannot drift apart.
single='domain_%s-k_%s-weightKNN_%s'%(domain,k,weights)
# glob returns paths in arbitrary order; sort them so the seeded train/test split is reproducible.
grid_paths=sorted(glob(os.path.join(home,'station2grid','datasets','npy',domain,single,'grid','*')))
grid_paths[:3]

In [None]:
# Hold out 20% of the grid files; the fixed random_state keeps the split stable for a given path order.
train_paths,test_paths=train_test_split(grid_paths,test_size=0.2, random_state=42)
# Sizes of the full list and of the two partitions.
len(grid_paths),len(train_paths),len(test_paths)


In [None]:
# Divisor applied to the selected grid channel before it is fed to the autoencoder.
norm_const=100

In [None]:
def generator_autoencoder(grid_paths,batch_size=2):
    """Yield endless ``(x, x)`` batches for autoencoder training.

    Each batch is built from ``batch_size`` files drawn at random (with
    replacement) from ``grid_paths``. Every file is loaded with ``np.load``,
    reduced to its first channel, divided by the notebook-level
    ``norm_const`` and cast to float. The per-file arrays are concatenated
    along axis 0, and the same array is yielded as input and target.

    Parameters
    ----------
    grid_paths : sequence of str
        Paths to ``.npy`` grid files; must not be empty.
    batch_size : int
        Number of files drawn per batch.
    """
    while True:
        batch_x=[]
        for _ in range(batch_size):
            # Use the randomly drawn index; indexing with the loop counter would
            # only ever read the first batch_size files.
            j=randint(0,len(grid_paths)-1)
            path=grid_paths[j]
            one_dt = np.load(path)
            one_dt_pm25 = one_dt[...,:1]
            one_dt_pm25_norm = one_dt_pm25 / norm_const
            one_dt_pm25_norm = one_dt_pm25_norm.astype('float')
            batch_x.append(one_dt_pm25_norm)
        batch_x=np.concatenate(batch_x,axis=0)
        yield batch_x,batch_x

        
# Smoke test: pull one batch from the training generator and inspect its shape.
g_train=generator_autoencoder(train_paths,batch_size=2)
batch_x,batch_y=next(g_train)
batch_x.shape

In [None]:
# Convolutional autoencoder. Input/Conv2D/MaxPooling2D/UpSampling2D/Model are Keras names;
# in this notebook they are imported in a later cell, so that cell must be run first.
# The layer order matters: later cells pick the encoder output with layers[6] and rebuild
# the decoder from layers[-7:].
input_shape = (348, 204, 1)
input_img = Input(shape=input_shape)  

# Encoder: three conv + 2x2 pooling stages with 'same' padding, 348x204 -> 44x26 with 4 channels.
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(4, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Decoder: mirror of the encoder using 2x2 upsampling.
x = Conv2D(4, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
# No padding='same' here on purpose: the unpadded 3x3 convolution trims 176x104 to 174x102,
# so the last upsampling lands exactly on 348x204.
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)


# autoencoder
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder.summary()


In [None]:
# Stop training once val_loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=3,
)

# Checkpoint directory for this experiment; created if it does not exist yet.
dir_path=os.path.join(
    home,'station2grid','weights','single',single,'grid2code','test'
)
os.makedirs(dir_path,exist_ok=True)

# Checkpoint file name template; Keras fills in the epoch number and val_loss.
file_name="g2c-epoch_{epoch:02d}-val_loss_{val_loss:.3f}.hdf5"
path=os.path.join(dir_path,file_name)
# Only write a checkpoint when val_loss improves on the best value so far.
checkpointer = ModelCheckpoint(
    filepath=path,
    verbose=1,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)


In [None]:
# Fresh generators for the actual training run (train and held-out paths).
batch_size=2
g_train=generator_autoencoder(train_paths,batch_size=batch_size)
g_test=generator_autoencoder(test_paths,batch_size=batch_size)

epochs = 10

# One "epoch" is defined as len(paths)//batch_size generator steps.
# NOTE(review): these are plain Python generators run with use_multiprocessing=True and
# 8 workers; Keras may hand duplicated batches to the workers in this mode - confirm this is acceptable.
history = autoencoder.fit_generator(
    generator = g_train,
    steps_per_epoch = (len(train_paths) // batch_size),
    
    validation_data = g_test,
    validation_steps = (len(test_paths) // batch_size),
    
    epochs = epochs,
    verbose = 1,
    callbacks=[early_stopping,checkpointer],
    
    use_multiprocessing = True,
    workers = 8,
    max_queue_size = 10,
)

In [None]:
# history.history

In [None]:
# Plot the training and validation loss recorded per epoch by fit_generator.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('autoencoder', fontsize=30)
plt.ylabel('cross entropy', fontsize=20)
plt.xlabel('epoch', fontsize=20)
# Legend entries follow the order of the two plot calls above.
plt.legend(['train', 'val'], loc='upper right')
plt.show()

## test

In [None]:
# Show the checkpoint directory that the following cell reads from.
dir_path

In [None]:
# Match checkpoints by extension rather than by file-name prefix, so the lookup finds the
# files whatever prefix was used when they were saved.
path=os.path.join(dir_path,'*hdf5')
path_list = sorted(glob(path))
if not path_list:
    raise FileNotFoundError('no hdf5 checkpoint found in %s' % dir_path)
# Checkpoint names embed a zero-padded two-digit epoch, so the last name in sorted order
# is the most recently saved checkpoint.
path=path_list[-1]
print(path)

# autoencoder
autoencoder_best = load_model(path)
autoencoder_best.summary()
autoencoder_best.layers[6]

# encoder: input layer up to layers[6], the last pooling layer of the encoder half
encoder_best = Model(autoencoder_best.input, autoencoder_best.layers[6].output)
encoder_best.summary()

# decoder: re-apply the last seven trained layers to a new input with the code's shape
input_decoder = Input(shape=(44, 26, 4))
x = input_decoder
for decoder_layer in autoencoder_best.layers[-7:-1]:
    x = decoder_layer(x)
output_decoder = autoencoder_best.layers[-1](x)
decoder_best = Model(input_decoder, output_decoder)
decoder_best.summary()

In [None]:
# Load one grid file and prepare it exactly as the training generator does.
path = grid_paths[2]
print(path)
one_dt = np.load(path)
# Keep the first channel, the same slice the autoencoder was trained on
# (the generator uses [..., :1], not the last channel).
one_dt_pm25 = one_dt[...,:1]
one_dt_pm25_norm = one_dt_pm25 / norm_const
one_dt_pm25_norm.shape

In [None]:
# Derive an output path by replacing '_grid' with '_encode' in the grid path.
# NOTE(review): the Grid2Code classes in this notebook replace 'grid' with 'code' instead - confirm which naming is intended.
encode_path = path.replace('_grid','_encode')

In [None]:
# Encode the normalised grid prepared earlier; that array is named one_dt_pm25_norm
# (one_ts_grid_pm25_norm is not defined anywhere in this notebook).
one_ts_grid_pm25_norm_encode = encoder_best.predict(one_dt_pm25_norm)
one_ts_grid_pm25_norm_encode.shape

## script

### 1st

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from random import randint,shuffle
from glob import glob
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D,BatchNormalization,Activation
from keras.models import Model,load_model
from keras import backend as K
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils import Sequence
home=os.path.expanduser("~")
import pandas as pd


In [None]:
class Grid2Code():
    """Train a convolutional autoencoder on grid files and export the encoder output per file.

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``batch_size``,
    ``n_epochs`` and ``autoencoderArcht``. Grid files are read from
    ``~/station2grid/datasets/npy/<domain>/<single>/grid``; checkpoints and the
    training history go to
    ``~/station2grid/weights/single/<single>/grid2code/<autoencoderArcht>``.
    """
    # Divisor used by normalize() and multiplier used by denormalize().
    norm_const=100

    def __init__(self,opt):
        self.opt=opt
        self.single='domain_%s-k_%s-weightKNN_%s'%(opt.domain,opt.k,opt.weightKNN)
        self.batch_size=opt.batch_size
        self.domain=opt.domain
        self.n_epochs = opt.n_epochs
        self.autoencoderArcht=opt.autoencoderArcht
        
        self.grid_paths=self.get_paths()
        # 80/20 split of the grid files with a fixed seed.
        self.trainPaths,self.valPaths=train_test_split(self.grid_paths,test_size=0.2, random_state=42)
        self.autoencoder=self.get_autoencoder()
        self.weight_dir=self.get_weight_dir()
        
    def get_autoencoder(self):
        """Build, compile and return the autoencoder (also prints its summary)."""
        input_shape = (348, 204, 1)
        input_img = Input(shape=input_shape)  

        # Encoder: 348x204 -> 44x26 with 4 channels.
        x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(4, (3, 3), activation='relu', padding='same')(x)
        encoded = MaxPooling2D((2, 2), padding='same')(x)

        # Decoder.
        x = Conv2D(4, (3, 3), activation='relu', padding='same')(encoded)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)
        # Unpadded on purpose: trims 176x104 to 174x102 so the last upsampling gives 348x204.
        x = Conv2D(16, (3, 3), activation='relu')(x)
        x = UpSampling2D((2, 2))(x)
        decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

        # autoencoder
        autoencoder = Model(input_img, decoded)
        autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
        
        autoencoder.summary()
        return autoencoder
    
    def train(self): 
        """Fit the autoencoder with early stopping and checkpointing, then save ``history.csv``."""
        early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3, min_delta=0.01)
        checkpointer=self.get_checkpointer()
        
        g_train=self.get_generator(self.trainPaths)
        g_val=self.get_generator(self.valPaths)
        
        print('training...')
        history = self.autoencoder.fit_generator(
            generator = g_train,
            steps_per_epoch = (len(self.trainPaths) // self.batch_size),

            validation_data = g_val,
            validation_steps = (len(self.valPaths) // self.batch_size),

            epochs = self.n_epochs,
            verbose = 0,
            callbacks=[early_stopping,checkpointer],

            use_multiprocessing = True,
            workers = 8,
            max_queue_size = 10,
        )
        print('finish!')
        
        df_history=pd.DataFrame(history.history)
        path=os.path.join(self.weight_dir,'history.csv',)
        df_history.to_csv(path,index=False)
        
    def test(self): 
        """Encode every grid file with the last saved checkpoint and write the codes as ``.npy``.

        Raises
        ------
        FileNotFoundError
            If ``weight_dir`` contains no ``grid2code*`` checkpoint.
        """
        weights = sorted(glob(os.path.join(self.weight_dir,'grid2code*')))
        if not weights:
            raise FileNotFoundError('no grid2code checkpoint found in %s' % self.weight_dir)
        # File names embed a zero-padded epoch, so the last one in sorted order is the latest saved.
        weight=weights[-1]
        
        print('testing...')
        autoencoder_best = load_model(weight)
        # layers[6] is the last pooling layer of the encoder half built in get_autoencoder().
        encoder_best = Model(autoencoder_best.input, autoencoder_best.layers[6].output)
        
        # Codes go next to the 'grid' directory: <dataset>/code/<autoencoderArcht>/
        dataset_dir=os.path.dirname(os.path.dirname(self.grid_paths[0]))
        code_dir=os.path.join(dataset_dir,'code',self.autoencoderArcht)
        os.makedirs(code_dir,exist_ok=True)
        
        for grid_path in self.grid_paths:
            one_dt_pm25_norm=self.gridPath2arr(grid_path)
            one_dt_pm25_norm_encode = encoder_best.predict(one_dt_pm25_norm)
            code_name=os.path.basename(grid_path).replace('grid','code')
            np.save(os.path.join(code_dir,code_name),one_dt_pm25_norm_encode)

        print('finish!')
        
    def get_paths(self):
        """Return the grid file paths, sorted so the seeded split is reproducible."""
        # glob returns paths in arbitrary order.
        grid_paths=sorted(glob(os.path.join(home,'station2grid','datasets','npy',self.domain,self.single,'grid','*')))
        return grid_paths
    
    def gridPath2arr(self,path):
        """Load one grid file, keep its first channel and normalise it."""
        one_dt = np.load(path)
        one_dt_pm25 = one_dt[...,:1]
        one_dt_pm25_norm = self.normalize(one_dt_pm25)
        return one_dt_pm25_norm
    
    def normalize(self,one_dt_pm25):
        """Divide by ``norm_const`` and cast to float."""
        one_dt_pm25_norm = one_dt_pm25 / self.norm_const
        one_dt_pm25_norm = one_dt_pm25_norm.astype('float')
        return one_dt_pm25_norm
    
    def denormalize(self,arr):
        """Inverse of :meth:`normalize` (multiply by ``norm_const``)."""
        return arr*self.norm_const
    
    def get_generator(self,grid_paths):
        """Yield endless ``(x, x)`` batches of ``batch_size`` randomly drawn grid files."""
        while True:
            batch_x=[]
            for _ in range(self.batch_size):
                # Use the random index; indexing with the loop counter would only
                # ever read the first batch_size files.
                j=randint(0,len(grid_paths)-1)
                grid_path=grid_paths[j]
                one_dt_pm25_norm=self.gridPath2arr(grid_path)
                batch_x.append(one_dt_pm25_norm)
            batch_x=np.concatenate(batch_x,axis=0)
            yield batch_x,batch_x
    
    def get_weight_dir(self):
        """Create (if needed) and return the checkpoint directory."""
        weight_dir=os.path.join(home,'station2grid','weights','single',self.single,'grid2code',self.autoencoderArcht)
        os.makedirs(weight_dir,exist_ok=True)
        return weight_dir
    
    def get_checkpointer(self):
        """Return a ModelCheckpoint that saves only when ``val_loss`` improves."""
        file_name="grid2code-epoch_{epoch:02d}-val_loss_{val_loss:.3f}.hdf5"
        path=os.path.join(self.weight_dir,file_name)
        checkpointer = ModelCheckpoint(filepath=path, verbose=1, period=1,monitor='val_loss', save_best_only=True, mode='min')
        return checkpointer
    

        

### 2nd

In [None]:
from models.networks import AE
from tools.data_generator import DataGenerator
import pandas as pd

class Grid2Code():
    """Refactored grid-to-code pipeline built on the ``AE`` and ``DataGenerator`` helpers.

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``model_name``,
    ``ae_type``, ``batch_size`` and ``n_epochs``.
    """
    def __init__(self, opt):
        self.opt = opt
        self.ae = AE(opt)
        self.dataGenerator = DataGenerator(opt)
        self.setup_weight_dir()
    
    def train(self):
        """Fit the autoencoder on the data generator's train/valid streams and save the history."""
        print('training...')
        dataGenerator = self.dataGenerator
        g_train = dataGenerator.generator_train
        g_valid = dataGenerator.generator_valid
        
        self.autoencoder = self.ae.define_ae()
        
        callbacks = self.ae.get_callbacks(self.weight_dir)
        
        history = self.autoencoder.fit_generator(
            generator = g_train,
            steps_per_epoch = (len(dataGenerator.x_train_paths) // self.opt.batch_size),

            validation_data = g_valid,
            validation_steps = (len(dataGenerator.x_valid_paths) // self.opt.batch_size),

            epochs = self.opt.n_epochs,
            verbose = 0,
            callbacks = callbacks,

            use_multiprocessing = True,
            workers = 8,
            max_queue_size = 10,
        )
        
        self.save_history(history)
        print('finish!')
        
    def test(self): 
        """Encode every grid file with the last checkpoint in ``weight_dir``.

        Codes are written to ``<dataGenerator.base_dir>/code/<opt.ae_type>/``
        using the grid file name with ``'grid'`` replaced by ``'code'``.

        Raises
        ------
        FileNotFoundError
            If ``weight_dir`` holds no ``*hdf5`` checkpoint.
        """
        print('testing...')
        dataGenerator = self.dataGenerator
        weights = sorted(glob(os.path.join(self.weight_dir, '*hdf5')))
        if not weights:
            raise FileNotFoundError('no hdf5 checkpoint found in %s' % self.weight_dir)
        # Last name in sorted order.
        weight = weights[-1]
        
        encoder_best = self.ae.get_encoder(weight)
        
        code_dir = os.path.join(dataGenerator.base_dir, 'code', self.opt.ae_type)
        os.makedirs(code_dir, exist_ok=True)
        
        for grid_path in dataGenerator.get_paths('grid'):
            grid = dataGenerator.grid_path2arr(grid_path)
            code = encoder_best.predict(grid)
            # basename instead of split('/') so the file name is found on any platform.
            code_name = os.path.basename(grid_path).replace('grid', 'code')
            np.save(os.path.join(code_dir, code_name), code)
        print('finish!')
        
    
    def save_history(self, history):
        """Write ``history.history`` to ``<weight_dir>/history.csv``."""
        df_history = pd.DataFrame(history.history)
        path = os.path.join(self.weight_dir, 'history.csv',)
        df_history.to_csv(path, index=False)
        
    def setup_weight_dir(self):
        """Compute ``self.weight_dir`` from the options and create the directory."""
        opt = self.opt
        source = 'domain_%s-k_%s-weightKNN_%s'%(opt.domain, opt.k, opt.weightKNN)
        self.weight_dir = os.path.join(home, 'station2grid', 'weights', 'single', source, opt.model_name, opt.ae_type)
        os.makedirs(self.weight_dir, exist_ok=True)
        
        

In [None]:
class Opt():
    """Plain option container: every constructor argument is stored as a same-named attribute."""
    def __init__(
        self,
        domain='air',
        k=5,
        weightKNN='distance',
        ae_type='A1',
        batch_size=2,
        n_epochs=10,
        model_name='grid2code',
        features='pm25_pm10',
        val_stations='Shilin_Guting',
    ):
        # Dataset selection.
        self.domain, self.k, self.weightKNN = domain, k, weightKNN
        # Network variant and training settings.
        self.ae_type, self.batch_size, self.n_epochs = ae_type, batch_size, n_epochs
        self.model_name = model_name
        # Underscore-separated feature names and validation station names.
        self.features, self.val_stations = features, val_stations

# Options for the grid2code run; features and val_stations keep their defaults.
opt = Opt(
    domain='air',
    k=5,
    weightKNN='distance',
    ae_type='A1',
    batch_size=2,
    n_epochs=10,
    model_name='grid2code',
)


In [None]:
# Build the pipeline from the options defined in the previous cell.
grid2code = Grid2Code(opt)


In [None]:
# Train the autoencoder; checkpoints and history.csv are written to the weight directory.
grid2code.train()

In [None]:
# Encode every grid file with the last saved checkpoint and save the codes.
grid2code.test()


# station2code

## 1st

In [None]:
class Station2Code():
    """Train a fully connected network that maps station readings to autoencoder codes.

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``batch_size``,
    ``n_epochs``, ``autoencoderArcht``, ``features``, ``valStations`` and
    ``codeLength``. ``features`` and ``valStations`` are underscore-separated
    name lists. Stations listed in ``valStations`` are excluded from the input.
    """
    # Divisor used by normalize().
    norm_const=100

    def __init__(self,opt):
        self.opt=opt
        self.single='domain_%s-k_%s-weightKNN_%s'%(opt.domain,opt.k,opt.weightKNN)
        self.batch_size=opt.batch_size
        self.domain=opt.domain
        self.n_epochs = opt.n_epochs
        self.autoencoderArcht=opt.autoencoderArcht
        ### 
        self.features=opt.features
        self.valStations=opt.valStations
        self.codeLength=opt.codeLength
        self.domainFeatures=pd.read_csv(os.path.join(home,'station2grid','datasets','info','%s-features.csv'%(self.domain)))
        self.epaStationInfo=pd.read_csv(os.path.join(home,'station2grid','datasets','info','epa-station-info.csv'))
        self.n_features=len(self.features.split('_'))
        self.n_valStations=len(self.valStations.split('_'))
        # Positions of the requested features in the domain's feature list.
        self.iFeatures=[self.domainFeatures.feature.tolist().index(feature) for feature in self.features.split('_')]
        # Row indices of all stations that are not held out for validation.
        self.iStations=self.epaStationInfo[~self.epaStationInfo.SiteEngName.isin(self.valStations.split('_'))].index.tolist()
        
        self.station_paths=self.get_paths('station')
        self.trainPaths,self.valPaths=train_test_split(self.station_paths,test_size=0.2, random_state=42)
        self.fnn=self.get_fnn()
        self.weight_dir=self.get_weight_dir()
        
    def get_fnn(self):
        """Build, compile and return the station-to-code network (also prints its summary)."""
        # Flatten is not among the keras names imported by this notebook, so import it here.
        from keras.layers import Flatten
        # 73 is the total station count this model assumes - presumably the number of
        # rows in epa-station-info.csv; TODO confirm.
        num_station = 73-self.n_valStations
        num_encoded = self.codeLength #4576
        input_ = Input(shape=(num_station,self.n_features))
        x = Flatten()(input_)
        x = Dense(500, activation='relu')(x)
        x = Dense(2500, activation='relu')(x)
        output_ = Dense(num_encoded, activation='linear')(x)
        model = Model(input_, output_)
        model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])
        model.summary()
        return model
    
    def train(self): 
        """Fit the network with early stopping and checkpointing, then save ``history.csv``."""
        early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3) # , min_delta=0.01
        checkpointer=self.get_checkpointer()
        
        g_train=self.get_generator(self.trainPaths)
        g_val=self.get_generator(self.valPaths)
        
        print('training...')
        history = self.fnn.fit_generator(
            generator = g_train,
            steps_per_epoch = (len(self.trainPaths) // self.batch_size),

            validation_data = g_val,
            validation_steps = (len(self.valPaths) // self.batch_size),

            epochs = self.n_epochs,
            verbose = 0,
            callbacks=[early_stopping,checkpointer],

            use_multiprocessing = True,
            workers = 8,
            max_queue_size = 10,
        )
        print('finish!')
        
        df_history=pd.DataFrame(history.history)
        path=os.path.join(self.weight_dir,'history.csv',)
        df_history.to_csv(path,index=False)
    
    def get_generator(self,station_paths):
        """Yield endless ``(stations, codes)`` batches of ``batch_size`` randomly drawn files."""
        while True:
            batch_x=[]
            batch_y=[]
            for _ in range(self.batch_size):
                # Use the random index; indexing with the loop counter would only
                # ever read the first batch_size files.
                j=randint(0,len(station_paths)-1)
                ### x
                station_path=station_paths[j]
                one_dt_station=self.stationPath2arr(station_path)
                batch_x.append(one_dt_station)
                ### y: the code file that belongs to the same station file
                code_path=self.stationPath2codePath(station_path)
                one_dt_code=self.codePath2arr(code_path)
                batch_y.append(one_dt_code)
                
            batch_x=np.concatenate(batch_x,axis=0)
            batch_y=np.concatenate(batch_y,axis=0)
            yield batch_x,batch_y
    
        
    def get_paths(self,fileType):
        """Return the sorted file paths of ``fileType`` ('station', 'grid' or e.g. 'code')."""
        base=os.path.join(home,'station2grid','datasets','npy',self.domain,self.single)
        if fileType in ['station','grid']:
            pattern=os.path.join(base,fileType,'*')
        else:
            # Same layout as stationPath2codePath(): <base>/<fileType>/<autoencoderArcht>/
            pattern=os.path.join(base,fileType,self.autoencoderArcht,'*')
        # glob returns paths in arbitrary order; sort so the seeded split is reproducible.
        return sorted(glob(pattern))
    
    def stationPath2codePath(self,stationPath):
        """Map ``<base>/station/<name>`` to ``<base>/code/<autoencoderArcht>/<name with 'station'->'code'>``."""
        dataset_dir=os.path.dirname(os.path.dirname(stationPath))
        code_name=os.path.basename(stationPath).replace('station','code')
        code_path=os.path.join(dataset_dir,'code',self.autoencoderArcht,code_name)
        return code_path       
    
    def stationPath2arr(self,path):
        """Load a station file, keep the training stations and requested features, and normalise."""
        one_dt = np.load(path)
        one_dt_pm25 = one_dt[:,self.iStations,:][...,self.iFeatures]
        one_dt_pm25_norm = self.normalize(one_dt_pm25)
        return one_dt_pm25_norm
    
    def codePath2arr(self,path):
        """Load a code file and flatten it to a single row."""
        one_dt=np.load(path)
        one_dt=one_dt.reshape(1,-1)
        return one_dt
    
    def normalize(self,one_dt_pm25):
        """Divide by ``norm_const`` and cast to float."""
        one_dt_pm25_norm = one_dt_pm25 / self.norm_const
        one_dt_pm25_norm = one_dt_pm25_norm.astype('float')
        return one_dt_pm25_norm
    
    def get_weight_dir(self):
        """Create (if needed) and return the checkpoint directory for this configuration."""
        weight_dir=os.path.join(home,'station2grid','weights','single',self.single,'station2code',self.autoencoderArcht,str(self.n_valStations),self.valStations,self.features)
        os.makedirs(weight_dir,exist_ok=True)
        return weight_dir
    
    def get_checkpointer(self):
        """Return a ModelCheckpoint that saves only when ``val_loss`` improves."""
        file_name="station2code-epoch_{epoch:02d}-val_loss_{val_loss:.3f}.hdf5"
        path=os.path.join(self.weight_dir,file_name)
        checkpointer = ModelCheckpoint(filepath=path, verbose=1, period=1,monitor='val_loss', save_best_only=True, mode='min')
        return checkpointer

## 2nd

In [None]:
from models.networks import AE, FCNN
from tools.data_generator import DataGenerator
import pandas as pd

class Station2Code():
    """Refactored station-to-code trainer built on the ``FCNN`` and ``DataGenerator`` helpers.

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``model_name``,
    ``ae_type``, ``features``, ``val_stations``, ``code_length``,
    ``batch_size`` and ``n_epochs``.
    """
    def __init__(self, opt):
        self.opt = opt
        self.fcnn = FCNN(opt)
        self.dataGenerator = DataGenerator(opt)
        
        # Both option strings are underscore-separated name lists.
        self.n_val_stations = len(self.opt.val_stations.split('_')) ###
        self.n_features = len(self.opt.features.split('_')) ###
        self.setup_weight_dir()
    
    def train(self):
        """Fit the network on the data generator's train/valid streams and save the history."""
        print('training...')
        dataGenerator = self.dataGenerator
        g_train = dataGenerator.generator_train
        g_valid = dataGenerator.generator_valid
        
        self.s2c_model = self.fcnn.define_fcnn(self.n_val_stations, self.n_features, self.opt.code_length)
        
        # Ask the network wrapper for the callbacks, as Grid2Code does with its AE wrapper;
        # the model returned by define_fcnn is what gets fitted, not what builds callbacks.
        # NOTE(review): assumes FCNN exposes get_callbacks like AE does - confirm in models.networks.
        callbacks = self.fcnn.get_callbacks(self.weight_dir)
        
        history = self.s2c_model.fit_generator(
            generator = g_train,
            steps_per_epoch = (len(dataGenerator.x_train_paths) // self.opt.batch_size),

            validation_data = g_valid,
            validation_steps = (len(dataGenerator.x_valid_paths) // self.opt.batch_size),

            epochs = self.opt.n_epochs,
            verbose = 0,
            callbacks = callbacks,

            use_multiprocessing = True,
            workers = 8,
            max_queue_size = 10,
        )
        
        self.save_history(history)
        print('finish!')
        
    
    def save_history(self, history):
        """Write ``history.history`` to ``<weight_dir>/history.csv``."""
        df_history = pd.DataFrame(history.history)
        path = os.path.join(self.weight_dir, 'history.csv',)
        df_history.to_csv(path, index=False)
        
    def setup_weight_dir(self):
        """Compute ``self.weight_dir`` from the options and create the directory."""
        opt = self.opt
        source = 'domain_%s-k_%s-weightKNN_%s'%(opt.domain, opt.k, opt.weightKNN)
        self.weight_dir = os.path.join(home, 'station2grid', 'weights', 'single', source, opt.model_name, opt.ae_type, str(self.n_val_stations), opt.val_stations, opt.features)
        os.makedirs(self.weight_dir, exist_ok=True)
        
        

In [None]:
class Opt():
    """Plain option container: every constructor argument is stored as a same-named attribute."""
    def __init__(
        self,
        domain='air',
        k=5,
        weightKNN='distance',
        ae_type='A1',
        batch_size=2,
        n_epochs=10,
        model_name='grid2code',
        features='pm25_pm10',
        val_stations='Shilin_Guting',
        code_length=4576,
    ):
        # Dataset selection.
        self.domain, self.k, self.weightKNN = domain, k, weightKNN
        # Network variant and training settings.
        self.ae_type, self.batch_size, self.n_epochs = ae_type, batch_size, n_epochs
        self.model_name = model_name
        # Underscore-separated feature names and validation station names.
        self.features, self.val_stations = features, val_stations
        # Length of the flattened code the network has to predict.
        self.code_length = code_length

# Options for the station2code run: four input features, four held-out stations.
opt = Opt(
    domain='air',
    k=5,
    weightKNN='distance',
    ae_type='A1',
    batch_size=2,
    n_epochs=10,
    model_name='station2code',
    features='pm25_temperature_pm10_humidity',
    val_stations='Chaozhou_Shilin_Guting_Puli',
)


In [None]:
# Build the trainer from the options defined in the previous cell.
station2code=Station2Code(opt)

In [None]:
# Train the station-to-code network; history.csv is written to the weight directory.
station2code.train()

# Station2GridSingle

## 1st

In [None]:
class Station2GridSingle():
    """Inference pipeline: station readings -> code (trained FNN) -> grid (trained decoder).

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``autoencoderArcht``,
    ``features``, ``valStations``, ``codeLength`` and ``stationPath``.
    """
    # Divisor used by normalize() and multiplier used by denormalize().
    norm_const=100

    def __init__(self,opt):
        self.single='domain_%s-k_%s-weightKNN_%s'%(opt.domain,opt.k,opt.weightKNN)
        self.domain=opt.domain
        self.autoencoderArcht=opt.autoencoderArcht
        ### 
        self.features=opt.features
        self.valStations=opt.valStations
        self.codeLength=opt.codeLength
        self.stationFeatures=pd.read_csv(os.path.join(home,'station2grid','datasets','info','epa-features.csv'))
        self.epaStationInfo=pd.read_csv(os.path.join(home,'station2grid','datasets','info','epa-station-info.csv'))
        self.n_features=len(self.features.split('_'))
        self.n_valStations=len(self.valStations.split('_'))
        # Positions of the requested features in the EPA feature list.
        self.iFeatures=[self.stationFeatures.feature.tolist().index(feature) for feature in self.features.split('_')]
        # Row indices of all stations that are not held out for validation.
        self.iStations=self.epaStationInfo[~self.epaStationInfo.SiteEngName.isin(self.valStations.split('_'))].index.tolist()
        
        ###
        self.stationPath=opt.stationPath
    

    def get_decoder(self):
        """Load the last grid2code checkpoint and return its decoder half as a standalone model."""
        weight=self.get_weight('grid2code')
        # autoencoder
        autoencoder_best = load_model(weight)
        # decoder: re-apply the last seven trained layers to a new input with the code's shape
        input_decoder = Input(shape=(44, 26, 4))
        x = input_decoder
        for decoder_layer in autoencoder_best.layers[-7:-1]:
            x = decoder_layer(x)
        output_decoder = autoencoder_best.layers[-1](x)
        decoder_best = Model(input_decoder, output_decoder)
        return decoder_best
    
    def get_fnn(self):
        """Load and return the last station2code checkpoint."""
        weight=self.get_weight('station2code')
        nn_best = load_model(weight)
        return nn_best
    
    def test(self): 
        """Run the full station -> code -> grid pipeline on ``stationPath`` and return the grids."""
        print('load model') 
        print('########################################################')
        fnn=self.get_fnn()
        decoder=self.get_decoder()
        print('########################################################')
        print('processing...')
        stations=self.stationPath2arr(self.stationPath)
        
        codes=fnn.predict(stations)
        # Restore the flat code vector to the decoder's input shape.
        codes=codes.reshape(-1, 44, 26, 4)
        
        grids=decoder.predict(codes)
        grids=self.denormalize(grids)
        print('finish!')
        return grids
    
    def stationPath2arr(self,path):
        """Load a station file, keep the training stations and requested features, and normalise."""
        one_dt = np.load(path)
        one_dt_pm25 = one_dt[:,self.iStations,:][...,self.iFeatures]
        one_dt_pm25_norm = self.normalize(one_dt_pm25)
        return one_dt_pm25_norm
    
    def normalize(self,one_dt_pm25):
        """Divide by ``norm_const`` and cast to float."""
        one_dt_pm25_norm = one_dt_pm25 / self.norm_const
        one_dt_pm25_norm = one_dt_pm25_norm.astype('float')
        return one_dt_pm25_norm
    
    def denormalize(self,arr):
        """Multiply by ``norm_const`` and cast to float."""
        arr=self.norm_const*arr
        arr = arr.astype('float')
        return arr
    
    def get_weight(self,modelType):
        """Return the last ``*hdf5`` checkpoint (in sorted order) for ``modelType``.

        Parameters
        ----------
        modelType : str
            ``'grid2code'`` or ``'station2code'``.

        Raises
        ------
        ValueError
            If ``modelType`` is not one of the two supported values.
        FileNotFoundError
            If no checkpoint matches.
        """
        base_dir=os.path.join(home,'station2grid','weights','single',self.single,modelType,self.autoencoderArcht)
        if modelType=='grid2code':
            weight_dir=os.path.join(base_dir,'*hdf5')
        elif modelType=='station2code':
            weight_dir=os.path.join(base_dir,str(self.n_valStations),self.valStations,self.features,'*hdf5')
        else:
            raise ValueError("modelType must be 'grid2code' or 'station2code', got %r" % (modelType,))
        weights=sorted(glob(weight_dir))
        if not weights:
            raise FileNotFoundError('no checkpoint matches %s' % weight_dir)
        weight=weights[-1]
        print(weight)
        return weight
    

## 2nd

In [12]:
from models.networks import AE, FCNN
from tools.data_generator import DataGenerator
import pandas as pd

class Station2GridSD():
    """Refactored inference pipeline: station readings -> code -> grid.

    ``opt`` must provide ``domain``, ``k``, ``weightKNN``, ``ae_type``,
    ``features``, ``val_stations`` and ``epa_station_path``.
    """
    def __init__(self, opt):
        self.opt = opt
        self.ae = AE(opt)
        self.fcnn = FCNN(opt)
        self.dataGenerator = DataGenerator(opt)
        
        # Both option strings are underscore-separated name lists.
        self.n_val_stations = len(self.opt.val_stations.split('_')) ###
        self.n_features = len(self.opt.features.split('_')) ###
        self.setup_weight_dir()
    
    def test(self): 
        """Predict grids for the station file at ``opt.epa_station_path``.

        The station array is kept on ``self.stations``; the denormalised grids
        are returned.

        Raises
        ------
        FileNotFoundError
            If either weight directory holds no ``*hdf5`` checkpoint.
        """
        print('testing...')
        dataGenerator = self.dataGenerator
        
        g2c_weight = self._latest_weight(self.g2c_weight_dir)
        s2c_weight = self._latest_weight(self.s2c_weight_dir)

        c2g_model = self.ae.get_decoder(g2c_weight)
        s2c_model = self.fcnn.get_fcnn(s2c_weight)
        
        self.stations = dataGenerator.station_path2arr(self.opt.epa_station_path)
        
        codes=s2c_model.predict(self.stations)
        # Restore the flat code vector to the decoder's input shape.
        codes=codes.reshape(-1, 44, 26, 4)
        
        grids=c2g_model.predict(codes)
        grids=dataGenerator.denormalize(grids)
        print('finish!')
        return grids

    @staticmethod
    def _latest_weight(weight_dir):
        """Return the last ``*hdf5`` file of ``weight_dir`` in sorted order."""
        weights = sorted(glob(os.path.join(weight_dir, '*hdf5')))
        if not weights:
            raise FileNotFoundError('no hdf5 checkpoint found in %s' % weight_dir)
        return weights[-1]
    
    def save_history(self, history):
        """Write ``history.history`` to ``<weight_dir>/history.csv``.

        NOTE(review): this class never assigns ``self.weight_dir`` (only
        ``g2c_weight_dir`` and ``s2c_weight_dir``), so the caller must set it
        first - confirm whether this method is still needed here.
        """
        df_history = pd.DataFrame(history.history)
        path = os.path.join(self.weight_dir, 'history.csv',)
        df_history.to_csv(path, index=False)
        
    def setup_weight_dir(self):
        """Compute the grid2code and station2code checkpoint directories from the options."""
        opt = self.opt
        source = 'domain_%s-k_%s-weightKNN_%s'%(opt.domain, opt.k, opt.weightKNN)
        base_dir = os.path.join(home, 'station2grid', 'weights', 'single', source)
        self.g2c_weight_dir = os.path.join(base_dir, 'grid2code', opt.ae_type)
        self.s2c_weight_dir = os.path.join(base_dir, 'station2code', opt.ae_type, str(self.n_val_stations), opt.val_stations, opt.features)
        
        

In [8]:
class Opt():
    """Plain option container: every constructor argument is stored as a same-named attribute."""
    def __init__(
        self,
        domain='air',
        k=5,
        weightKNN='distance',
        ae_type='A1',
        batch_size=2,
        n_epochs=10,
        model_name='grid2code',
        features='pm25_pm10',
        val_stations='Shilin_Guting',
        code_length=4576,
        epa_station_path='',
    ):
        # Dataset selection.
        self.domain, self.k, self.weightKNN = domain, k, weightKNN
        # Network variant and training settings.
        self.ae_type, self.batch_size, self.n_epochs = ae_type, batch_size, n_epochs
        self.model_name = model_name
        # Underscore-separated feature names and validation station names.
        self.features, self.val_stations = features, val_stations
        # Length of the flattened code and path of the station file to run inference on.
        self.code_length, self.epa_station_path = code_length, epa_station_path
        
# Station file to run inference on, and the options for the station2gridSD run
# (code_length keeps its default).
epa_station_path = os.path.join(home, 'station2grid', 'datasets', 'npy', 'epa', 'epa2014.npy')
opt=Opt('air', 5, 'distance', 'A1', 2, 10, model_name='station2gridSD', features='pm25_temperature_pm10_humidity', val_stations='Chaozhou_Shilin_Guting_Puli', epa_station_path=epa_station_path)


In [13]:
# Build the inference pipeline from the options defined above.
station2GridSD = Station2GridSD(opt)

In [14]:
# Run station -> code -> grid inference; returns the denormalised grids.
grids = station2GridSD.test()

testing...
finish!


In [15]:
# Compare the predicted grid shape with the shape of the station input.
grids.shape,station2GridSD.stations.shape

((48, 348, 204, 1), (48, 69, 4))

# refactor

## generator

In [None]:
import sys 
sys.path.append(os.path.join(os.path.expanduser("~"),'station2grid'))
from tools import CustomKNN, plotMap, data_generator
from tools.data_generator import DataGenerator

In [None]:
class Opt():
    """Plain option container: every constructor argument is stored as a same-named attribute."""
    def __init__(
        self,
        domain='air',
        k=5,
        weightKNN='distance',
        ae_type='A1',
        batch_size=2,
        n_epochs=10,
        model_name='grid2code',
        features='pm25_pm10',
        val_stations='Shilin_Guting',
    ):
        # Dataset selection.
        self.domain, self.k, self.weightKNN = domain, k, weightKNN
        # Network variant and training settings.
        self.ae_type, self.batch_size, self.n_epochs = ae_type, batch_size, n_epochs
        self.model_name = model_name
        # Underscore-separated feature names and validation station names.
        self.features, self.val_stations = features, val_stations

# Smoke test of DataGenerator in grid2code mode: pull one validation batch and check its shapes.
opt=Opt('air', 5, 'distance', 'A1', 2, 10, 'grid2code')
dataGenerator = DataGenerator(opt)
generator = dataGenerator.generator_valid
batch_x, batch_y = next(generator)
batch_x.shape, batch_y.shape

In [None]:
# Smoke test of DataGenerator in station2code mode: pull one batch and check its shapes.
# NOTE(review): this reads `generator`, while the other cells in this notebook use
# `generator_train` / `generator_valid` - confirm the attribute exists on DataGenerator.
opt=Opt('air', 5, 'distance', 'A1', 2, 10, 'station2code', features='pm25_temperature_pm10_humidity', val_stations='Chaozhou_Shilin_Guting_Puli')
dataGenerator = DataGenerator(opt)
generator = dataGenerator.generator
batch_x, batch_y = next(generator)
batch_x.shape, batch_y.shape

## networks

In [None]:
from models.networks import Autoencoder, FCNN

In [None]:
class Opt():
    """Plain option container: every constructor argument is stored as a same-named attribute."""
    def __init__(
        self,
        domain='air',
        k=5,
        weightKNN='distance',
        ae_type='A1',
        batch_size=2,
        n_epochs=10,
        model_name='grid2code',
        features='pm25_pm10',
        val_stations='Shilin_Guting',
    ):
        # Dataset selection.
        self.domain, self.k, self.weightKNN = domain, k, weightKNN
        # Network variant and training settings.
        self.ae_type, self.batch_size, self.n_epochs = ae_type, batch_size, n_epochs
        self.model_name = model_name
        # Underscore-separated feature names and validation station names.
        self.features, self.val_stations = features, val_stations

# Options used to exercise the network builders; features and val_stations keep their defaults.
opt = Opt(
    domain='air',
    k=5,
    weightKNN='distance',
    ae_type='A1',
    batch_size=2,
    n_epochs=10,
    model_name='grid2code',
)

In [None]:
# Build the autoencoder through the refactored network module.
# NOTE(review): other cells in this notebook import `AE` and call `define_ae()`;
# confirm which class/method names models.networks actually exposes.
autoencoder=Autoencoder(opt).define_autoencoder()
# autoencoder.summary()

In [None]:
# Build the fully connected network; elsewhere in this notebook the three arguments are
# passed as (n_val_stations, n_features, code_length).
fcnn=FCNN(opt).define_fcnn(5,3,4576)
# fcnn.summary()

## options