In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import pickle

In [2]:
import configparser

# Parse the notebook settings from ./config.ini (relative to the working
# directory). ConfigParser.read() skips files it cannot open and returns only
# the names it actually parsed, so check that result and fail fast here rather
# than with an opaque KeyError when a section is looked up later.
config = configparser.ConfigParser()
loaded_config_files = config.read("./config.ini")
if not loaded_config_files:
    raise FileNotFoundError("Could not read configuration file './config.ini'")

# Last expression of the cell: show which file(s) were loaded.
loaded_config_files

['./config.ini']

In [3]:
# Load the pickled PM2.5 -> AOD conversion bundle whose path is configured
# under [convert] pm25_aod. The bundle is a mapping that provides at least the
# 'regressor' and 'scaler' entries read below.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only point
# the config at model files from a trusted source.
with open(config['convert']["pm25_aod"], "rb") as model_file:
    pm25_to_aod_model_obj = pickle.load(model_file)
model_pkl = pm25_to_aod_model_obj
regressor = model_pkl['regressor']
scaler = model_pkl['scaler']

def model_predict(X):
    """Predict with the module-level ``regressor``, applying ``scaler`` if set.

    The module-level ``scaler`` may take one of three forms:

    * falsy (e.g. ``None``): ``X`` is passed straight to ``regressor.predict``.
    * a two-item iterable ``(sc_X, sc_y)``: ``X`` is transformed with ``sc_X``
      and the prediction is mapped back with ``sc_y.inverse_transform``.
    * any other object: it is used as a single feature transformer, i.e.
      ``regressor.predict(scaler.transform(X))``.

    Parameters
    ----------
    X : array-like
        Features in the layout the regressor expects (the caller in this
        notebook passes a single column built with ``reshape(-1, 1)``).

    Returns
    -------
    The object returned by ``regressor.predict`` or, for a scaler pair, by
    ``sc_y.inverse_transform``.

    Raises
    ------
    Exception
        Errors raised by the transformers or the regressor propagate unchanged.
    """
    if not scaler:
        return regressor.predict(X)

    # Only the unpacking decides which kind of scaler we hold. Keeping the
    # actual transform/predict calls outside the ``try`` means a genuine
    # failure in them is reported as-is instead of being swallowed and
    # replaced by an unrelated error from the fallback branch.
    try:
        sc_X, sc_y = scaler
    except (TypeError, ValueError):
        # Not a two-item iterable: treat it as one feature transformer.
        feature_transformer = scaler
        return regressor.predict(feature_transformer.transform(X))

    return sc_y.inverse_transform(regressor.predict(sc_X.transform(X)))

In [4]:
def plot_graph(pkl_obj, pm25_predictions, aod_predictions):
    """Build the AOD-vs-PM2.5 scatter plot for one station and return its Axes.

    Parameters
    ----------
    pkl_obj : mapping
        Must provide ``'station_id'``, which is interpolated into the title.
    pm25_predictions : array-like
        Values plotted on the x axis.
    aod_predictions : array-like
        Values plotted on the y axis.

    Returns
    -------
    The Axes that holds the scatter plot. The figure is closed in pyplot
    before returning, so this function does not render anything itself.
    """
    figure, axes = plt.subplots(figsize=(6, 4))

    axes.scatter(pm25_predictions, aod_predictions, c='k')
    axes.set_title(f"{pkl_obj['station_id']} - AOD vs PM2.5")
    axes.set_xlabel("PM2.5")
    axes.set_ylabel("AOD")
    axes.grid(ls='--')

    # Release the figure from pyplot's bookkeeping; the Axes object is still
    # handed back to the caller.
    plt.close(figure)

    return axes

In [5]:
import multiprocessing as mp

# Manager that owns the shared objects handed out below. Keep a reference to
# it for as long as the proxies it created are in use.
manager = mp.Manager()
# Shared list proxy: perform_task() appends one Axes per processed station, so
# results produced in pool worker processes can be read back here.
# NOTE(review): nothing in the visible cells clears this list, so entries from
# earlier batch cells are still present when later ones iterate over it.
ax_list = manager.list()

def perform_task(path):
    """Convert one station's pickled PM2.5 predictions to AOD and save them.

    Steps: load the pickle at ``path``, run its ``'test_set_predictions'``
    through ``model_predict`` as a single feature column, build the scatter
    plot with ``plot_graph``, write the flattened AOD predictions to
    ``<station_id>_aod.pkl`` in the same directory as ``path``, and append the
    plot's Axes to the shared ``ax_list``.

    Parameters
    ----------
    path : str
        Path to a pickle holding a mapping with ``'station_id'`` and
        ``'test_set_predictions'`` (an object supporting ``reshape``,
        presumably a NumPy array - confirm against the producer).

    Returns
    -------
    None
    """
    import os  # local import keeps this cell runnable on its own

    # NOTE: pickle.load can execute arbitrary code; only process trusted files.
    with open(path, "rb") as source_file:
        pkl_obj = pickle.load(source_file)
    pm25_predictions = pkl_obj['test_set_predictions']

    aod_predictions = model_predict(pm25_predictions.reshape(-1, 1)).reshape(-1)
    ax = plot_graph(pkl_obj, pm25_predictions, aod_predictions)

    # Derive the output directory from the input path itself rather than by
    # slicing off a fixed number of characters, which is only correct for one
    # specific file-name length.
    output_path = os.path.join(
        os.path.dirname(path), f"{pkl_obj['station_id']}_aod.pkl"
    )
    with open(output_path, "wb") as target_file:
        pickle.dump(aod_predictions, target_file)

    ax_list.append(ax)

In [6]:
import glob

# Convert the PM2.5 predictions of every station found under ./ml_models.
directory = "ml_models"

# One worker per CPU. The context manager shuts the pool down even when
# perform_task raises inside map(), so worker processes are never left behind.
with mp.Pool(mp.cpu_count()) as pool:
    pool.map(perform_task, glob.glob(f"./{directory}/*/*_pm25.pkl"))

# NOTE(review): plt.show() is called once per collected Axes but never uses
# `ax`; whether anything is rendered depends on figures being open in this
# process - verify. ax_list also still holds entries from any earlier batch.
for ax in ax_list:
    plt.show()

In [7]:
import glob

# Convert the PM2.5 predictions of every station found under ./stat_models.
directory = "stat_models"

# One worker per CPU. The context manager shuts the pool down even when
# perform_task raises inside map(), so worker processes are never left behind.
with mp.Pool(mp.cpu_count()) as pool:
    pool.map(perform_task, glob.glob(f"./{directory}/*/*_pm25.pkl"))

# NOTE(review): plt.show() is called once per collected Axes but never uses
# `ax`; whether anything is rendered depends on figures being open in this
# process - verify. ax_list also still holds entries from any earlier batch.
for ax in ax_list:
    plt.show()

In [8]:
import glob

# Convert the PM2.5 predictions of every station found under ./dl_models.
directory = "dl_models"

# One worker per CPU. The context manager shuts the pool down even when
# perform_task raises inside map(), so worker processes are never left behind.
with mp.Pool(mp.cpu_count()) as pool:
    pool.map(perform_task, glob.glob(f"./{directory}/*/*_pm25.pkl"))

# NOTE(review): plt.show() is called once per collected Axes but never uses
# `ax`; whether anything is rendered depends on figures being open in this
# process - verify. ax_list also still holds entries from any earlier batch.
for ax in ax_list:
    plt.show()