# In [None]:
from app.Transformer_Classifier import Transformer_Classifier 
from app.Data_Loader import Data_Loader
from app.TextRank_Extractor import TextRank_Extractor
from app.Keyword_Classifier import Keyword_Classifier
from app.common.MySQLUtility import MySQLUtility
import os 
from app.Risk_Score_Service import Risk_Score_Service

# Risk domains handled by every pipeline stage, in processing order.
domains = ['esg', 'liabilities']

# Service-account key file for the Google Cloud client libraries, which look
# this variable up when resolving application-default credentials.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './store/genuine-wording-key.json'

# MySQL connection settings. Each value is taken from the environment variable
# of the same name when it is set, so deployments can supply their own
# host/credentials without editing this file.
# SECURITY: the literal fallbacks below are kept only so existing runs keep
# working; the password should be rotated and these defaults removed once the
# environment variables are provisioned.
DB_HOST = os.environ.get('DB_HOST', '34.170.168.203')
DB_USER = os.environ.get('DB_USER', 'root')
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'nu123456')
DB_NAME = os.environ.get('DB_NAME', 'lca_db')

class Data_ETL_Pipeline(object):
    """Drive the ETL and model stages for every entry in ``domains``.

    Each public method is a thin orchestration step: it prints a progress
    line and delegates to one of the service objects created in
    ``__init__``. All services share a single ``MySQLUtility`` instance.
    """

    # Collaborators; replaced with real instances by __init__.
    dbutil = None
    data_load = None
    textrank = None
    key_classifier = None
    class_service = None
    risk_class = None

    def __init__(self):
        """Build the MySQL helper and pass it to every service object."""
        db = MySQLUtility(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
        self.dbutil = db
        self.data_load = Data_Loader(db)
        self.textrank = TextRank_Extractor(db)
        self.key_classifier = Keyword_Classifier(db)
        self.class_service = Transformer_Classifier(db)
        self.risk_class = Risk_Score_Service(db)

    def create_dataset(self):
        """Call ``clean_db`` and then ``create_database`` on the DB helper."""
        db = self.dbutil
        print("dbutil.db_cleanup():")
        db.clean_db()
        print("dbutil.create_database():")
        db.create_database()

    def load_seed_training_data(self):
        """Import the seed batch, then per domain extract keywords and load training data."""
        loader = self.data_load
        print("data_load.import_seed_data_batch():")
        loader.import_seed_data_batch()

        for name in domains:
            print(f"textrank.extract_keyword_seed_data():{name}")
            self.textrank.extract_keyword_seed_data(name)

            # The progress label says "textrank", but the call goes to data_load.
            print(f"textrank.load_seed_to_training_data_batch():{name}")
            loader.load_seed_to_training_data_batch(name)

    def load_contract_data(self):
        """Import report/contract data for each domain via the data loader."""
        for name in domains:
            print(f"self.data_load.import_reports_contract_data(){name}")
            self.data_load.import_reports_contract_data(name)

    def process_keyword_model(self):
        """Per domain: prepare data, train, evaluate, then process contract data."""
        classifier = self.key_classifier
        for name in domains:
            print(f"key_classifier.prepare_training_data():{name}")
            classifier.prepare_training_data(name)

            print(f"key_classifier.train_model():{name}")
            classifier.train_model(name)

            print(f"key_classifier.evaluate_model():{name}")
            classifier.evaluate_model(name)

            print(f"key_classifier.process_contract_data():{name}")
            classifier.process_contract_data(name)

    def process_transformer_model(self):
        """Print the transformer progress lines, then run keyword polarity scoring.

        The two transformer calls are currently commented out, so the loop
        only prints; the polarity step receives the whole ``domains`` list.
        """
        for name in domains:
            print(f"class_service.training():{name}")
            #self.class_service.training(name)

            print(f"class_service.process_contract_training_data_eval():{name}")
            #self.class_service.process_contract_training_data_eval(name)

        print("risk_class.process_keyword_polarity():" , domains)
        self.risk_class.process_keyword_polarity(domains)

    def evaluate_results(self):
        """Run the keyword and transformer evaluation calls for each domain."""
        for name in domains:
            print(f"key_classifier.Keyword Classifier Accuracy: {name}")
            self.key_classifier.evaluate_model(name)

            print(f"class_service.Transformer Classifier Accuracy: {name}")
            # NOTE(review): spelled ``evalute_model`` here; confirm this matches
            # the method name defined on Transformer_Classifier.
            self.class_service.evalute_model(name)

if __name__ == "__main__":
    # Manual driver: enable a stage by uncommenting its line. Only the
    # transformer/polarity stage is active at the moment.
    dbloader = Data_ETL_Pipeline()
    # dbloader.create_dataset()
    # dbloader.load_seed_training_data()
    # dbloader.load_contract_data()
    # dbloader.process_keyword_model()
    dbloader.process_transformer_model()
    # dbloader.evaluate_results()


# In [None]:
from app.common.GCP_Storage import GCP_Storage

import os 
# Service-account key file used by the Google Cloud client libraries.
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': './store/genuine-wording-key.json'})

# Domains whose artifacts the storage helper is constructed with.
domains = ['liabilities', 'esg']
loader = GCP_Storage(domains)

# One-off storage operations; uncomment the one to run.
# loader.setup_bucket()
# loader.upload_models()
# loader.download_models()
# loader.download_seed_data()
# loader.upload_seed_data()