In [None]:
import os
import sys
sys.path.append('..') 
import warnings
warnings.filterwarnings("ignore")
import yaml
import pandas as pd
import numpy as np
import pickle
from lightgbm import LGBMClassifier
from sklearn.model_selection import  GridSearchCV
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score, make_scorer
from src.utils.utils_p import save_dataset_pickle,download_pickle,load_config
from src.intermediate.Combine_Datasets import concat_data_set
import pathlib
#pip install lightgbm

if __name__ == '__main__':
    # Train a LightGBM classifier with a grid search over the parameter grid
    # read from importcsv.yaml, pickle the fitted search object, and collect
    # F1 scores on the test data (per test split and on all splits combined).

    # The yaml config is read from the 'src' directory that sits next to the
    # current working directory (i.e. <cwd>/../src).
    CONFIG_PATH = str(pathlib.Path().absolute().parent / 'src')

    # Load project config from yaml file:
    config = load_config(CONFIG_PATH, 'importcsv.yaml')
    upload_cfg = config["Upload_list_all"]

    # Directory holding the saved train/test pickles
    processed_path = upload_cfg['Parent_path'] + "data/04_processed/"

    # Directory the fitted model is pickled to
    model_path = upload_cfg['Parent_path'] + "data/06_models/"

    # Grid-search parameter values for LightGBM
    params = upload_cfg["Model_Parameters"]["LightGBM"]

    # Load the train/test splits stored as lists, plus the selected features.
    # The single-frame pickles ('X_trains', 'y_trains', 'X_test', 'y_test')
    # are intentionally not loaded here: the frames used below are rebuilt
    # from these lists, so loading them first was dead I/O.
    X_trains_list = download_pickle(processed_path, 'X_trains_list')
    y_trains_list = download_pickle(processed_path, 'y_trains_list')
    X_tests_list = download_pickle(processed_path, 'X_tests_list')
    y_tests_list = download_pickle(processed_path, 'y_tests_list')

    selectedfeatures = download_pickle(processed_path, 'selectedfeatures')

    # Combine each list into one dataset for training / whole-set evaluation
    X_trains = concat_data_set(X_trains_list)
    y_trains = concat_data_set(y_trains_list)

    X_test = concat_data_set(X_tests_list)
    y_test = concat_data_set(y_tests_list)

    # Parameter grid; every configured min_child_weight value is cast to float
    parameters = {
        'max_depth': params["max_depth"],
        'min_child_weight': [float(weight) for weight in params["min_child_weight"]],
    }

    # Create classifier
    classifier = LGBMClassifier(learning_rate=0.1, n_estimators=100)
    # Create grid search: 10-fold cross-validation scored by F1
    gridsearcher = GridSearchCV(classifier, parameters, cv=10, scoring=make_scorer(f1_score))
    # Train on the combined training set, restricted to the selected features
    gridsearcher.fit(X_trains[selectedfeatures], y_trains)
    # Save the fitted grid search as a pickle file
    save_dataset_pickle(model_path, 'LightGBM_finalized_model.pckl', gridsearcher)

    # Optional: score each test split from X_tests_list separately
    f1_test_list = [
        f1_score(y_part, gridsearcher.predict(X_part[selectedfeatures]))
        for X_part, y_part in zip(X_tests_list, y_tests_list)
    ]

    # Score the whole (combined) test set
    y_pred = gridsearcher.predict(X_test[selectedfeatures])
    f1_test = f1_score(y_test, y_pred)

    # Collect results.
    # NOTE(review): the 'LighGBM' key looks like a typo for 'LightGBM'; it is
    # kept unchanged because other code may read this key - confirm before renaming.
    bests = {
        'LighGBM': {
            'best_parameters': gridsearcher.best_params_,
            'f1_test_seperated': f1_test_list,
            'f1_test_combined': f1_test,
            'cv_results': gridsearcher.cv_results_,
        }
    }
    
    

In [None]:
# Load the saved model from disk and score it on the combined test set, as a
# check that the pickled model can be reloaded and used for prediction.
# The file name matches the one written by save_dataset_pickle in the training
# cell ('LightGBM_finalized_model.pckl'); the earlier 'RF_finalized_model.pckl'
# pointed at a file this notebook never writes.
loaded_model = download_pickle(model_path,'LightGBM_finalized_model.pckl')
result = loaded_model.predict(X_test[selectedfeatures])
# Score the reloaded model's own predictions; the stale y_pred from the
# training cell was being scored here before, so the reloaded model was
# never actually evaluated.
f1_test = f1_score(y_test,result)
print(f1_test)