In [None]:
import os

# NOTE: the original import order is kept on purpose -- the star imports below
# can shadow earlier names, so reordering could change which object a name refers to.
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy.exc import SQLAlchemyError
from ml_functions import *
from sklearn.metrics import accuracy_score, recall_score
from db_queries import username, password, dsn, dbhostname, service_name, dbtables, querys
from table_functions import *
import mlflow
import mlflow.pyfunc
import xgboost as xgb

In [None]:
# Lift pandas' display truncation for both axes: None means "no limit", so
# frames are rendered with every column and every row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Join keys between the tables:
#   MEB_DGM.id  <-> ONI_CIRCUITS.id_dmc
#   MEB_DGM.dmc <-> MEB_DMC.dmc_casting

def read_last_meb_dgm(last_id = 0):
    """Read rows from Z3DMC.MEB_DGM and report the highest ID that was read.

    Parameters
    ----------
    last_id : int or str, optional
        When truthy, the newest row (highest ID) of every DMC is selected and
        only rows with ``id > last_id`` are kept. The value is coerced with
        ``int()`` and sent as a bound parameter instead of being pasted into
        the SQL text. When falsy (default ``0``), only the single row with the
        highest ID in the table is fetched.

    Returns
    -------
    tuple
        ``(data, last_id)``. ``data`` is ``{'MEB_DGM': DataFrame}`` with the
        columns not used downstream removed (the helper column ``rn`` produced
        by the incremental query is NOT removed here). ``last_id`` is the
        largest ``id`` in that frame; when the query returned no rows the
        ``last_id`` that was passed in is returned instead of NaN.

    Raises
    ------
    ValueError
        If a truthy ``last_id`` cannot be converted to ``int``.
    SQLAlchemyError
        If connecting or querying fails. The error is printed and re-raised;
        previously it was swallowed and the function then failed with an
        unrelated ``KeyError`` on ``data['MEB_DGM']``.
    """
    if last_id:
        last_id = int(last_id)
        params = {'last_id': last_id}
        query = """SELECT *
            FROM (
                SELECT
                    t.*,
                    ROW_NUMBER() OVER (PARTITION BY DMC ORDER BY ID DESC) AS rn
                FROM
                    Z3DMC.MEB_DGM t
            ) subquery
            WHERE rn = 1
            AND id > :last_id"""
    else:
        params = None
        query = """SELECT *
            FROM (
                SELECT *
                FROM Z3DMC.MEB_DGM
                ORDER BY ID DESC
            )
            WHERE ROWNUM = 1
            """

    engine = None
    try:
        sqlalchemy_engine="oracle+cx_oracle://"+username+":"+password+"@"+dbhostname+"/?service_name="+service_name
        engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)
        meb_dgm = pd.read_sql(sqlalchemy.text(query), engine, params=params)
    except SQLAlchemyError as e:
        print(e)
        raise
    finally:
        # Release the pooled connections whether or not the query succeeded.
        if engine is not None:
            engine.dispose()

    # Column names are lower-case here -- presumably normalised by the Oracle
    # dialect; verify if the driver/dialect changes.
    meb_dgm = meb_dgm.drop(columns=['timestamp','data_znakowania','data_odlania', 'metal_level', 'metal_pressure', 'max_press_kolbenhub', 'oni_temp_curr_f2'])

    data = {'MEB_DGM': meb_dgm}

    # Only advance the watermark when something was actually read.
    if not meb_dgm.empty:
        last_id = meb_dgm['id'].max()

    return data, last_id

def check_if_meb_base(data):
    """Keep only the MEB_DGM rows that pass the machine-number and DMC-length filter.

    The ``dmc`` column of ``data['MEB_DGM']`` is stripped of surrounding
    whitespace in place, then ``data['MEB_DGM']`` is replaced by the rows whose
    ``nr_dgm`` lies in 8..10 (inclusive) and whose ``dmc`` is exactly 21
    characters long.

    Returns
    -------
    dict or int
        The same ``data`` dict when at least one row is left; otherwise a
        message is printed and the sentinel ``1`` is returned, so callers must
        check the result before using it as a dict.
    """
    def _has_dmc_length(code):
        return len(str(code)) == 21

    dgm = data['MEB_DGM']
    dgm['dmc'] = dgm['dmc'].str.strip()

    machine_in_range = dgm['nr_dgm'].between(8, 10)
    dmc_length_ok = dgm['dmc'].apply(_has_dmc_length)
    data['MEB_DGM'] = dgm[machine_in_range & dmc_length_ok]

    if not data['MEB_DGM'].empty:
        return data

    print('There are not MEB_BASE+ part produced since last time')
    return 1

def read_oni(data, chunk_size=500):
    """Fetch Z3DMC.ONI_CIRCUITS rows for every ``id`` in ``data['MEB_DGM']``.

    The ids are queried in chunks so that a single ``IN (...)`` list stays
    below Oracle's 1000-expression limit. For each (ID_DMC, CIRCUIT_NR) pair
    the query returns one row holding the MAX of every measured column.

    Parameters
    ----------
    data : dict
        Must contain ``'MEB_DGM'``, a DataFrame with an ``id`` column.
    chunk_size : int, optional
        Maximum number of ids per query (default 500, as before).

    Returns
    -------
    dict
        The same ``data`` dict with the combined result stored under
        ``'ONI_CIRCUITS'``. When there are no ids, or no chunk could be read,
        this is an empty frame that still carries the expected column names.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1 or an id is not integer-like.

    Notes
    -----
    Database errors keep the original best-effort behaviour: the error is
    printed and whatever chunks were read before the failure are returned.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be at least 1')

    # Lower-case names as used by combine_into_one_table for this table.
    expected_columns = ['id_dmc', 'circuit_nr', 'assigment', 'flow',
                        'set_point', 'start_delay', 'temp', 'working_mode']

    # int() guarantees that only plain integer literals end up in the SQL text.
    id_list = [int(dmc_id) for dmc_id in data['MEB_DGM'].id]
    if not id_list:
        # Nothing to look up: skip the database round trip entirely.
        data.update({'ONI_CIRCUITS': pd.DataFrame(columns=expected_columns)})
        return data

    id_chunks = [id_list[start:start + chunk_size] for start in range(0, len(id_list), chunk_size)]

    frames = []
    engine = None
    try:
        sqlalchemy_engine="oracle+cx_oracle://"+username+":"+password+"@"+dbhostname+"/?service_name="+service_name
        engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)

        for ids in id_chunks:
            # Build the list by hand: formatting a Python tuple renders a
            # one-element chunk as "(123,)", which is not valid SQL.
            in_list = ", ".join(str(dmc_id) for dmc_id in ids)
            query = f"""SELECT ID_DMC, CIRCUIT_NR, 
                    MAX(ASSIGMENT) AS ASSIGMENT, 
                    MAX(FLOW) AS FLOW, 
                    MAX(SET_POINT) AS SET_POINT,
                    MAX(START_DELAY) AS START_DELAY,
                    MAX(TEMP) AS TEMP,
                    MAX(WORKING_MODE) AS WORKING_MODE
                FROM Z3DMC.ONI_CIRCUITS
                WHERE ID_DMC IN ({in_list})
                GROUP BY ID_DMC, CIRCUIT_NR
                ORDER BY ID_DMC
                """

            frames.append(pd.read_sql(query, engine))

    except SQLAlchemyError as e:
        print(e)
    finally:
        if engine is not None:
            engine.dispose()

    # Concatenate once instead of growing a frame inside the loop.
    if frames:
        result_df = pd.concat(frames, ignore_index=True)
    else:
        result_df = pd.DataFrame(columns=expected_columns)

    data.update({'ONI_CIRCUITS': result_df})

    return data

def combine_into_one_table(data):
    """Join the per-circuit ONI readings onto the MEB_DGM rows, one row per part.

    ``data['ONI_CIRCUITS']`` is pivoted so that every ``id_dmc`` becomes one
    row with columns ``flow_<circuit>``, ``start_delay_<circuit>`` and
    ``temp_<circuit>``. The result is inner-joined to ``data['MEB_DGM']`` on
    ``id == id_dmc``, so parts without circuit readings are dropped.

    Neither input frame is modified. The previous in-place ``drop`` of
    'assigment', 'working_mode' and 'set_point' made a second call on the same
    ``data`` fail with ``KeyError``; the pivot only reads the columns it is
    given, so those columns do not need to be removed first.

    Parameters
    ----------
    data : dict
        Needs ``'MEB_DGM'`` (with an ``id`` column) and ``'ONI_CIRCUITS'``
        (with ``id_dmc``, ``circuit_nr``, ``flow``, ``start_delay``, ``temp``).

    Returns
    -------
    pandas.DataFrame
        MEB_DGM columns, ``id_dmc``, and the pivoted circuit columns.

    Raises
    ------
    ValueError
        From ``DataFrame.pivot`` if an (id_dmc, circuit_nr) pair occurs twice.
    """
    oni_circuits = data['ONI_CIRCUITS'].pivot(index='id_dmc', columns='circuit_nr', values=['flow', 'start_delay', 'temp'])
    # Flatten the (measure, circuit_nr) column pairs into "<measure>_<circuit_nr>".
    oni_circuits.columns = oni_circuits.columns.map('{0[0]}_{0[1]}'.format)
    oni_circuits = oni_circuits.reset_index()

    final_table = data['MEB_DGM'].merge(oni_circuits, left_on='id', right_on='id_dmc', how='inner')

    return final_table

# Save value to a text file
def save_id_to_file(value, filename='pipeline_files/id.txt'):
    """Write ``str(value)`` to ``filename``, replacing any previous content.

    The parent directory is created when it does not exist yet, so the first
    run on a fresh checkout no longer fails with ``FileNotFoundError``.

    Parameters
    ----------
    value : object
        Value to persist; stored as its ``str()`` representation.
    filename : str or path-like, optional
        Target file (default ``'pipeline_files/id.txt'``).
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, 'w') as file:
        file.write(str(value))

# Read value from a text file
def read_id_from_file(filename='pipeline_files/id.txt'):
    """Return the stripped text content of ``filename``.

    Returns
    -------
    str or None
        The file content without leading/trailing whitespace (as a string,
        not converted to a number), or ``None`` when the file does not exist.
    """
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        return None
    with handle:
        content = handle.read()
    return content.strip()

In [None]:
# Only MEB_DGM rows with an id above this value are fetched.
START_AFTER_ID = 1474000

dgm, id_max = read_last_meb_dgm(START_AFTER_ID)
dgm = check_if_meb_base(dgm)

# check_if_meb_base returns the sentinel 1 (not a dict) when no row passes its
# filter; stop here with a clear message instead of failing inside read_oni.
if not isinstance(dgm, dict):
    raise RuntimeError(f'No matching MEB_DGM rows with id > {START_AFTER_ID}; nothing to process.')

dgm_oni = read_oni(dgm)


In [None]:
# Pivot the circuit readings to one row per part and inner-join them onto the MEB_DGM rows.
final_tab = combine_into_one_table(dgm_oni)

In [None]:
# load_csv is not defined in this notebook -- presumably provided by one of the star imports.
# Judging by the names, this is the training table before normalization (TODO confirm).
train_data = load_csv('final_table_before_normalization.csv')

In [None]:
# Persist the column names, one per line. header=False is required: otherwise
# Series.to_csv writes a header line ("0") that a reader using header=None
# would pick up as if it were a column name.
train_data.columns.to_series().to_csv('./pipeline_files/column_names.csv', index=False, header=False)

In [None]:
# Read the stored column names back into a plain list; header=None makes
# every line of the file count as data (column 0 holds the names).
columns_needed = pd.read_csv('./pipeline_files/column_names.csv', header=None)[0].tolist()

In [None]:
# Columns present in the fresh data but absent from the stored column list.
columns_to_drop = final_tab.columns.difference(columns_needed)

# Remove them; the remaining columns keep their current order (this does not
# add missing columns or reorder to match the stored list).
final_tab = final_tab.drop(labels=columns_to_drop, axis='columns')

In [None]:
# Sanity check: (rows, columns) of the raw circuit readings, the aligned
# inference table and the reference training table, in that order.
for inspected_frame in (dgm_oni['ONI_CIRCUITS'], final_tab, train_data):
    print(inspected_frame.shape)

In [None]:
# Load the XGBoost model.
# The location can be overridden with the MEB_MODEL_PATH environment variable so
# the notebook also runs on machines other than the original author's; the
# default is the previously hard-coded path.
MODEL_PATH = os.environ.get(
    'MEB_MODEL_PATH',
    r'C:\Users\DLXPMX8\Desktop\Projekt_AI\meb_process_data_analysis\src\final_model\model\model.xgb',
)
model = xgb.Booster(model_file=MODEL_PATH)

In [None]:
# Scores below this value are mapped to class 0, everything else to class 1.
NEW_PARTS_THRESHOLD = 0.8

dmatrix = xgb.DMatrix(final_tab)
predictions = model.predict(dmatrix)
y_pred = np.where(predictions < NEW_PARTS_THRESHOLD, 0, 1)
print(y_pred)

In [None]:
# Number of rows that were assigned class 1.
np.sum(y_pred == 1)

In [None]:
# Load the evaluation set; load_csv is not defined in this notebook (presumably from a star import).
from_october = load_csv('test_data_from_october.csv')

In [None]:
# Show the keys of the loaded object (the column labels, assuming load_csv returns a DataFrame -- TODO confirm).
from_october.keys()

In [None]:
# Reference labels that the predictions are scored against below.
y_october = from_october['our_final_status']

In [None]:
# Columns removed before scoring: identifier, label and metadata columns.
non_feature_columns = ['id','our_final_status', 'data_odlania', 'nr_dgm']
# Scores below this value are mapped to class 0, everything else to class 1.
OCTOBER_THRESHOLD = 0.95

october_features = from_october.drop(columns = non_feature_columns)
dmatrix = xgb.DMatrix(october_features)
# NOTE: `dmatrix` and `predictions` are rebound here; later cells that use
# `predictions` refer to these October scores.
predictions = model.predict(dmatrix)
y_pred_october = np.where(predictions < OCTOBER_THRESHOLD, 0, 1)

In [None]:
# create_confusion_matrix is not defined in this notebook (presumably from a star import);
# it receives the reference labels first and the predicted labels second.
fig1 = create_confusion_matrix(y_october, y_pred_october)

In [None]:
# Overall share of correctly classified rows.
accuracy = accuracy_score(y_october, y_pred_october)

# Per-class recall: label 1 is treated as NOK and label 0 as OK.
recall_nok = recall_score(y_october, y_pred_october, pos_label=1)
recall_ok = recall_score(y_october, y_pred_october, pos_label=0)

In [None]:
# Report the three metrics, one per line.
print(
    f'acc: {accuracy}',
    f'recall_nok {recall_nok}',
    f'recall_ok {recall_ok}',
    sep='\n',
)

In [None]:
# recall_score with its default pos_label (1), i.e. the recall of class 1.
print(recall_score(y_october, y_pred_october))

In [None]:
# distribution_of_probability_plot is not defined in this notebook (presumably from a star import).
# `predictions` must still hold the October scores when this runs, since they are paired with y_october.
fig_ = distribution_of_probability_plot(predictions, y_october)

## Rozłożenie detali NOK w czasie

In [None]:
# Adds the predicted class as a new column of from_october (modifies the loaded object in place).
# NOTE(review): once this has run, re-running the scoring cell would feed this extra column
# to the model unless it is dropped there as well.
from_october['y_pred_october'] = y_pred_october

In [None]:
# Show the 'data_odlania' and 'nr_dgm' columns next to the reference and predicted status.
# NOTE(review): with display.max_rows set to None this renders every row; consider .head().
from_october[['data_odlania','nr_dgm', 'our_final_status', 'y_pred_october']]

In [None]:
# Peek at the first values of the 'data_odlania' column.
from_october['data_odlania'].head()

In [None]:
# Sets the TF_ENABLE_ONEDNN_OPTS environment variable to '0' for this process.
# NOTE(review): presumably meant to switch off TensorFlow's oneDNN optimisations; that would
# only take effect if this runs before TensorFlow is imported -- confirm and consider moving
# it to the top of the notebook.
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'