In [4]:
import pandas as pd
import sqlalchemy
from sqlalchemy.exc import SQLAlchemyError

from db_queries import username, password, dsn, dbhostname, service_name, dbtables, querys
from table_functions import *

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# DGM id - oni id_dmc
# DGM dmc - MEB_DMC dmc_casting

def read_last_meb_dgm(last_id = 0):

    data = {}
    if last_id:
        query = f"""SELECT *
            FROM (
                SELECT
                    t.*,
                    ROW_NUMBER() OVER (PARTITION BY DMC ORDER BY ID DESC) AS rn
                FROM
                    Z3DMC.MEB_DGM t
            ) subquery
            WHERE rn = 1
            AND id > {last_id}"""

    else:
        query = """SELECT *
            FROM (
                SELECT *
                FROM Z3DMC.MEB_DGM
                ORDER BY ID DESC
            )
            WHERE ROWNUM = 1
            """
    
    try:
        sqlalchemy_engine="oracle+cx_oracle://"+username+":"+password+"@"+dbhostname+"/?service_name="+service_name
        engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)
        data.update({'MEB_DGM': pd.read_sql(query, engine)})
    except SQLAlchemyError as e:
        print(e)
    
    data['MEB_DGM'].drop(columns=['timestamp','data_znakowania','data_odlania', 'metal_level', 'metal_pressure', 'max_press_kolbenhub', 'oni_temp_curr_f2'], inplace= True)

    last_id = data['MEB_DGM'].id.max()
    return data, last_id

def check_if_meb_base(data):
    data['MEB_DGM'].dmc = data['MEB_DGM']['dmc'].str.strip()
    data['MEB_DGM'] = data['MEB_DGM'][(data['MEB_DGM']['nr_dgm'].between(8, 10)) & (data['MEB_DGM']['dmc'].apply(lambda x: len(str(x)) == 21))]

    if data['MEB_DGM'].empty:
        print('There are not MEB_BASE+ part produced since last time')
        return 1
    else:
        return data

def read_oni(data):
    id_list = list(data['MEB_DGM'].id)
    ids_ranges = [id_list[x:x+500] for x in range(0, len(id_list), 500)]
    ids_ranges_tuples = [tuple(sublist) for sublist in ids_ranges]

    result_df = pd.DataFrame()
    try:
        sqlalchemy_engine="oracle+cx_oracle://"+username+":"+password+"@"+dbhostname+"/?service_name="+service_name
        engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)

        for ids in ids_ranges_tuples:
            query = f"""SELECT ID_DMC, CIRCUIT_NR, 
                    MAX(ASSIGMENT) AS ASSIGMENT, 
                    MAX(FLOW) AS FLOW, 
                    MAX(SET_POINT) AS SET_POINT,
                    MAX(START_DELAY) AS START_DELAY,
                    MAX(TEMP) AS TEMP,
                    MAX(WORKING_MODE) AS WORKING_MODE
                FROM Z3DMC.ONI_CIRCUITS
                WHERE ID_DMC IN {ids}
                GROUP BY ID_DMC, CIRCUIT_NR
                ORDER BY ID_DMC
                """
    
            df = pd.read_sql(query, engine)
            result_df = pd.concat([result_df, df], ignore_index=True)

            
    except SQLAlchemyError as e:
        print(e)

    data.update({'ONI_CIRCUITS': result_df})
    
    return data

def combine_into_one_table(data):
    data['ONI_CIRCUITS'].drop(columns = ['assigment', 'working_mode', 'set_point'], inplace = True)
    oni_circuits = data['ONI_CIRCUITS'].pivot(index='id_dmc', columns='circuit_nr', values=['flow', 'start_delay', 'temp'])
    oni_circuits.columns = oni_circuits.columns.map('{0[0]}_{0[1]}'.format) 
    oni_circuits.reset_index(inplace=True)
    final_table = data['MEB_DGM'].copy()
    final_table = final_table.merge(oni_circuits, left_on='id', right_on='id_dmc', how='inner')

    return final_table


In [5]:
dgm, id_max = read_last_meb_dgm(1474000)
dgm = check_if_meb_base(dgm)
dgm_oni = read_oni(dgm)


  result_df = pd.concat([result_df, df], ignore_index=True)


In [6]:
final_tab = combine_into_one_table(dgm_oni)

In [7]:

print(list(final_tab.columns))

['id', 'dmc', 'nr_dgm', 'status', 'czas_fazy_1', 'czas_fazy_2', 'czas_fazy_3', 'max_predkosc', 'cisnienie_tloka', 'cisnienie_koncowe', 'nachdruck_hub', 'anguss', 'temp_pieca', 'oni_temp_curr_f1', 'oni_temp_fore_f1', 'oni_temp_fore_f2', 'vds_air_pressure', 'vds_vac_hose1', 'vds_vac_hose2', 'vds_vac_tank', 'vds_vac_valve1', 'vds_vac_valve2', 'czas_taktu', 'rn', 'id_dmc', 'flow_1', 'flow_2', 'flow_3', 'flow_4', 'flow_5', 'flow_6', 'flow_7', 'flow_8', 'flow_9', 'flow_10', 'flow_11', 'flow_12', 'flow_13', 'flow_14', 'flow_15', 'flow_16', 'flow_17', 'flow_18', 'flow_19', 'flow_20', 'flow_21', 'flow_22', 'flow_23', 'flow_24', 'flow_25', 'flow_26', 'flow_27', 'flow_28', 'start_delay_1', 'start_delay_2', 'start_delay_3', 'start_delay_4', 'start_delay_5', 'start_delay_6', 'start_delay_7', 'start_delay_8', 'start_delay_9', 'start_delay_10', 'start_delay_11', 'start_delay_12', 'start_delay_13', 'start_delay_14', 'start_delay_15', 'start_delay_16', 'start_delay_17', 'start_delay_18', 'start_delay_1

In [8]:
train_data = load_csv('final_table_before_normalization.csv')

File to read:
c:/Users/DLXPMX8/Desktop/Projekt_AI/meb_process_data_analysis/src/.data/final_table_before_normalization.csv


In [9]:
train_data.head()

Unnamed: 0,czas_fazy_1,czas_fazy_2,czas_fazy_3,max_predkosc,cisnienie_tloka,cisnienie_koncowe,nachdruck_hub,anguss,oni_temp_curr_f1,oni_temp_fore_f2,vds_air_pressure,vds_vac_hose1,vds_vac_hose2,vds_vac_tank,vds_vac_valve1,vds_vac_valve2,czas_taktu,flow_1,flow_3,flow_4,flow_6,flow_7,flow_8,flow_10,flow_11,flow_12,flow_13,flow_14,flow_15,flow_16,flow_17,flow_18,flow_20,flow_21,flow_22,flow_24,flow_26,flow_27,flow_28,start_delay_1,start_delay_2,start_delay_3,start_delay_4,start_delay_6,start_delay_10,start_delay_12,start_delay_13,start_delay_21,start_delay_22,start_delay_25,start_delay_26,temp_1,temp_2,temp_3,temp_4,temp_6,temp_7,temp_8,temp_9,temp_10,temp_11,temp_13,temp_15,temp_17,temp_18,temp_21,temp_22,temp_23,temp_24,temp_25,temp_26,temp_27,temp_28,our_final_status
0,2277.0,90.0,45.0,6.3,5.0,282.0,27.0,35.0,79.8,-1.0,5713.976,91.37731,90.79861,94.96528,171.2384,130.9028,94.0,0.0,11.1,14.5,14.3,7.8,9.1,2.5,9.3,9.8,0.0,10.3,11.5,11.6,11.6,11.6,11.5,10.6,3.7,11.1,15.0,11.6,13.5,7.0,15.0,2.0,2.0,2.0,2.0,5.0,12.0,2.0,2.0,2.0,2.0,85.9,85.7,85.9,96.2,79.8,80.7,78.7,81.7,72.6,87.1,81.9,79.3,80.4,78.2,74.1,78.5,74.8,80.9,80.5,72.3,86.8,77.6,0
1,2220.0,76.0,16.0,5.0,4.0,281.0,0.0,41.0,71.8,-1.0,4910.59,77.66204,73.61111,74.30556,381.5393,397.5116,85.0,0.0,11.8,15.1,16.7,7.6,11.6,0.0,10.1,11.0,0.0,11.6,11.1,13.3,11.5,11.8,12.8,14.3,4.6,13.5,14.3,12.3,15.6,7.0,15.0,2.0,2.0,2.0,20.0,5.0,12.0,2.0,2.0,2.0,2.0,80.3,83.2,92.4,87.5,72.1,72.8,71.3,84.6,68.2,79.2,74.4,71.3,73.5,75.0,67.8,76.6,67.9,72.5,69.0,71.3,85.7,70.0,0
2,2277.0,91.0,44.5,6.26,5.0,282.0,27.0,34.0,79.5,-1.0,5751.302,66.08796,64.93056,70.83333,165.7986,121.9907,95.0,0.0,11.1,14.5,14.3,7.8,9.1,2.7,9.3,9.6,0.0,10.3,11.3,11.1,11.6,11.8,11.5,10.6,3.7,11.1,14.8,11.6,13.5,7.0,15.0,2.0,2.0,2.0,2.0,5.0,12.0,2.0,2.0,2.0,2.0,85.9,85.7,85.8,96.8,79.8,80.5,79.1,81.8,73.1,87.3,81.4,79.6,80.8,77.9,74.1,78.9,74.9,81.0,80.2,72.4,86.8,78.2,0
3,2275.0,89.0,43.5,6.37,5.0,282.0,27.0,33.0,79.8,-1.0,5785.156,71.06481,69.90741,75.75231,164.4097,122.1065,95.0,0.0,11.1,14.5,14.3,7.8,9.1,2.7,9.3,9.6,0.0,10.5,11.6,11.6,11.6,11.8,11.3,10.6,4.0,11.1,15.0,11.6,13.5,7.0,15.0,2.0,2.0,2.0,2.0,5.0,12.0,2.0,2.0,2.0,2.0,86.1,85.9,86.1,96.6,79.9,80.9,79.1,82.0,73.2,87.1,81.8,79.5,80.7,78.4,74.3,79.3,75.0,81.3,80.6,72.4,87.1,78.7,0
4,2276.0,91.0,44.5,6.37,5.0,282.0,27.0,33.0,79.5,-1.0,5751.302,88.59954,87.67361,92.41898,163.9468,127.0833,94.0,0.0,11.1,14.3,14.3,7.8,9.3,2.5,9.3,9.8,0.0,10.5,11.5,11.6,11.6,11.8,11.3,10.8,3.7,11.1,14.8,11.6,13.5,7.0,15.0,2.0,2.0,2.0,2.0,5.0,12.0,2.0,2.0,2.0,2.0,86.1,85.9,86.1,96.7,79.8,80.8,79.3,82.0,73.3,87.5,81.8,79.8,81.0,78.3,74.2,79.6,74.7,81.3,80.6,72.4,87.0,78.5,0


In [10]:
columns_to_drop = final_tab.columns.difference(train_data.columns)
final_tab = final_tab.drop(columns=columns_to_drop)

In [11]:
print(dgm_oni['ONI_CIRCUITS'].shape)
print(final_tab.shape)
print(train_data.shape)

(2016, 5)
(72, 73)
(685181, 74)


In [None]:
final_tab.head()

In [None]:
train_data = load_csv('final_table_before_normalization.csv')