In [None]:
import pandas as pd
import sqlalchemy
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
from sqlalchemy.exc import SQLAlchemyError
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


from db_queries import username, password, dsn, dbhostname, service_name, dbtables, querys
from table_functions import *
from analyze_visualisation import *

In [None]:
# Load every configured table from Oracle into `data`, keyed by table name.
# Credentials and queries come from the project-local `db_queries` module.
data = {}

# NOTE(review): username/password are interpolated verbatim; if they can contain
# URL-reserved characters ('@', '/', ':') they must already be escaped — confirm.
sqlalchemy_engine = f"oracle+cx_oracle://{username}:{password}@{dbhostname}/?service_name={service_name}"

engine = None
try:
    engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)
    # dbtables and querys are paired positionally; zip stops at the shorter one.
    for table, query in zip(dbtables, querys):
        data[table] = pd.read_sql(query, engine)
except SQLAlchemyError as e:
    # Report how far loading got, then re-raise: continuing with a partially
    # filled `data` only makes later cells fail with an unrelated KeyError.
    print(f"Database load failed; tables loaded so far: {list(data)}")
    print(e)
    raise
finally:
    # Release pooled connections whether or not loading succeeded.
    if engine is not None:
        engine.dispose()

In [None]:
# Rebind `data` to the result of drop_unused_columns (not defined in this notebook;
# presumably provided by one of the star imports — confirm).
# NOTE(review): the cell overwrites its own input, so re-running it applies the helper again.
data = drop_unused_columns(data)

In [None]:
# Display the keys currently held in `data`.
data.keys()

In [None]:
# Sanity check: dimensions of the 'ONI_CIRCUITS' entry.
data['ONI_CIRCUITS'].shape

In [None]:
# Build `final_table` from `data` with the project helper combine_final_table
# (how the entries are combined is not visible in this notebook).
final_table = combine_final_table(data)

In [None]:
# Dimensions of the combined table.
final_table.shape

In [None]:

# Two project helpers applied in sequence: first create_final_status,
# then drop_columns_not_used_in_ml on its result.
table_with_status = create_final_status(final_table)
final_table = drop_columns_not_used_in_ml(table_with_status)

In [None]:
# Dimensions after create_final_status / drop_columns_not_used_in_ml.
final_table.shape

In [None]:
# NOTE(review): rebinds `final_table` to the contents of a CSV, replacing the table built above.
# `read_csv` is called bare (not pd.read_csv); presumably a helper from the star imports — confirm.
# The file name says "standarization" while later cells load "..._before_normalization.csv";
# verify which file is intended.
final_table = read_csv('final_table_before_standarization.csv')

In [None]:
# Box plot of the 'vds_vac_hose1' column, showing only outlier points.
# NOTE(review): `px` is not imported in the visible import cell; presumably
# plotly.express made available through a star import — confirm.
box_layout = {
    'width': 500,
    'height': 400,
    'margin': {'l': 10, 'r': 10, 'b': 10, 't': 10},
    # fully transparent backgrounds
    'paper_bgcolor': 'rgba(0,0,0,0)',
    'plot_bgcolor': 'rgba(0,0,0,0)',
}
fig = px.box(final_table, y='vds_vac_hose1')
fig.update_layout(**box_layout)
fig.update_traces(marker={'color': 'darkblue'}, boxpoints='outliers')
fig.show()

In [None]:
# Export the box plot as interactive HTML and as a static PNG.
# The output directory is defined once; it defaults to the original location and
# can be overridden with the MEB_PLOT_OUTPUT_DIR environment variable so the
# notebook also runs on other machines.
PLOT_OUTPUT_DIR = os.environ.get(
    "MEB_PLOT_OUTPUT_DIR",
    r"C:\Users\dlxpmx8\Desktop\Projekt_AI\meb_process_data_analysis\not_in_repo",
)
box_plot_base = os.path.join(PLOT_OUTPUT_DIR, "box_plot_with_outliers")

fig.write_html(box_plot_base + ".html")
# NOTE(review): static image export needs an image backend (e.g. kaleido) installed — confirm.
fig.write_image(box_plot_base + ".png", format="png")

In [None]:
# Rebind `final_table` to the contents of 'final_table_before_normalization.csv'.
# NOTE(review): `read_csv` is called bare; presumably a project helper from the star imports — confirm.
final_table = read_csv('final_table_before_normalization.csv')

In [None]:
# Draw 200 rows for each of the two 'our_final_status' values (0 and 1) with a
# fixed seed, stack them, and pass a subset of columns to the pair-plot helper.
# Note: status_1_data holds status 0 and status_2_data holds status 1.
per_status_samples = [
    final_table.loc[final_table['our_final_status'] == status_value].sample(n=200, random_state=69)
    for status_value in (0, 1)
]
status_1_data, status_2_data = per_status_samples
random_to_analyze = pd.concat(per_status_samples, ignore_index=True)

# Columns forwarded to the pair plot.
col_dgm = [
    'cisnienie_koncowe',
    'nachdruck_hub',
    'anguss',
    'vds_air_pressure',
    'vds_vac_hose1',
    'vds_vac_valve1',
    'vds_vac_valve2',
]

make_and_save_pariplot(random_to_analyze, col_dgm, 'some_dgm_corr3.png')

In [None]:
# Count rows per distinct value of 'our_final_status'.
final_table['our_final_status'].value_counts()

In [None]:
# NOTE(review): same call as the earlier load of this file; no visible cell in between
# reassigns `final_table`, so this reload looks redundant unless a helper mutated it — confirm.
final_table = read_csv('final_table_before_normalization.csv')

In [None]:
# Display the pairwise column correlation matrix of final_table.
final_table.corr()

In [None]:
# Collect redundant columns: a column is flagged when its absolute correlation
# with any column positioned before it exceeds `threshold`.
TARGET_COLUMN = 'our_final_status'
threshold = 0.85

# The label is kept out of the filter. Otherwise the label itself, or a feature that
# correlates strongly with it, could be flagged and dropped before the train/test split.
feature_columns = [column for column in final_table.columns if column != TARGET_COLUMN]
correlation_matrix = final_table[feature_columns].corr()

# Strictly lower triangle (k=-1) == every pair (i, j) with j < i, diagonal excluded.
# NaN correlations compare as False and are therefore never flagged.
exceeds_threshold = correlation_matrix.abs().to_numpy() > threshold
redundant_mask = np.tril(exceeds_threshold, k=-1).any(axis=1)
high_corr_features = set(correlation_matrix.columns[redundant_mask])

In [None]:
# Flagged column names that begin with 'flow'.
pocz = list(filter(lambda column_name: column_name.startswith('flow'), high_corr_features))
print(pocz)

In [None]:
# NOTE(review): bare tuple expression — it is only displayed, never assigned or used.
# Looks like a scratch note of flow column names; consider moving it to markdown or removing it.
'flow_13', 'flow_9', 'flow_2', 'flow_23'

In [None]:
# Number of columns collected in high_corr_features.
print(len(high_corr_features))

In [None]:
# Dimensions before the flagged columns are dropped.
final_table.shape

In [None]:
# New frame without the columns in high_corr_features; `final_table` itself is not rebound here.
final_table_droped = final_table.drop(columns = high_corr_features)

In [None]:
# Dimensions after dropping the flagged columns.
final_table_droped.shape

In [None]:
# Persist the reduced table via the project helper save_df_to_csv
# (target directory is decided inside the helper; not visible here).
save_df_to_csv(final_table_droped, 'final_table_droped_before_normalization.csv')

In [None]:
# Split the reduced table, normalise the feature parts and write every part to CSV.
ml_data = split_data(final_table_droped)

# (feature key, label key, feature file, label file), processed in this order.
split_outputs = (
    ('x_train', 'y_train', 'x_train_droped.csv', 'y_train_droped.csv'),
    ('x_valid', 'y_valid', 'x_valid_droped.csv', 'y_valid_droped.csv'),
    ('x_test', 'y_test', 'x_test_droped.csv', 'y_test_droped.csv'),
)

for x_key, y_key, x_file, y_file in split_outputs:
    if x_key == 'x_train':
        # The training call also returns the scaler that the other splits reuse.
        ml_data[x_key], scaler = normalize_data(ml_data[x_key])
    else:
        ml_data[x_key] = normalize_data(ml_data[x_key], scaler)
    save_df_to_csv(ml_data[x_key], x_file)
    save_df_to_csv(ml_data[y_key], y_file)

In [23]:
# Remaining column names that begin with 'assigment' or 'working'.
wanted_prefixes = ('assigment', 'working')
pocz = [column_name for column_name in final_table_droped if column_name.startswith(wanted_prefixes)]
print(pocz)

['assigment_1_16.0', 'assigment_2_16.0', 'assigment_2_17.0', 'assigment_3_16.0', 'assigment_10_16.0', 'assigment_12_2.0', 'assigment_12_17.0', 'assigment_22_1.0', 'assigment_22_16.0', 'assigment_23_16.0', 'assigment_27_16.0', 'working_mode_1_2.0', 'working_mode_4_2.0', 'working_mode_5_1.0', 'working_mode_5_2.0', 'working_mode_12_1.0', 'working_mode_21_3.0', 'working_mode_22_2.0', 'working_mode_25_2.0', 'working_mode_26_1.0', 'working_mode_27_3.0']


In [None]:
# Hyper-parameter sweep: optional UMAP reduction x batch size x model variant
# (x dropout rate for model 4). The loops and the training call used to be
# copy-pasted four times; they now live in one helper so run names and
# arguments cannot drift apart. Run order and run names are unchanged.
BATCH_SIZES = [16, 32, 64, 128, 256, 512]
MODEL_NUMBERS = [1, 3, 4, 5, 6]
DROPOUT_RATES = [0.5, 0.25]  # only swept for model number 4
UMAP_COMPONENTS = [None, 10, 20, 100, 150]  # None = train on the un-reduced features
UMAP_MIN_DISTS = [0.75, 0.5, 0.25, 0.1, 0.01]


def _run_model_grid(datasets, run_prefix):
    """Train and evaluate every batch-size / model combination on one data variant.

    Args:
        datasets: dict of data splits; its values are unpacked positionally into
            compile_fit_evaluate_model, so the insertion order of the keys matters.
        run_prefix: text placed in front of the "_{batch}b_{model}m" run-name suffix.
    """
    for batch_ in BATCH_SIZES:
        for model_num in MODEL_NUMBERS:
            base_name = f"{run_prefix}_{batch_}b_{model_num}m"
            if model_num == 4:
                # Model 4 is the only variant that receives drop_neurons.
                for drop_ in DROPOUT_RATES:
                    compile_fit_evaluate_model(*datasets.values(), epochs_=100, batch_size_=batch_,
                                               model_number=model_num, drop_neurons=drop_,
                                               run_name_=f"{base_name}_{drop_}d")
            else:
                compile_fit_evaluate_model(*datasets.values(), epochs_=100, batch_size_=batch_,
                                           model_number=model_num, run_name_=base_name)


for umap_c in UMAP_COMPONENTS:
    if umap_c is None:
        _run_model_grid(ml_data, "aa")
        continue
    for umap_dist in UMAP_MIN_DISTS:
        # Shallow copy: only the three feature entries are rebound, ml_data stays untouched.
        ml_data_c = ml_data.copy()
        ml_data_c['x_train'], ml_data_c['x_valid'], ml_data_c['x_test'] = umap_transformation(
            ml_data_c['x_train'], ml_data_c['x_valid'], ml_data_c['x_test'],
            n_components_umap=umap_c, umap_min_dist=umap_dist)
        _run_model_grid(ml_data_c, f"aa_{umap_c}u_{umap_dist}dst")