In [None]:
import pandas as pd
import sqlalchemy
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
from sqlalchemy.exc import SQLAlchemyError
from sklearn.preprocessing import StandardScaler

from db_queries import username, password, dsn, dbhostname, service_name, dbtables, querys
from table_functions import *
from analyze_visualisation import *

In [None]:
# Load every configured table into `data`, a dict of DataFrames keyed by table name.
data = {}

# NOTE(review): the credentials are interpolated verbatim. A username/password that
# contains URL-reserved characters (e.g. '@', '/', ':') would need URL-escaping
# before being placed in the connection string -- confirm against db_queries.
sqlalchemy_engine = (
    f"oracle+cx_oracle://{username}:{password}@{dbhostname}/?service_name={service_name}"
)

engine = None
try:
    engine = sqlalchemy.create_engine(sqlalchemy_engine, arraysize=1000)
    # dbtables and querys are paired positionally: the i-th query fills the i-th table.
    for table, query in zip(dbtables, querys):
        data[table] = pd.read_sql(query, engine)
except SQLAlchemyError as e:
    # Report the database error, then re-raise so the notebook stops here instead of
    # running the following cells on an empty or partially filled `data` dict.
    print(e)
    raise
finally:
    # Release the pooled connections whether or not the load succeeded.
    if engine is not None:
        engine.dispose()

data = drop_unused_columns(data)

In [None]:
# Combine the loaded tables into one frame and pass it through create_final_status.
final_table = create_final_status(combine_final_table(data))
# The normalisation step is currently disabled:
# final_table = normalize_data(final_table)

In [None]:
# Lift the column display limit so wide frames are rendered without truncation,
# then preview the first three rows of the combined table.
pd.options.display.max_columns = None
final_table.head(n=3)

In [None]:
# Data analysis: draw a reproducible sample of up to SAMPLE_SIZE rows for each of
# the final statuses 1 and 2, then stack both samples into a single frame.
SAMPLE_SIZE = 200
SAMPLE_SEED = 69

status_1_rows = final_table[final_table['our_final_status'] == 1]
status_2_rows = final_table[final_table['our_final_status'] == 2]

# DataFrame.sample (without replacement) raises ValueError when n is larger than the
# number of available rows, so cap n at the size of each status group. When a group
# has at least SAMPLE_SIZE rows the result is the same as sampling n=SAMPLE_SIZE.
status_1_data = status_1_rows.sample(
    n=min(SAMPLE_SIZE, len(status_1_rows)), random_state=SAMPLE_SEED
)
status_2_data = status_2_rows.sample(
    n=min(SAMPLE_SIZE, len(status_2_rows)), random_state=SAMPLE_SEED
)

# ignore_index=True gives the combined frame a fresh 0..N-1 index.
random_to_analyze = pd.concat([status_1_data, status_2_data], ignore_index=True)

In [None]:
# List the column labels available in the sampled frame.
print(list(random_to_analyze.columns))

In [None]:
# Column groups that are plotted together.
col_dgm = [
    'czas_fazy_1', 'czas_fazy_2', 'czas_fazy_3', 'max_predkosc',
    'cisnienie_tloka', 'cisnienie_koncowe', 'nachdruck_hub', 'anguss',
    'temp_pieca', 'oni_temp_curr_f1', 'oni_temp_curr_f2', 'oni_temp_fore_f1',
    'oni_temp_fore_f2', 'vds_air_pressure', 'vds_vac_hose1', 'vds_vac_hose2',
    'vds_vac_tank', 'vds_vac_valve1', 'vds_vac_valve2', 'czas_taktu',
]

# Numbered column families: flow_1..flow_28, start_delay_1..start_delay_28, temp_1..temp_28.
suffixes = range(1, 29)
col_flow = ['flow_' + str(i) for i in suffixes]
col_delay = ['start_delay_' + str(i) for i in suffixes]
col_temp = ['temp_' + str(i) for i in suffixes]

# One make_and_save_pariplot call per column group, each with its own output file name.
# NOTE(review): the file names say "normalized"/"normalize" while the normalize_data
# call earlier in the notebook is commented out -- confirm the names are still accurate.
pairplot_jobs = [
    (col_dgm, 'normalized_dirst_20.png'),
    (col_flow, 'normalized_flow.png'),
    (col_delay, 'normalize_delay_pairplot.png'),
    (col_temp, 'normalize_temp_pairplot.png'),
]
for columns, file_name in pairplot_jobs:
    make_and_save_pariplot(random_to_analyze, columns, file_name)

In [None]:
# One make_and_save_heatmap call per column group, run on the full (unsampled) table.
heatmap_jobs = [
    (col_dgm, 'cor1_hm.png'),
    (col_flow, 'flow_heatmap.png'),
    (col_delay, 'delay_heatmap.png'),
    (col_temp, 'temp_heatmap.png'),
]
for columns, file_name in heatmap_jobs:
    make_and_save_heatmap(final_table, columns, file_name)

In [None]:
# Frequency of each distinct value in the oni_temp_fore_f2 column.
oni_temp_fore_f2_values = final_table['oni_temp_fore_f2']
oni_temp_fore_f2_values.value_counts()

In [None]:
# Columns compared in the bar chart (col_dgm without 'czas_fazy_1' and 'vds_air_pressure').
col_dgm_bar = [
    'czas_fazy_2', 'czas_fazy_3', 'max_predkosc', 'cisnienie_tloka',
    'cisnienie_koncowe', 'nachdruck_hub', 'anguss', 'temp_pieca',
    'oni_temp_curr_f1', 'oni_temp_curr_f2', 'oni_temp_fore_f1', 'oni_temp_fore_f2',
    'vds_vac_hose1', 'vds_vac_hose2', 'vds_vac_tank', 'vds_vac_valve1',
    'vds_vac_valve2', 'czas_taktu',
]

# Per-column mean for every nr_dgm group (one row per group).
mean_values_grouped = final_table.groupby('nr_dgm')[col_dgm_bar].mean()

# Two bars per column, placed side by side around each integer tick position.
bar_positions = np.arange(len(col_dgm_bar))
bar_width = 0.35
half_width = bar_width / 2

fig, ax = plt.subplots(figsize=(12, 6))
# NOTE(review): iloc[0] / iloc[1] are the first two groups in the groupby result;
# the legend labels assume those are nr_dgm 1 and 2 -- confirm against the data.
bar1 = ax.bar(
    bar_positions - half_width, mean_values_grouped.iloc[0], bar_width, label='nr_dgm = 1'
)
bar2 = ax.bar(
    bar_positions + half_width, mean_values_grouped.iloc[1], bar_width, label='nr_dgm = 2'
)

ax.set(
    xlabel='Columns',
    ylabel='Mean Value',
    title='Mean Values of Specified Columns for nr_dgm=1 and nr_dgm=2',
)
ax.set_xticks(bar_positions)
ax.set_xticklabels(col_dgm_bar, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Per-column means for every nr_dgm group, this time over the full col_dgm list.
mean_values_grouped = final_table.groupby('nr_dgm')[col_dgm].mean()

# Relative change of the second group row with respect to the first, in percent.
baseline_means = mean_values_grouped.iloc[0]
compared_means = mean_values_grouped.iloc[1]
percentage_difference = (compared_means - baseline_means) / baseline_means * 100

print("Percentage Difference in Mean Values (nr_dgm=2 - nr_dgm=1):")
print(percentage_difference)

In [None]:
# Print the table of per-nr_dgm column means as plain text.
print(mean_values_grouped)