In [105]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score, recall_score, precision_score, accuracy_score
import numpy as np

from sklearn.ensemble import IsolationForest
from joblib import dump, load

In [106]:
import pandas as pd 
import plotly.express as px
# Load the raw semicolon-separated process log. Forward slashes are used in the
# relative path because they resolve on Windows as well as on POSIX systems,
# whereas the previous backslash form only resolved on Windows.
dfo = pd.read_csv("../Data/SmA-Four-Tank-Batch-Process_V2.csv", sep=';')
# Parse the 'timestamp' column into real datetimes.
dfo['timestamp'] = pd.to_datetime(dfo['timestamp'])
#df_day=dfo.copy()
#df_day['h']=df_day['timestamp'].dt.hour

In [107]:
# Work on a random subset of the raw log.
# - random_state makes the subset (and every score derived from it) reproducible.
# - min(...) keeps the cell from raising when the file has fewer than 50000 rows.
df = dfo.sample(n=min(50000, len(dfo)), random_state=42)
#df=dfo.copy()
df = df.drop(columns=["timestamp"])

# Rename the first remaining column to 'Dev'; it is used as the label below.
c = list(df.columns)
c[0] = "Dev"
df.columns = c

# Drop rows whose label is 0. The explicit .copy() gives an independent frame so
# the assignment below does not write into a filtered view (SettingWithCopyWarning).
df = df[df['Dev'] != 0].copy()

# Binarise the label: 1 -> 0, every other remaining value -> 1.
# NOTE(review): presumably 1 marks normal operation and other codes mark deviations - confirm.
df['Dev'] = (df['Dev'] != 1).astype('int')
df_clean = df.copy()

In [108]:
# Persist the cleaned sample (row index included). Forward slashes keep the
# relative path valid on both Windows and POSIX systems.
df_clean.to_csv('../Data/data_01_clean.csv')

# Split features and the 'Dev' label with scikit-learn's default test fraction.
# shuffle=False keeps df_clean's current row order, so the test set is the tail
# of the frame; random_state has no effect while shuffling is disabled and is
# kept only so the call signature stays as before.
X_train, X_test, y_train, y_test = train_test_split(
    df_clean.drop(columns=['Dev']),
    df_clean['Dev'],
    shuffle=False,
    random_state=42,
)

In [None]:
# Supervised model: a 100-tree random forest trained on the 'Dev' label.
# random_state is fixed so the fitted forest, and the scores in the following
# cells, are identical after Restart & Run All.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Score the fitted `model` on the rows it was trained on.
# r2_score is a regression metric; it is printed next to the classification
# scores exactly as before.
y_pred = model.predict(X_train)
train_r2 = r2_score(y_train, y_pred)
train_accuracy = accuracy_score(y_train, y_pred)
train_recall = recall_score(y_train, y_pred)
train_precision = precision_score(y_train, y_pred)
print(f'R2:{train_r2}\nAccuracy: {train_accuracy}\nRecall: {train_recall}\nPrecision: {train_precision}')

In [None]:
# Confusion-matrix counts on the training split.
# ravel() flattens the 2x2 matrix row by row, which yields TN, FP, FN, TP.
y_pred = model.predict(X_train)
tn, fp, fn, tp = confusion_matrix(y_train, y_pred).ravel()
print(f'TN: {tn}', f'FP: {fp}', f'FN: {fn}', f'TP: {tp}', sep='\n')

In [None]:
# Score the fitted `model` on the held-out rows.
# r2_score is a regression metric; it is printed next to the classification
# scores exactly as before.
y_pred = model.predict(X_test)
test_r2 = r2_score(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
test_recall = recall_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred)
print(f'R2:{test_r2}\nAccuracy: {test_accuracy}\nRecall: {test_recall}\nPrecision: {test_precision}')

In [None]:
# Confusion-matrix counts on the held-out split.
# ravel() flattens the 2x2 matrix row by row, which yields TN, FP, FN, TP.
y_pred = model.predict(X_test)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print(f'TN: {tn}', f'FP: {fp}', f'FN: {fn}', f'TP: {tp}', sep='\n')

In [None]:
from sklearn.ensemble import IsolationForest
from joblib import dump, load

# Unsupervised reference model: a 100-tree isolation forest fitted on the
# training features only (labels are not used).
# random_state is fixed so the pickled model is the same on every run.
ISF = IsolationForest(n_estimators=100, random_state=42)
ISF.fit(X_train)
# Serialise the fitted forest into the current working directory.
dump(ISF, 'ISF.pkl')

In [None]:
list_train=[]
list_test=[]

from sklearn.neighbors import LocalOutlierFactor
from joblib import dump, load


def _isf_result_row(fitted, X, y_true, n_n, boot):
    """Score one fitted isolation forest on one split and return a result record.

    Parameters
    ----------
    fitted : fitted estimator whose ``predict`` returns +1 (inlier) / -1 (outlier).
    X : features to predict on.
    y_true : 0/1 labels, row-for-row aligned with ``X``.
    n_n : number of trees used for this configuration.
    boot : whether bootstrap sampling was enabled for this configuration.

    Returns
    -------
    dict
        Configuration ('params', 'n_n', 'bootstrap'), confusion-matrix counts
        ('tn', 'fp', 'fn', 'tp') and the summary scores.
    """
    # Map the forest's +1 / -1 output onto the 0 / 1 coding of 'Dev' (1 = flagged).
    y_hat = np.where(fitted.predict(X) == -1, 1, 0)
    # labels=[0, 1] pins a 2x2 matrix so the four-way unpacking also works when
    # one class is absent from both the labels and the predictions.
    tn, fp, fn, tp = confusion_matrix(y_true, y_hat, labels=[0, 1]).ravel()
    return {
        'params': (n_n, boot),
        'n_n': n_n,
        'bootstrap': boot,
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tp': tp,
        'R2': r2_score(y_true, y_hat),
        'Accuracy': accuracy_score(y_true, y_hat),
        'Recall': recall_score(y_true, y_hat),
        'Precision': precision_score(y_true, y_hat),
    }


# Grid over forest size and bootstrap setting. Train and test records share one
# schema, so the two result tables can be compared column by column.
for n_n in [5,10,20,40,80,160]:
    for boot in [True,False]:
        print(n_n,boot)
        # NOTE: `model` is reassigned here, replacing any estimator an earlier
        # cell stored under that name.
        # random_state is fixed so every configuration is reproducible.
        model = IsolationForest(n_estimators=n_n,n_jobs=-1,bootstrap=boot,random_state=42)
        model.fit(X_train)
        # Forward slashes keep the relative path valid on Windows and POSIX.
        dump(model, f'../Models/ISF{n_n}_{boot}.pkl')

        #-------------------Train-Set----------------------------------------------------------------
        list_train.append(_isf_result_row(model, X_train, y_train, n_n, boot))

        #-------------------Test-Set----------------------------------------------------------------
        list_test.append(_isf_result_row(model, X_test, y_test, n_n, boot))



In [None]:
import plotly.express as px
# Turn the per-configuration result records into one table per split.
df_train_data = pd.DataFrame(data=list_train)
df_test_data = pd.DataFrame(data=list_test)

In [None]:
# False positives and false negatives on the test split against the number of trees.
px.scatter(data_frame=df_test_data, x='n_n', y=['fp', 'fn'])

# Distance 

In [None]:
from scipy.spatial.distance import cdist

# Euclidean distance between every ordered pair of rows in X_train (each row
# against every row, itself included), flattened row by row - the same values
# in the same order as the former nested Python loop, but computed in compiled
# code instead of len(X_train)**2 interpreter iterations.
# NOTE: the result still holds len(X_train)**2 values, so memory use grows
# quadratically with the number of training rows.
points = X_train.to_numpy(dtype=float)
array_dist = cdist(points, points, metric='euclidean').ravel()

# Kept under the original name for the cells below; it is now a NumPy array
# rather than a Python list.
dist_array = array_dist

In [None]:
# Rebuild array_dist as a NumPy array from dist_array (the same conversion the
# previous cell ends with), so this cell can be re-run on its own.
array_dist=np.array(dist_array)

In [None]:
# Show only the first few distances; the full collection has one entry per
# ordered pair of training rows and would flood the cell output.
dist_array[:10]