# Start: Analysiere Daten

In [58]:
# import initial_data.csv
import pandas as pd
import numpy as np

from feasibility import is_feasible

# import data from csv file
def import_data(path='initial_data.csv'):
    """Load the measurement table from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to ``'initial_data.csv'`` so that
        existing zero-argument calls keep reading the same file as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file, exactly as ``pd.read_csv``
        returns them.
    """
    return pd.read_csv(path)

# Load the dataset once into the module-level `data` frame; the helper
# functions and later cells in this notebook read from (and add columns to) it.
data = import_data()

In [59]:
# Gather the column labels and split them by position: the first eight
# columns are treated as inputs, the following five as outputs.
column_names = data.columns.values
inputs, outputs = column_names[:8], column_names[8:13]

# One line per array: all labels, then the inputs, then the outputs.
print(column_names, inputs, outputs, sep='\n')

['Engine speed' 'Engine load' 'Railpressure' 'Air supply' 'Crank angle'
 'Intake pressure' 'Back pressure' 'Intake temperature' 'NOx' 'PM 1' 'CO2'
 'PM 2' 'Pressure cylinder']
['Engine speed' 'Engine load' 'Railpressure' 'Air supply' 'Crank angle'
 'Intake pressure' 'Back pressure' 'Intake temperature']
['NOx' 'PM 1' 'CO2' 'PM 2' 'Pressure cylinder']


In [60]:
def get_input_data(row):
    """Return the eight input values of one row of the global ``data`` frame.

    Parameters
    ----------
    row : int
        Positional (``iloc``) index of the row to read.

    Returns
    -------
    tuple
        The scalars stored in positional columns 0 through 7, in column order.
    """
    # Scalar-by-scalar positional lookups, matching columns 0..7.
    return tuple(data.iloc[row, col] for col in range(8))

def get_output_data(row):
    """Return the five output values of one row of the global ``data`` frame.

    Parameters
    ----------
    row : int
        Positional (``iloc``) index of the row to read.

    Returns
    -------
    tuple
        The scalars stored in positional columns 8 through 12, in column order.
    """
    # Scalar-by-scalar positional lookups, matching columns 8..12.
    return tuple(data.iloc[row, col] for col in range(8, 13))

def get_critical_output_data(i):
    """Return the three safety-relevant outputs of one row of the global ``data``.

    Parameters
    ----------
    i : int
        Positional (``iloc``) index of the row to read.

    Returns
    -------
    tuple
        The scalars stored in positional columns 9, 11 and 12, in that order.
        In the column listing printed earlier in this notebook those are
        'PM 1', 'PM 2' and 'Pressure cylinder'.
    """
    critical_columns = (9, 11, 12)
    return tuple(data.iloc[i, col] for col in critical_columns)

In [61]:
# Evaluate is_feasible on the eight input values of every row and store the
# per-row results in a new 'feasible' column.
feasible = [
    is_feasible(*get_input_data(row_idx))
    for row_idx in range(len(data))
]
data['feasible'] = feasible

In [62]:
# Check if outputs are in safe range
# PM 1 < 6, PM 2 < 16, Pressure cylinder < 160

def label_safe(data):
    """Add a boolean 'safe' column marking rows whose critical outputs are in range.

    A row is safe when all three conditions hold (strict comparisons):
    positional column 9 < 6, positional column 11 < 16 and positional
    column 12 < 160. Rows containing NaN in any of these columns are
    labelled unsafe, because every comparison with NaN is False.

    The values are read from the frame passed in, not from the notebook's
    module-level ``data``, so the function labels whatever frame it is given.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least 13 columns; columns 9, 11 and 12 must be
        comparable with numbers.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the 'safe' column added
        (or overwritten) in place.
    """
    pm1_ok = data.iloc[:, 9] < 6
    pm2_ok = data.iloc[:, 11] < 16
    pressure_ok = data.iloc[:, 12] < 160
    # Assign raw values so the result is positional and never re-aligned on
    # the index (safe even for non-unique index labels).
    data['safe'] = (pm1_ok & pm2_ok & pressure_ok).to_numpy()
    return data

# Attach the 'safe' column to the working frame.
data = label_safe(data)

In [63]:
# Tighter limits used for the "broader" unsafe label, in the order applied
# below: positional column 9, column 11, column 12. The training cell prints
# them as PM 1, PM 2 and Pressure cylinder.
unsafe_params = [5, 13, 130]

# label broader safe range
def label_broader_safe(data):
    """Add a boolean 'broader_safe' column using the limits in ``unsafe_params``.

    A row is labelled True when positional column 9 < ``unsafe_params[0]``,
    column 11 < ``unsafe_params[1]`` and column 12 < ``unsafe_params[2]``
    (strict comparisons; NaN values yield False). The limits are read from
    the module-level ``unsafe_params`` at call time.

    The values are read from the frame passed in, not from the notebook's
    module-level ``data``, so the function labels whatever frame it is given.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least 13 columns; columns 9, 11 and 12 must be
        comparable with numbers.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the 'broader_safe' column
        added (or overwritten) in place.
    """
    pm1_ok = data.iloc[:, 9] < unsafe_params[0]
    pm2_ok = data.iloc[:, 11] < unsafe_params[1]
    pressure_ok = data.iloc[:, 12] < unsafe_params[2]
    # Assign raw values so the result is positional and never re-aligned on
    # the index (safe even for non-unique index labels).
    data['broader_safe'] = (pm1_ok & pm2_ok & pressure_ok).to_numpy()
    return data

data = label_broader_safe(data)

# Report how many rows have broader_safe == False.
print(data['broader_safe'].eq(False).sum())

12


In [64]:
# Keep only the rows whose 'safe' flag is False and print them.
unsafe_data = data.loc[data['safe'].eq(False)]
print(unsafe_data)

    Engine speed  Engine load  Railpressure  Air supply  Crank angle  \
70        1800.0        35.66        1521.7    310.5600         5.69   
89        2000.0       139.54        2500.3   1008.3008         3.66   

    Intake pressure  Back pressure  Intake temperature         NOx      PM 1  \
70           1123.5         1236.1                60.9   66.869420  4.095885   
89           3129.9         3329.4                79.7  328.147428  4.017909   

           CO2       PM 2  Pressure cylinder  feasible   safe  broader_safe  
70   36.291467  16.283255          60.497930      True  False         False  
89  163.080520   4.692198         160.047375      True  False         False  


In [66]:
# train a model to predict unsafe output
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# split data into training and testing data
# Features are the input columns; the target is the 'broader_safe' label.
X = data[inputs]
y = data['broader_safe']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# count number of False values in y_train and y_test
print("Number of unsafe in train: ",len(y_train[y_train == False]))
print("Number of unsafe in test: ",len(y_test[y_test == False]))

print("Parameters for unsafe data: ")
print("PM 1: ", unsafe_params[0])
print("PM 2: ", unsafe_params[1])
print("Pressure cylinder: ", unsafe_params[2])

# train a random forest classifier
# The forest is seeded like the split above; without a fixed random_state the
# fitted trees (and therefore the reported accuracy) can change on every run.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# predict on test data
# NOTE(review): the unsafe class is rare, so plain accuracy can look good even
# if no unsafe row is detected — consider also checking per-class recall.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Number of unsafe in train:  10
Number of unsafe in test:  2
Parameters for unsafe data: 
PM 1:  5
PM 2:  13
Pressure cylinder:  130
Accuracy: 0.9
