In [1]:
#!/bin/python3
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt, warnings, os.path, pickle
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from datetime import date
import joblib
from sklearn.inspection import permutation_importance

# Silences every warning category for the whole kernel session, which also hides
# library deprecation and pandas chained-assignment warnings raised by later cells.
# NOTE(review): consider narrowing this to specific categories/modules.
warnings.filterwarnings("ignore")

In [2]:
### Data import
# Read the raw CSV export (path is relative to the notebook's working directory)
# into the working frame `df` that the following cells transform.
df = pd.read_csv(filepath_or_buffer="dataset/e117-2443.csv")

In [3]:
# Frequency of each distinct `sfc` value; shown as the cell output.
sfc_counts = df['sfc'].value_counts()
sfc_counts



[]                                                                                                  4880
['S1164061012012304430', 'S1164061012022304430', 'S1164062012023300005', 'S1164062012013300005']       2
['S1155695031703300005', 'S1155696031692304430', 'S1155695031693300005', 'S1155696031702304430']       2
['S1164061007412304430', 'S1164062007413300005', 'S1164061007422304430', 'S1164062007423300005']       2
['S1158701028392304430', 'S1158700028393300005', 'S1158701028402304430', 'S1158700028403300005']       1
                                                                                                    ... 
['S1164062003033300005', 'S1164062003043300005', 'S1164061003032304430', 'S1164061003042304430']       1
['S1161728026913300005', 'S1161729026912304430', 'S1161729026922304430', 'S1161728026923300005']       1
['S1094213008873300005', 'S1094214008882304430', 'S1094214008872304430', 'S1094213008883300005']       1
['S1112828003303300005', 'S1112828003293300005', 'S1112

# Data prep

In [4]:
## Convert reason (injection defect) data to 0 (OK) and 1 (NOK)

# Mark rows whose `sfc` is the empty-list string '[]' with the sentinel 1 so they
# can be matched below.
df['sfc'] = df['sfc'].replace(['[]'], 1)

# '[]' in `reason` becomes 0 (OK); every other value becomes 1 (NOK).
df['reason'] = df['reason'].replace(['[]'], 0)
# A single .loc[mask, col] assignment is used instead of df['reason'][mask] = 1:
# chained indexing may write to a temporary copy and does nothing at all under
# pandas Copy-on-Write, which would leave raw strings in `reason`.
df.loc[df['reason'] != 0, 'reason'] = 1
# Rows with an empty `sfc` list are forced to NOK as well.
# NOTE(review): presumably a cycle without any SFC counts as a reject - confirm.
df.loc[df['sfc'] == 1, 'reason'] = 1
df = df.astype({"reason": int})

# Turn the three listed machine-status codes into NaN so that the dropna() below
# removes those rows.
df['stsMachine'] = df['stsMachine'].replace(['0M000', '0S000', '0U000'], np.nan)

# Drops every row containing a NaN in ANY column, not only the stsMachine rows
# blanked out above.
df = df.dropna()


In [21]:
# Class balance of the binary target (0 = OK, 1 = NOK); shown as the cell output.
reason_counts = df['reason'].value_counts()
reason_counts

0    16554
1     6385
Name: reason, dtype: int64

In [6]:
# Columns removed before modelling, so that only the remaining process
# parameters and the `reason` target stay in `df`.
drop_list = [
    'Unnamed: 0',
    'date',
    'cntCycle',
    'stsMachine',
    'timestamp',
    'sfc',
    'decMold',
    'prsInjectionHyd1',
    'prsTransferHyd1',
    'strCushion1',
    'strPlasticisation1',
    'strTransfer1',
]
df = df.drop(columns=drop_list)

In [28]:
# Print the row at position 2000 as a nested plain-Python list.
sample_row = df.iloc[2000:2001]
print(sample_row.to_numpy().tolist())

[[24.12, 294.0, 88.0, 295.0, 295.0, 295.0, 62.5, 0.0, 292.0, 0.0, 40.0, 303.0, 295.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 2.35, 290.0, 280.0, 59.0, 255.0, 270.0, 0.0, 260.0, 0.0, 2.47, 1.97, 0.0, 0.0, 0.0, 0.0, 836.0, 249.0, 47.44, 4600.0, 15.06, 0.0]]


In [7]:
# Features: every remaining column except the target.
# (Earlier experiment, kept for reference, used only this subset:
#  X = df[['timCool1','tmpFlange1', 'spdInjection1', 'timMoldClose', 'timFill1', 'prsHoldSpec1', 'timCycle', 'frcClamp','timPlasticisation1']])
X = df.drop(columns=['reason'])
# Target: binary OK/NOK label.
y = df['reason']




# Oversampling

In [None]:
# Oversampling: randomly duplicate minority-class rows until both classes have
# the same number of samples (seeded for repeatability).
# NOTE(review): this runs on the full data set, before the train/validation split
# in a later cell, so copies of the same row can end up on both sides of the split
# and make validation scores optimistic. Consider splitting first and resampling
# only the training part.
ros = RandomOverSampler(random_state=0)
X_resampled, y_resampled = ros.fit_resample(X, y)
X, y = X_resampled, y_resampled

# Scaler

In [9]:
# Scaler: standardise every feature, fitting on the full X
# (i.e. before the train/validation split done in the next cell).
scaler = preprocessing.StandardScaler()
# The result is wrapped in a fresh DataFrame, so columns become positional
# integers rather than the original feature names.
X = pd.DataFrame(scaler.fit_transform(X))

# Persist the fitted scaler under a date-stamped file name; joblib.dump's return
# value (the list of written files) is the cell output.
scaler_path = "/home/farplas/Desktop/Farplas/Injection-machine-parameter-optimization/e117/scaler/Scaler_2443_"+str(date.today())+".joblib"
joblib.dump(scaler, scaler_path)


['/home/farplas/Desktop/Farplas/Injection-machine-parameter-optimization/e117/scaler/Scaler_2443_2021-08-23.joblib']

In [10]:
# Train: hold out 20% of the rows for validation, shuffled with a fixed seed.
# NOTE(review): X/y were oversampled in an earlier cell before this split, so
# duplicated rows may appear in both the training and validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Random forest: 100 trees, entropy split criterion, fixed seed.
rf2 = RandomForestClassifier(
    criterion='entropy',
    n_estimators=100,
    random_state=3,
)
# fit() returns the estimator itself, so `model` refers to the same object as `rf2`.
model = rf2.fit(X_train, y_train)


In [14]:
# Column names still present in `df` (features followed by the target).
df.columns.tolist()

['timCool1',
 'tmpMoldZone3',
 'tmpFlange1',
 'tmpMoldZone4',
 'tmpMoldZone1',
 'tmpMoldZone2',
 'spdInjection1',
 'tmpMoldZone9',
 'tmpMoldZone7',
 'tmpMoldZone8',
 'tmpOil',
 'tmpMoldZone5',
 'tmpMoldZone6',
 'tmpMoldZone19',
 'tmpMoldZone18',
 'tmpMoldZone15',
 'volTransfer1',
 'tmpMoldZone14',
 'tmpMoldZone17',
 'tmpMoldZone16',
 'timTransfer1',
 'timMoldClose',
 'tmpBarrel1Zone5',
 'tmpBarrel1Zone4',
 'prsPomp1',
 'tmpBarrel1Zone1',
 'tmpBarrel1Zone3',
 'tmpMoldZone20',
 'tmpBarrel1Zone2',
 'volShot1',
 'timFill1',
 'timMoldOpen',
 'tmpMoldZone11',
 'tmpMoldZone10',
 'tmpMoldZone13',
 'tmpMoldZone12',
 'prsHoldSpec1',
 'tmpNozle1',
 'timCycle',
 'frcClamp',
 'timPlasticisation1',
 'reason']

In [12]:
# Disabled cell: the permutation-importance code below is wrapped in a string
# literal, so nothing is computed - the notebook only echoes the string itself.
# Remove the surrounding quotes to compute and plot mean importances (with std
# error bars) for the fitted `model` on the training split. `df.columns[:41]`
# takes the first 41 column names of `df` as the feature labels.
'''
#Importance of features
feature_names = df.columns[:41]
result = permutation_importance(model, X_train, y_train, n_repeats = 10, random_state = 42, n_jobs = 2)
forest_importances = pd.Series(result.importances_mean, index = feature_names)
print(forest_importances)

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
forest_importances.plot.bar(yerr=result.importances_std,ax=ax)
fig.tight_layout()
plt.show()
'''

'\n#Importance of features\nfeature_names = df.columns[:41]\nresult = permutation_importance(model, X_train, y_train, n_repeats = 10, random_state = 42, n_jobs = 2)\nforest_importances = pd.Series(result.importances_mean, index = feature_names)\nprint(forest_importances)\n\nimport matplotlib.pyplot as plt\nfig, ax = plt.subplots()\nforest_importances.plot.bar(yerr=result.importances_std,ax=ax)\nfig.tight_layout()\nplt.show()\n'

In [13]:
# Original author's note, kept as written:
#   with out oversamp   timCool1,tmpFlange1, spdInjection1, timMoldCLose, timFill, prsHoldSpec1, timCycle, frcClamp,timPlasticisation1
# Save the fitted model under a date-stamped file name; joblib.dump's return
# value (the list of written files) is the cell output.
model_path = "/home/farplas/Desktop/Farplas/Injection-machine-parameter-optimization/e117/model/model_2443_"+str(date.today())+".joblib"
joblib.dump(model, model_path)


['/home/farplas/Desktop/Farplas/Injection-machine-parameter-optimization/e117/model/model_2443_2021-08-23.joblib']

In [29]:
# Write the cleaned frame (features + target) to disk. The row index is written
# too, and the file name has no extension.
edited_csv_path = "e117-2443_edit"
df.to_csv(edited_csv_path)