### Importing required libraries

In [100]:
import numpy as np
import pandas as pd
import statistics
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### Importing the dataset

In [14]:
## Synthetic Control Chart Time Series data set: 600 synthetically generated
## control charts, one chart per row.
## https://archive.ics.uci.edu/ml/datasets/Synthetic+Control+Chart+Time+Series
## The file is whitespace-delimited and has no header row, so the columns are
## simply numbered from 0. `sep=r'\s+'` is the supported spelling of the
## deprecated `delim_whitespace=True` and parses the file identically.
ex = pd.read_csv('./assets/synthetic_control_data.csv', sep=r'\s+', header=None)
ex.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,28.7812,34.4632,31.3381,31.2834,28.9207,33.7596,25.3969,27.7849,35.2479,27.1159,...,31.4333,24.5556,33.7431,25.0466,34.9318,34.9879,32.4721,33.3759,25.4652,25.8717
1,24.8923,25.741,27.5532,32.8217,27.8789,31.5926,31.4861,35.5469,27.9516,31.6595,...,32.5577,31.0205,26.6418,28.4331,33.6564,26.4244,28.4661,34.2484,32.1005,26.691
2,31.3987,30.6316,26.3983,24.2905,27.8613,28.5491,24.9717,32.4358,25.2239,27.3068,...,33.6318,26.5966,25.5387,32.5434,25.5772,29.9897,31.351,33.9002,29.5446,29.343
3,25.774,30.5262,35.4209,25.6033,27.97,25.2702,28.132,29.4268,31.4549,27.32,...,34.6292,28.7261,28.2979,31.5787,34.6156,32.5492,30.9827,24.8938,27.3659,25.3069
4,27.1798,29.2498,33.6928,25.6264,24.6555,28.9446,35.798,34.9446,24.5596,34.2366,...,27.7218,27.9601,35.7198,27.576,35.3375,29.9993,34.2149,33.1276,31.1057,31.0179


### Setting features

In [15]:
## Summarise every control chart (one row of `ex`) with four distribution
## statistics; these are the features the classifier is trained on.
xdf = pd.DataFrame({
    'mean': ex.mean(axis=1),
    'std': ex.std(axis=1, ddof=0),  # population std (ddof=0), same as np.std's default
    'skew': ex.skew(axis=1),
    'kurt': ex.kurt(axis=1),
})

## The data set stores its six pattern classes as consecutive runs of 100 rows
## each, in this order: rows 0-99, 100-199, ..., 500-599.
PATTERN_LABELS = [
    'normal',
    'cyclic',
    'increasing_trend',
    'decreasing_trend',
    'upward_shift',
    'downward_shift',
]
ROWS_PER_PATTERN = 100

## Fail loudly if the file does not have the expected layout, because the
## labels below are derived purely from row position.
assert len(xdf) == ROWS_PER_PATTERN * len(PATTERN_LABELS), (
    f"expected {ROWS_PER_PATTERN * len(PATTERN_LABELS)} rows, got {len(xdf)}"
)

## Assign the whole column at once: no chained `xdf['label'].iloc[...] = ...`
## (which does not write through under pandas Copy-on-Write) and exactly 100
## rows per class.
xdf['label'] = np.repeat(PATTERN_LABELS, ROWS_PER_PATTERN)

In [16]:
## Split the feature table into model inputs (the four statistics) and the
## target (pattern label), then hold out 20% of the charts for testing.
feature_columns = ['mean', 'std', 'skew', 'kurt']
X = xdf[feature_columns].to_numpy()
y = xdf['label'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,  # fixed seed so the split is reproducible
)

### Training and Testing

In [91]:
## Support Vector Machine (SVM) classifier with a linear kernel.
## Fit on the training split, predict the held-out split and print the
## per-class precision / recall / F1 report.
svc_params = {'kernel': 'linear', 'gamma': 'auto', 'C': 2}
classifier = svm.SVC(**svc_params).fit(X_train, y_train)  # fit() returns the estimator itself
y_predict = classifier.predict(X_test)
report = classification_report(y_test, y_predict)
print(report)

                  precision    recall  f1-score   support

          cyclic       1.00      1.00      1.00        15
decreasing_trend       0.93      1.00      0.96        25
  downward_shift       1.00      0.94      0.97        18
increasing_trend       0.91      0.91      0.91        22
          normal       1.00      1.00      1.00        17
    upward_shift       0.95      0.91      0.93        23

        accuracy                           0.96       120
       macro avg       0.96      0.96      0.96       120
    weighted avg       0.96      0.96      0.96       120



In [63]:
## more models to do
## --> logistic regression
## --> k nearest neighbours
## --> random forest

In [146]:
## Classify a real process series with the classifier trained on the
## synthetic control charts.
## Read the measurement sheet and discard the 'Sample' identifier column so
## that only measurement columns remain.
## NOTE(review): presumably each row is one sample (sub-group) and each
## remaining column one measurement within it -- confirm against the workbook.
measurements = (
    pd.read_excel('./assets/test_data_amtek_auto.xlsx', sheet_name="cylinder_block_depth")
    .drop(columns=['Sample'])
)

## Collapse every row to its mean, giving a single series of sample means.
## All measurement columns are averaged, however many the sheet contains, so
## no column positions need to be hard-coded.
test_data = measurements.mean(axis=1).to_frame('mean_0')

## Same four features, in the same order, as used for training. The
## statistics are taken down the series (axis=0), yielding one feature row.
test_df = pd.DataFrame({
    'mean': test_data.mean(axis=0),
    'std': test_data.std(axis=0, ddof=0),  # population std (ddof=0), as in training
    'skew': test_data.skew(axis=0),
    'kurt': test_data.kurt(axis=0),
}).to_numpy()

## NOTE(review): the classifier was fitted on unscaled features; if this
## process runs at a different level/spread than the synthetic data, the
## 'mean' and 'std' features may not be comparable -- verify before trusting
## the predicted pattern.
test_predict = classifier.predict(test_df)
print(test_predict[0])

decreasing_trend


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
