# # Data import & Preprocessing

In [125]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn as skl

In [126]:
data =pd.read_csv('Makine verileri.csv')
data.head()

Unnamed: 0,machineID,datetime,volt_min_3h,rotate_min_3h,pressure_min_3h,vibration_min_3h,volt_max_3h,rotate_max_3h,pressure_max_3h,vibration_max_3h,...,error3count,error4count,error5count,comp1,comp2,comp3,comp4,model,age,failure
0,1,02-01-15 6:00,158.2714,403.235951,92.439132,32.516838,200.87243,495.777958,96.535487,52.355876,...,0,0,0,20.0,215.0,155.0,170.0,model3,18,none
1,1,02-01-15 9:00,160.528861,384.645962,86.944273,29.527665,197.363124,486.459056,114.342061,42.992509,...,0,0,0,20.125,215.125,155.125,170.125,model3,18,none
2,1,02-01-15 12:00,147.300678,412.965696,90.711354,34.203042,173.394523,439.57946,110.408985,37.117103,...,0,0,0,20.25,215.25,155.25,170.25,model3,18,none
3,1,02-01-15 15:00,152.420775,385.354924,99.506819,30.665184,185.205355,497.84062,105.993247,47.862484,...,0,0,0,20.375,215.375,155.375,170.375,model3,18,none
4,1,02-01-15 18:00,145.248486,424.542633,93.743827,37.422272,180.030715,495.376449,111.950587,43.099758,...,0,0,0,20.5,215.5,155.5,170.5,model3,18,none


In [106]:
# modeli denemek için id si 3 olan makine test için ayrılmıştır.
#data = data[data['machineID'] != 3]

In [127]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14566 entries, 0 to 14565
Data columns (total 46 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   machineID           14566 non-null  int64  
 1   datetime            14566 non-null  object 
 2   volt_min_3h         14566 non-null  float64
 3   rotate_min_3h       14566 non-null  float64
 4   pressure_min_3h     14566 non-null  float64
 5   vibration_min_3h    14566 non-null  float64
 6   volt_max_3h         14566 non-null  float64
 7   rotate_max_3h       14566 non-null  float64
 8   pressure_max_3h     14566 non-null  float64
 9   vibration_max_3h    14566 non-null  float64
 10  volt_mean_3h        14566 non-null  float64
 11  rotate_mean_3h      14566 non-null  float64
 12  pressure_mean_3h    14566 non-null  float64
 13  vibration_mean_3h   14566 non-null  float64
 14  volt_sd_3h          14566 non-null  float64
 15  rotate_sd_3h        14566 non-null  float64
 16  pres

In [128]:
data['datetime'] = pd.to_datetime(data['datetime'],format='%d-%m-%y %H:%M')

In [129]:
# Tarih sütununu dönüştürme
data['datetime'] = pd.to_datetime(data['datetime'])
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data = data.drop(columns=['datetime'])

In [130]:
data_encoded = pd.get_dummies(data, columns=['model'], prefix='model')
data_encoded = pd.get_dummies(data_encoded, columns=['failure'], prefix='failure')

In [131]:
data_encoded.loc[3521, 'failure_comp2'] = 1

In [132]:
data_encoded.drop(index=3522, inplace=True)
data_encoded.reset_index(drop=True, inplace=True)

In [133]:
data_encoded.iloc[3521:3523]

Unnamed: 0,machineID,volt_min_3h,rotate_min_3h,pressure_min_3h,vibration_min_3h,volt_max_3h,rotate_max_3h,pressure_max_3h,vibration_max_3h,volt_mean_3h,...,year,month,day,hour,model_model3,model_model4,failure_comp1,failure_comp2,failure_comp4,failure_none
3521,2,163.674462,360.224008,100.753128,33.515403,205.275428,410.609817,119.943141,47.711374,177.978058,...,2015,3,19,6,0,1,1,1,0,0
3522,2,149.367769,322.38817,107.352289,36.180633,179.277874,509.282578,118.153934,47.415885,169.154494,...,2015,3,19,9,0,1,0,0,0,1


In [134]:
data_encoded['failure_yes'] = 1 - data_encoded['failure_none']

# Feature Engineering:

In [135]:
# Lag sütunlarını oluşturmak için gerekli sütunların listesi ve lag süreleri
columns_to_lag = {
    'rotate_mean_3h': [4],
    'vibration_mean_3h': [2, 4],
    'volt_mean_24h': [8],
    'rotate_min_24h': [1, 8],
    'rotate_max_24h': [1],
    'rotate_mean_24h': [1, 8]
}

# Her machineID grubu için lag sütunlarını oluşturma
for col, lags in columns_to_lag.items():
    for lag in lags:
        lag_col_name = f'{col}_lag{lag}'
        data_encoded[lag_col_name] = data_encoded.groupby('machineID')[col].shift(lag)
        
        # NaN değerlerini orijinal sütundaki değerlerle doldurma
        data_encoded[lag_col_name].fillna(data_encoded[col], inplace=True)

In [136]:
data_encoded['comp1_comp2_product'] = data_encoded['comp1'] * data_encoded['comp2']

from sklearn.preprocessing import PolynomialFeatures
# Polinomiyal özellikler oluşturma
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(data_encoded[['comp1', 'comp2']])
poly_columns = poly.get_feature_names_out(input_features=['comp1', 'comp2'])
poly_df = pd.DataFrame(poly_features, columns=poly_columns)
data_encoded = pd.concat([data_encoded, poly_df], axis=1)

In [137]:
# Özellikleri seçme
features = ['vibration_mean_3h', 'volt_mean_3h'
           ]

# Polynomial features oluşturma
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
poly_features = poly.fit_transform(data_encoded[features])
poly_feature_names = poly.get_feature_names_out(features)

# Yeni özellikleri DataFrame'e ekleme
poly_df = pd.DataFrame(poly_features, columns=poly_feature_names, index=data_encoded.index)
data_encoded = pd.concat([data_encoded, poly_df], axis=1)


In [138]:
# Gereken sütunları seçerek yeni bir DataFrame oluştur
columns_to_select = ['rotate_min_3h', 'vibration_min_3h', 'vibration_max_3h', 'volt_mean_3h',
                     'vibration_mean_3h', 'pressure_sd_3h', 'volt_min_24h', 'vibration_min_24h',
                     'rotate_max_24h', 'volt_mean_24h', 'rotate_mean_24h', 'vibration_mean_24h',
                     'error1count', 'error5count', 'comp1', 'comp2', 'comp4', 'rotate_mean_3h',
                     'rotate_mean_3h_lag4', 'vibration_mean_3h_lag2', 'vibration_mean_3h_lag4',
                     'volt_mean_24h_lag8', 'rotate_min_24h_lag1', 'rotate_min_24h_lag8',
                     'rotate_max_24h_lag1', 'rotate_mean_24h_lag1', 'rotate_mean_24h_lag8',
                     'vibration_mean_3h volt_mean_3h', 'comp1_comp2_product',
                     'comp1 comp2', 'failure_comp1', 'failure_comp2', 'failure_comp4', 'failure_none', 'failure_yes',]

data_selected = data_encoded[columns_to_select]


In [139]:
data_selected.columns

Index(['rotate_min_3h', 'vibration_min_3h', 'vibration_max_3h', 'volt_mean_3h',
       'volt_mean_3h', 'vibration_mean_3h', 'vibration_mean_3h',
       'pressure_sd_3h', 'volt_min_24h', 'vibration_min_24h', 'rotate_max_24h',
       'volt_mean_24h', 'rotate_mean_24h', 'vibration_mean_24h', 'error1count',
       'error5count', 'comp1', 'comp1', 'comp2', 'comp2', 'comp4',
       'rotate_mean_3h', 'rotate_mean_3h_lag4', 'vibration_mean_3h_lag2',
       'vibration_mean_3h_lag4', 'volt_mean_24h_lag8', 'rotate_min_24h_lag1',
       'rotate_min_24h_lag8', 'rotate_max_24h_lag1', 'rotate_mean_24h_lag1',
       'rotate_mean_24h_lag8', 'vibration_mean_3h volt_mean_3h',
       'comp1_comp2_product', 'comp1 comp2', 'failure_comp1', 'failure_comp2',
       'failure_comp4', 'failure_none', 'failure_yes'],
      dtype='object')

In [140]:
data_selected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14565 entries, 0 to 14564
Data columns (total 39 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   rotate_min_3h                   14565 non-null  float64
 1   vibration_min_3h                14565 non-null  float64
 2   vibration_max_3h                14565 non-null  float64
 3   volt_mean_3h                    14565 non-null  float64
 4   volt_mean_3h                    14565 non-null  float64
 5   vibration_mean_3h               14565 non-null  float64
 6   vibration_mean_3h               14565 non-null  float64
 7   pressure_sd_3h                  14565 non-null  float64
 8   volt_min_24h                    14565 non-null  float64
 9   vibration_min_24h               14565 non-null  float64
 10  rotate_max_24h                  14565 non-null  float64
 11  volt_mean_24h                   14565 non-null  float64
 12  rotate_mean_24h                 

In [141]:
data_selected.to_csv('selected_data.csv', index=False)

scale

In [142]:
from sklearn.preprocessing import MinMaxScaler
# MinMaxScaler objesini oluşturun
scaler = MinMaxScaler()

data_scaled = pd.DataFrame(scaler.fit_transform(data_selected), columns=data_selected.columns)

# Ölçeklendirilmiş veriyi yeni bir DataFrame olarak saklamak için
data_scaled_df = pd.DataFrame(data_scaled, columns=data_selected.columns)

data_scaled_df.head()

Unnamed: 0,rotate_min_3h,vibration_min_3h,vibration_max_3h,volt_mean_3h,volt_mean_3h.1,vibration_mean_3h,vibration_mean_3h.1,pressure_sd_3h,volt_min_24h,vibration_min_24h,...,rotate_mean_24h_lag1,rotate_mean_24h_lag8,vibration_mean_3h volt_mean_3h,comp1_comp2_product,comp1 comp2,failure_comp1,failure_comp2,failure_comp4,failure_none,failure_yes
0,0.647742,0.302892,0.531161,0.556252,0.556252,0.347207,0.347207,0.069618,0.649723,0.179161,...,0.772176,0.772176,0.439346,0.072203,0.072203,0.0,0.0,0.0,1.0,0.0
1,0.595184,0.230814,0.287412,0.510424,0.510424,0.184691,0.184691,0.460242,0.649723,0.179161,...,0.772176,0.778381,0.261223,0.072696,0.072696,0.0,0.0,0.0,1.0,0.0
2,0.67525,0.343551,0.134463,0.316153,0.316153,0.184364,0.184364,0.332845,0.598461,0.179161,...,0.778381,0.781756,0.168946,0.07319,0.07319,0.0,0.0,0.0,1.0,0.0
3,0.597189,0.258243,0.414188,0.438795,0.438795,0.31532,0.31532,0.108838,0.598461,0.284168,...,0.781756,0.767221,0.34629,0.073685,0.073685,0.0,0.0,0.0,1.0,0.0
4,0.707981,0.421176,0.290204,0.351157,0.351157,0.328414,0.328414,0.303253,0.598461,0.284168,...,0.767221,0.789249,0.311181,0.07418,0.07418,0.0,0.0,0.0,1.0,0.0


In [143]:
from sklearn.model_selection import train_test_split
# Hedef ve özellikleri ayırma
X = data_scaled.drop(columns=['failure_yes','failure_comp1', 'failure_comp2', 'failure_comp4', 'failure_none'])  # Özellikler
y = data_scaled[['failure_comp1', 'failure_comp2', 'failure_comp4', 'failure_none']]  # Hedef

# Eğitim ve test setlerine ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

GradientBoosting Sonuçlar

In [94]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_score

# Model parametreleri
params = {
    'failure_comp1': {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200},
    'failure_comp2': {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50},
    'failure_comp4': {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50},
    'failure_none': {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 100}
}

# Performans metrikleri hesaplama fonksiyonu
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred)
    
    return accuracy, precision, recall, f1, cm

# Her hedef sütun için model eğitme ve değerlendirme
results = {}
for target in y.columns:
    # Model oluşturma ve eğitme
    model = GradientBoostingClassifier(**params[target])
    model.fit(X_train, y_train[target])
    
    # Tahminler yapma
    y_pred = model.predict(X_test)
    
    # Performans metriklerini hesaplama
    accuracy, precision, recall, f1, cm = evaluate_model(y_test[target], y_pred)
    
    # Cross-validation
    cv_scores = cross_val_score(model, X, y[target], cv=5, scoring='accuracy')
    
    # Sonuçları saklama
    results[target] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Confusion Matrix': cm,
        'CV Mean Accuracy': cv_scores.mean(),
        'CV Std Accuracy': cv_scores.std(),
        'CV Scores': cv_scores,
        'Best Params': params[target]
    }

# Sonuçları yazdırma
for target, metrics in results.items():
    print(f"Results for {target}:")
    print(f"Accuracy: {metrics['Accuracy']}")
    print(f"Precision: {metrics['Precision']}")
    print(f"Recall: {metrics['Recall']}")
    print(f"F1 Score: {metrics['F1 Score']}")
    print(f"Confusion Matrix:\n{metrics['Confusion Matrix']}")
    print(f"CV Mean Accuracy: {metrics['CV Mean Accuracy']}")
    print(f"CV Std Accuracy: {metrics['CV Std Accuracy']}")
    print(f"CV Scores: {metrics['CV Scores']}")
    print(f"Best Params: {metrics['Best Params']}\n")




Results for failure_comp1:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[4368    0]
 [   0    2]]
CV Mean Accuracy: 1.0
CV Std Accuracy: 0.0
CV Scores: [1. 1. 1. 1. 1.]
Best Params: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200}

Results for failure_comp2:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[4365    0]
 [   0    5]]
CV Mean Accuracy: 0.9999313422588397
CV Std Accuracy: 0.0001373154823206324
CV Scores: [1.         1.         0.99965671 1.         1.        ]
Best Params: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50}

Results for failure_comp4:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[4369    0]
 [   0    1]]
CV Mean Accuracy: 0.9416409200137317
CV Std Accuracy: 0.11637497253179623
CV Scores: [0.70889118 1.         1.         0.99965671 0.99965671]
Best Params: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}

Results for failure_none:
Accuracy: 0.99

BaggingClassifier Sonuçlar

In [95]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Model parametreleri
params = {
    'failure_comp1': {'max_samples': 0.7, 'n_estimators': 50},
    'failure_comp2': {'max_samples': 0.5, 'n_estimators': 50},
    'failure_comp4': {'max_samples': 0.5, 'n_estimators': 50},
    'failure_none': {'max_samples': 1.0, 'n_estimators': 50}
}

# Performans metrikleri hesaplama fonksiyonu
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred)
    
    return accuracy, precision, recall, f1, cm

# Her hedef sütun için model eğitme ve değerlendirme
results = {}
for target in y.columns:
    # Model oluşturma ve eğitme
    model = BaggingClassifier(base_estimator=DecisionTreeClassifier(), **params[target])
    model.fit(X_train, y_train[target])
    
    # Tahminler yapma
    y_pred = model.predict(X_test)
    
    # Performans metriklerini hesaplama
    accuracy, precision, recall, f1, cm = evaluate_model(y_test[target], y_pred)
    
    # Cross-validation
    cv_scores = cross_val_score(model, X, y[target], cv=5, scoring='accuracy')
    
    # Sonuçları saklama
    results[target] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Confusion Matrix': cm,
        'CV Mean Accuracy': cv_scores.mean(),
        'CV Std Accuracy': cv_scores.std(),
        'CV Scores': cv_scores,
        'Best Params': params[target]
    }

# Sonuçları yazdırma
for target, metrics in results.items():
    print(f"Results for {target}:")
    print(f"Accuracy: {metrics['Accuracy']}")
    print(f"Precision: {metrics['Precision']}")
    print(f"Recall: {metrics['Recall']}")
    print(f"F1 Score: {metrics['F1 Score']}")
    print(f"Confusion Matrix:\n{metrics['Confusion Matrix']}")
    print(f"CV Mean Accuracy: {metrics['CV Mean Accuracy']}")
    print(f"CV Std Accuracy: {metrics['CV Std Accuracy']}")
    print(f"CV Scores: {metrics['CV Scores']}")
    print(f"Best Params: {metrics['Best Params']}\n")

  _warn_prf(average, modifier, msg_start, len(result))


Results for failure_comp1:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[4368    0]
 [   0    2]]
CV Mean Accuracy: 1.0
CV Std Accuracy: 0.0
CV Scores: [1. 1. 1. 1. 1.]
Best Params: {'max_samples': 0.7, 'n_estimators': 50}

Results for failure_comp2:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
[[4365    0]
 [   0    5]]
CV Mean Accuracy: 0.9999313422588397
CV Std Accuracy: 0.0001373154823206324
CV Scores: [1.         1.         1.         1.         0.99965671]
Best Params: {'max_samples': 0.5, 'n_estimators': 50}

Results for failure_comp4:
Accuracy: 0.9997711670480549
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
Confusion Matrix:
[[4369    0]
 [   1    0]]
CV Mean Accuracy: 0.9997253690353588
CV Std Accuracy: 0.0001373154823206324
CV Scores: [1.         0.99965671 0.99965671 0.99965671 0.99965671]
Best Params: {'max_samples': 0.5, 'n_estimators': 50}

Results for failure_none:
Accuracy: 0.9997711670480549
Precision: 1.0
Recall: 0.

failure_none : bagging
failure_comp1: Both
failure_comp2: Both
failure_comp4: Gradient_Boosting

In [None]:
ML modellerini eğitip kaydetmek:

In [144]:
import joblib
from sklearn.ensemble import GradientBoostingClassifier, BaggingClassifier

# Function to train models and save them
def train_and_save_models(X_train, X_test, y_train, params):
    trained_models = {}

    # Gradient Boosting models
    gb_params_comp1 = params.get('failure_comp1_gb', {})
    gb_params_comp2 = params.get('failure_comp2_gb', {})
    gb_params_comp4 = params.get('failure_comp4_gb', {})

    gb_model_comp1 = GradientBoostingClassifier(**gb_params_comp1)
    gb_model_comp2 = GradientBoostingClassifier(**gb_params_comp2)
    gb_model_comp4 = GradientBoostingClassifier(**gb_params_comp4)

    gb_model_comp1.fit(X_train, y_train['failure_comp1'])
    gb_model_comp2.fit(X_train, y_train['failure_comp2'])
    gb_model_comp4.fit(X_train, y_train['failure_comp4'])

    trained_models['gb_model_failure_comp1'] = gb_model_comp1
    trained_models['gb_model_failure_comp2'] = gb_model_comp2
    trained_models['gb_model_failure_comp4'] = gb_model_comp4

    # Bagging models
    bagging_params_comp1 = params.get('failure_comp1_bagging', {})
    bagging_params_comp2 = params.get('failure_comp2_bagging', {})
    bagging_params_none = params.get('failure_none_bagging', {})

    bagging_model_comp1 = BaggingClassifier(**bagging_params_comp1)
    bagging_model_comp2 = BaggingClassifier(**bagging_params_comp2)
    bagging_model_none = BaggingClassifier(**bagging_params_none)

    bagging_model_comp1.fit(X_train, y_train['failure_comp1'])
    bagging_model_comp2.fit(X_train, y_train['failure_comp2'])
    bagging_model_none.fit(X_train, y_train['failure_none'])

    trained_models['bagging_model_failure_comp1'] = bagging_model_comp1
    trained_models['bagging_model_failure_comp2'] = bagging_model_comp2
    trained_models['bagging_model_failure_none'] = bagging_model_none

    return trained_models


# Model parameters
params = {
    'failure_comp1_gb': {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200},
    'failure_comp2_gb': {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50},
    'failure_comp4_gb': {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50},
    'failure_comp1_bagging': {'max_samples': 0.7, 'n_estimators': 50},
    'failure_comp2_bagging': {'max_samples': 0.5, 'n_estimators': 50},
    'failure_none_bagging': {'max_samples': 1.0, 'n_estimators': 50},
}

# Train models
trained_models = train_and_save_models(X_train, X_test, y_train, params)

# Save models to files
for model_name, model in trained_models.items():
    joblib.dump(model, f'{model_name}.joblib')


Kontrol

In [145]:
import joblib

# Load the model from file
model_path = 'gb_model_failure_comp1.joblib'
gb_model_failure_comp1 = joblib.load(model_path)

# Assuming X_test is defined earlier in your notebook
# Make predictions
predictions = gb_model_failure_comp1.predict(X_test)

# Example: Print first 10 predictions
print(predictions[:10])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
