# In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from IPython.display import display, HTML # Keep these for chart display in environments like Jupyter or VS Code's interactive window
import warnings
warnings.filterwarnings('ignore')

# --- LOCAL EXECUTION ADAPTATION ---
# REMOVED: from google.colab import files
# REMOVED: uploaded = files.upload()
# For local execution, ensure 'machine failure.csv' is in the same directory as this script,
# or provide the full path to the file.

# Load the dataset and rename the unit-suffixed columns to plain identifiers
df = pd.read_csv("machine failure.csv")
df = df.rename(columns={
    'Air temperature [K]': 'Air_temperature_K',
    'Process temperature [K]': 'Process_temperature_K',
    'Rotational speed [rpm]': 'Rotational_speed_rpm',
    'Torque [Nm]': 'Torque_Nm',
    'Tool wear [min]': 'Tool_wear_min'
})

# Feature selection and encoding
features = ['Air_temperature_K', 'Process_temperature_K', 'Rotational_speed_rpm',
            'Torque_Nm', 'Tool_wear_min', 'Type']
numerical_features = ['Air_temperature_K', 'Process_temperature_K',
                      'Rotational_speed_rpm', 'Torque_Nm', 'Tool_wear_min']
X = df[features]
y = df['Machine failure']

# One-hot encode 'Type' (returns a new frame, so X no longer aliases df)
X = pd.get_dummies(X, columns=['Type'], prefix='Type')

# Split BEFORE scaling: the scaler must learn its mean/std from training rows
# only, otherwise statistics of the held-out rows leak into preprocessing.
# The split depends only on the row count and random_state, so the same rows
# land in train/test as when splitting already-scaled data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Explicit copies so the in-place column assignments below never write through
# to (or warn about) the parent frame.
X_train, X_test = X_train.copy(), X_test.copy()

# Standardize numerical features with training-set statistics
scaler = StandardScaler()
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])
# Keep the full matrix X in the same scaled space as the splits.
X[numerical_features] = scaler.transform(X[numerical_features])

# Model training
model = XGBClassifier(random_state=42)
model.fit(X_train, y_train)

# Minimum predicted failure probability that is reported as "Failure"
THRESHOLD = 0.7

# Function to display bar chart using Chart.js
def display_chart(metrics=None):
    """Render a Chart.js bar chart of model metrics via ``IPython.display``.

    Args:
        metrics: Optional mapping of metric label -> numeric value. The y-axis
            is fixed to the range 0..1. When omitted, the reference XGBoost
            figures (Accuracy, Precision, Recall, F1-Score, AUC) are plotted.

    Returns:
        None. The chart is emitted as HTML/JavaScript output; the Chart.js
        library itself is loaded from the jsDelivr CDN by the front end.
    """
    import json
    import uuid

    if metrics is None:
        metrics = {
            'Accuracy': 0.980,
            'Precision': 0.672,
            'Recall': 0.705,
            'F1-Score': 0.688,
            'AUC': 0.972,
        }

    labels = [str(name) for name in metrics]
    # float() also normalises numpy scalars, which json.dumps cannot encode.
    values = [float(value) for value in metrics.values()]

    # One RGB triple per bar, reused cyclically if there are more bars than colours.
    palette = ['54, 162, 235', '255, 99, 132', '75, 192, 192',
               '255, 206, 86', '153, 102, 255']
    bar_rgb = [palette[i % len(palette)] for i in range(len(labels))]

    config = {
        'type': 'bar',
        'data': {
            'labels': labels,
            'datasets': [{
                'label': 'XGBoost Performance Metrics',
                'data': values,
                'backgroundColor': [f'rgba({rgb}, 0.6)' for rgb in bar_rgb],
                'borderColor': [f'rgba({rgb}, 1)' for rgb in bar_rgb],
                'borderWidth': 1,
            }],
        },
        'options': {
            'scales': {
                'y': {
                    'beginAtZero': True,
                    'max': 1,
                    'title': {'display': True, 'text': 'Metric Value'},
                },
                'x': {
                    'title': {'display': True, 'text': 'Metrics'},
                },
            },
            'plugins': {
                'legend': {'display': True, 'position': 'top'},
                'title': {'display': True, 'text': 'XGBoost Model Performance'},
            },
        },
    }

    # Escape "</" so a label can never terminate the surrounding <script> element.
    config_json = json.dumps(config).replace("</", "<\\/")

    # A fresh id per call: with a fixed id, a second chart on the same page
    # would look up the first chart's canvas instead of its own.
    canvas_id = f"modelPerformanceChart-{uuid.uuid4().hex}"

    chart_code = f"""
    <canvas id="{canvas_id}" width="400" height="200"></canvas>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <script>
        (function () {{
            var ctx = document.getElementById('{canvas_id}').getContext('2d');
            new Chart(ctx, {config_json});
        }})();
    </script>
    """
    # This will render the chart if running in an IPython environment (Jupyter, VS Code interactive window)
    display(HTML(chart_code))

def _read_machine_parameters():
    """Prompt for one machine's readings; return them as a dict, or None after printing an error."""
    import math

    numeric_prompts = (
        ('Air_temperature_K', "Air temperature (K) (e.g., 298.1): "),
        ('Process_temperature_K', "Process temperature (K) (e.g., 308.6): "),
        ('Rotational_speed_rpm', "Rotational speed (rpm) (e.g., 1551): "),
        ('Torque_Nm', "Torque (Nm) (e.g., 42.8): "),
        ('Tool_wear_min', "Tool wear (min) (e.g., 0): "),
    )

    readings = {}
    # Only input parsing lives inside the try, so a ValueError raised later by
    # the scaler or model is not misreported as a typing mistake.
    try:
        for column, prompt in numeric_prompts:
            value = float(input(prompt))
            # float() happily parses "nan" and "inf"; neither is a usable reading.
            if not math.isfinite(value):
                raise ValueError(f"non-finite value for {column}")
            readings[column] = value
    except ValueError:
        print("Error: Enter valid numerical values")
        return None

    type_input = input("Type (L, M, H): ").strip().upper()
    if type_input not in ('L', 'M', 'H'):
        print("Error: Type must be L, M, or H")
        return None

    readings['Type'] = type_input
    return readings


def _failure_probability(readings):
    """Encode and scale one set of readings like the training matrix; return the class-1 probability."""
    input_data = pd.get_dummies(pd.DataFrame([readings]), columns=['Type'], prefix='Type')

    # A single row only produces its own Type_* column. Reindexing against the
    # training columns adds the missing ones as 0 and fixes the column order,
    # which matters for model prediction.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    input_data[numerical_features] = scaler.transform(input_data[numerical_features])
    return model.predict_proba(input_data)[:, 1][0]


def machine_failure_dashboard():
    """Run one interactive pass of the text dashboard.

    Prints the model-performance and business-impact summaries, shows the
    metrics chart, prompts on stdin for one machine's parameters and prints
    the predicted failure probability together with the decision at
    ``THRESHOLD``.

    Relies on the module-level ``X``, ``numerical_features``, ``scaler``,
    ``model``, ``THRESHOLD`` and ``display_chart``. Invalid input is reported
    with a message rather than an exception; the closing footer is printed on
    every path.

    NOTE: the performance and business figures below are fixed literals; they
    are not recomputed from ``model`` at runtime.
    """
    print("=" * 50)
    print("Machine Failure Prediction Dashboard".center(50))
    print("=" * 50)

    # Model performance
    print("\nModel Performance (XGBoost):")
    print("- Accuracy:  0.980")
    print("- Precision: 0.672")
    print("- Recall:    0.705")
    print("- F1-Score:  0.688")
    print("- AUC:       0.972")

    print("\nDisplaying Model Performance Chart...")
    # This call relies on IPython.display, which works in Jupyter notebooks or VS Code interactive mode.
    # If running as a pure script in a standard terminal, this chart will not be visually displayed.
    display_chart()

    # Business impact
    print("\nBusiness Impact:")
    print("- Prevented Failures: $2,050,000")
    print("- Maintenance Costs:  $275,000")
    print("- False Alarm Costs:  $14,000")
    print("- Missed Failure Costs: $1,000,000")
    print("- Net Savings:        $761,000")
    print("- Annual Savings:     $3,805,000")
    print("- ROI:                7510.0%")

    # Input interface
    print("\nEnter Machine Parameters:")
    readings = _read_machine_parameters()

    if readings is not None:
        # Predict
        prob = _failure_probability(readings)
        prediction = "Failure" if prob >= THRESHOLD else "No Failure"

        # Output
        print("\nPrediction Results:")
        print(f"- Failure Probability: {prob:.2%}")
        print(f"- Prediction: {prediction}")

        print("\nTop Features to Monitor:")
        print("1. Torque (Nm)")
        print("2. Tool wear (min)")
        print("3. Rotational speed (rpm)")
        print("4. Air temperature (K)")
        print("5. Process temperature (K)")

        print("\nRecommendations:")
        print("1. Deploy XGBoost model (AUC: 0.972)")
        # Report the threshold actually applied above instead of a second literal.
        print(f"2. Use threshold: {THRESHOLD}")
        print("3. Implement two-tier alert system")
        print("4. Retrain model quarterly")
        print("5. Monitor real-time data")

    print("\n" + "=" * 50)
    print("Run again for another prediction.")
    print("=" * 50)

# Run the dashboard when executed directly (script or notebook cell); the guard
# keeps a plain import of this module from blocking on input() prompts.
if __name__ == "__main__":
    machine_failure_dashboard()