# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns


# Locations of the four Excel workbooks that feed the pipeline.
file_paths = dict(
    maintenance_schedule='/mnt/data/Maintenance_Schedule.xlsx',
    machine_usage='/mnt/data/Machine_Usage.xlsx',
    data1='/mnt/data/Data_1.xlsx',
    failure_logs='/mnt/data/Machine_Failure_Logs.xlsx',
)

# Read every workbook into its own DataFrame. A missing file is reported on
# stdout and the original exception is re-raised so the run stops there.
try:
    data_maintenance = pd.read_excel(file_paths['maintenance_schedule'])
    data_usage = pd.read_excel(file_paths['machine_usage'])
    data1 = pd.read_excel(file_paths['data1'])
    data_failures = pd.read_excel(file_paths['failure_logs'])
except FileNotFoundError as missing_file:
    print(f"Error: {missing_file}")
    raise

# Quick sanity check: (rows, columns) of each table as loaded.
print("Initial Data Shapes:")
print(f"Maintenance Schedule: {data_maintenance.shape}")
print(f"Machine Usage: {data_usage.shape}")
print(f"Data 1: {data1.shape}")
print(f"Failure Logs: {data_failures.shape}")


# Combine the four tables on MachineID. Inner joins keep only the machines
# that appear in every table.
merged_data = pd.merge(data_usage, data_failures, on='MachineID', how='inner')
merged_data = pd.merge(merged_data, data_maintenance, on='MachineID', how='inner')
merged_data = pd.merge(merged_data, data1, on='MachineID', how='inner')

# Forward-fill missing values in place. DataFrame.ffill() replaces
# fillna(method='ffill'), whose `method` argument is deprecated in
# pandas >= 2.1 and emits a FutureWarning.
# NOTE(review): forward-filling runs down the rows of the merged frame, so a
# gap can be filled from a preceding row that belongs to a different
# MachineID — confirm this is intended.
merged_data.ffill(inplace=True)
print("Merged Data Shape:", merged_data.shape)


# Binary target: True when the row's FailureCount is greater than zero.
merged_data['FailureFlag'] = merged_data['FailureCount'] > 0

# Build the feature matrix. 'FailureCount' is dropped together with the
# target and the identifier: FailureFlag is computed directly from it, so
# keeping it as a feature would leak the label into the model and make the
# reported accuracy meaningless.
# NOTE(review): any remaining non-numeric columns (dates, free text) from the
# merged sheets would need encoding before fitting — confirm against the
# actual workbook schemas.
X = merged_data.drop(columns=['FailureFlag', 'FailureCount', 'MachineID'])
y = merged_data['FailureFlag']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train a random forest with a fixed seed; fit() returns the fitted estimator.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split and print overall accuracy plus per-class metrics.
y_pred = rf_model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)


# Pair each feature column with the importance the fitted forest assigned to
# it, then order the table from most to least important.
feature_importances = pd.DataFrame(
    {'Feature': X.columns, 'Importance': rf_model.feature_importances_}
)
feature_importances = feature_importances.sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the ranked importances.
sns.barplot(data=feature_importances, x='Importance', y='Feature')
plt.title('Feature Importance')
plt.show()


# Write the merged table (including the derived FailureFlag column) to a CSV
# in the working directory, without the DataFrame index.
print("Prepare data for Power BI...")
merged_data.to_csv(
    path_or_buf='processed_data_for_powerbi.csv',
    index=False,
)


