In [2]:
# 1. Import libraries
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# 2. Load data
# Read the synthetic dataset; the relative path resolves against the
# current working directory.
df = pd.read_csv('../data/synthetic_data.csv')

# 3. Data exploration and visualization
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell, so print the preview explicitly to make it visible here.
print(df.head())

# Plot temperature over time
# Parse the timestamps once for the x-axis; the 'timestamp' column stored in
# `df` itself is left unchanged.
timestamps = pd.to_datetime(df['timestamp'])
plt.figure(figsize=(10, 6))
plt.plot(timestamps, df['temperature'], label='Temperature')
plt.xlabel('Time')
plt.ylabel('Temperature')
plt.title('Temperature over Time')
plt.legend()
plt.show()

# 4. Feature engineering: Create rolling means
# For each sensor column, average over a trailing window of 24 rows. The
# first 23 rows have no complete window and come out as NaN, so they are
# back-filled with the first available mean.
# `Series.bfill()` replaces `fillna(method='bfill')`, which is deprecated in
# recent pandas releases; the resulting values are identical.
# NOTE(review): the "24h" suffix presumably assumes one row per hour, sorted
# by timestamp -- confirm against the data source.
for sensor in ('temperature', 'vibration', 'pressure'):
    df[f'{sensor}_mean_24h'] = df[sensor].rolling(24).mean().bfill()

# 5. Prepare features and labels
# The model sees only the three rolling-mean columns; the target is 'label'.
features = [
    'temperature_mean_24h',
    'vibration_mean_24h',
    'pressure_mean_24h',
]
y = df['label']
X = df[features]

# 6. Train/test split
# shuffle=False keeps the row order, so the leading 80% of rows are used for
# training and the trailing 20% are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# 7. Train XGBoost model
# Fit a classifier with the library's default hyperparameters.
model = XGBClassifier()
model.fit(X_train, y_train)

# 8. Evaluate model
# Predict on the held-out rows and print the per-class metrics report.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

# 9. Save the model
# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing the serialized model.
model_path = Path('../models/xgboost_model.pkl')
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)


ModuleNotFoundError: No module named 'xgboost'