In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
from src.data.data_loader import DataLoader
from src.data.data_preprocessor import DataPreprocessor
from src.features.feature_engineering import FeatureEngineer
from src.features.feature_selection import FeatureSelector
from src.models.ensemble_model import EnsembleModel
from src.models.time_series_models import TimeSeriesModels
from src.utils.anomaly_detection import AnomalyDetector
from src.utils.causality_analysis import granger_causality
from src.utils.model_interpretation import ModelInterpreter
from src.utils.online_learning import ConceptDriftDetector
from src.models.model_evaluator import ModelEvaluator
from src.utils.visualization import plot_feature_importance, plot_actual_vs_predicted

In [None]:
# 1. Data Loading
# Read the three ad-platform CSV files through the project's DataLoader.
print("Loading data...")
data_loader = DataLoader()
ad_source_files = ['google_ads.csv', 'meta_ads.csv', 'microsoft_ads.csv']
raw_data = data_loader.load_data(ad_source_files)


In [None]:
# 2. Data Preprocessing
# Hand the loaded data to the project's DataPreprocessor. The returned
# `processed_data` is the input of the feature-engineering step.
print("Preprocessing data...")
preprocessor = DataPreprocessor()
processed_data = preprocessor.preprocess(raw_data)

In [None]:
# 3. Feature Engineering
# Derive model features from the preprocessed data. The resulting frame is
# expected to still carry the 'ROAS' and 'Date' columns, which later cells
# read by name.
print("Engineering features...")
feature_engineer = FeatureEngineer()
featured_data = feature_engineer.engineer_features(processed_data)

In [None]:
# 4. Exploratory Data Analysis
# Plot the pairwise correlation of the engineered features.
print("Performing exploratory data analysis...")

# `featured_data` still contains non-numeric columns at this point (at least
# 'Date'). Since pandas 2.0, DataFrame.corr() no longer drops such columns
# silently, so restrict the frame to numeric/bool columns explicitly. This
# matches what older pandas versions did implicitly.
numeric_features = featured_data.select_dtypes(include=['number', 'bool'])

plt.figure(figsize=(12, 6))
sns.heatmap(numeric_features.corr(), annot=False, cmap='coolwarm')
plt.title('Correlation Heatmap of Features')
plt.show()

In [None]:
# 5. Feature Selection
# Split the engineered frame into predictors and target, then let the
# project's FeatureSelector choose which predictors to keep.
print("Selecting features...")
feature_selector = FeatureSelector()

target_column = 'ROAS'
non_feature_columns = [target_column, 'Date']  # target and date are not predictors
X = featured_data.drop(columns=non_feature_columns)
y = featured_data[target_column]

X_selected = feature_selector.select_features(X, y)
# Whatever get_selected_features() returns is used directly as an indexer
# into X.columns to recover the chosen column names.
selected_indexer = feature_selector.get_selected_features()
selected_features = X.columns[selected_indexer]

print(f"Selected features: {selected_features}")

In [None]:
# 6. Train-Test Split
# shuffle=False keeps the rows in their existing order, so the final 20% of
# rows become the test set.
# NOTE(review): this is a chronological hold-out only if the rows are already
# sorted by date - confirm upstream.
ordered_split = train_test_split(X_selected, y, test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = ordered_split

In [None]:
# 7. Model Training
# Fit the project's EnsembleModel on the training portion only; the held-out
# test rows are used by the evaluation cell.
print("Training ensemble model...")
ensemble_model = EnsembleModel()
ensemble_model.fit(X_train, y_train)

In [None]:
# 8. Model Evaluation
# Score the fitted ensemble on the held-out rows and print every metric the
# evaluator reports, formatted to four decimal places.
print("Evaluating model...")
y_pred = ensemble_model.predict(X_test)

evaluator = ModelEvaluator()
metrics = evaluator.evaluate_model(y_test, y_pred)

print("Model Performance:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

In [None]:
# 9. Model Interpretation
# Explain the ensemble's `best_xgb` member on the test rows and draw the
# SHAP summary plot for the same rows.
print("Interpreting model...")
interpreter = ModelInterpreter()
explained_model = ensemble_model.best_xgb
shap_values = interpreter.explain_model(explained_model, X_test)
interpreter.plot_shap_summary(shap_values, X_test)

In [None]:
# 10. Time Series Forecasting
# Forecast ROAS with the project's SARIMA and Prophet wrappers and plot both
# forecasts against the observed series.
print("Performing time series forecasting...")
ts_models = TimeSeriesModels()
last_date = featured_data['Date'].max()
sarima_forecast = ts_models.sarima_forecast(y)
prophet_forecast = ts_models.prophet_forecast(pd.DataFrame({'ds': featured_data['Date'], 'y': y}))

# The SARIMA forecast is plotted against dates *after* the last observation.
# Build one extra date and drop the first, so the axis starts at
# last_date + 1 day instead of overlapping the final actual point. The length
# comes from the forecast itself so x and y always match in plt.plot.
# NOTE(review): daily spacing (pd.date_range's default) is assumed, as in the
# original code - confirm it matches the data's frequency.
forecast_horizon = len(sarima_forecast)
future_dates = pd.date_range(start=last_date, periods=forecast_horizon + 1)[1:]

plt.figure(figsize=(12, 6))
plt.plot(featured_data['Date'], y, label='Actual')
plt.plot(future_dates, sarima_forecast, label='SARIMA Forecast')
plt.plot(prophet_forecast['ds'], prophet_forecast['yhat'], label='Prophet Forecast')
plt.title('ROAS Forecasting')
plt.xlabel('Date')
plt.ylabel('ROAS')
plt.legend()
plt.show()


In [None]:
# 11. Anomaly Detection
# Run the project's AnomalyDetector on the full predictor matrix and colour
# each ROAS observation by the value it returns for that row.
# NOTE(review): the colorbar calls this an 'Anomaly Score'; confirm whether
# detect_outliers returns continuous scores or discrete labels.
print("Detecting anomalies...")
anomaly_detector = AnomalyDetector()
outliers = anomaly_detector.detect_outliers(X)

fig, ax = plt.subplots(figsize=(12, 6))
anomaly_scatter = ax.scatter(featured_data['Date'], y, c=outliers, cmap='viridis')
ax.set_title('ROAS Anomalies')
ax.set_xlabel('Date')
ax.set_ylabel('ROAS')
fig.colorbar(anomaly_scatter, ax=ax, label='Anomaly Score')
plt.show()

In [None]:
# 12. Causality Analysis
# Run the project's Granger-causality helper on the ROAS/Spend column pair
# and print whatever it returns.
print("Performing causality analysis...")
causality_columns = ['ROAS', 'Spend']
granger_results = granger_causality(featured_data, causality_columns)
print("Granger Causality Test Results:")
print(granger_results)

In [None]:
# 13. Online Learning and Concept Drift Detection
# Ask the project's ConceptDriftDetector for drift points in the ROAS series
# and mark each one on the ROAS timeline.
print("Detecting concept drift...")
drift_detector = ConceptDriftDetector()
drift_points = drift_detector.detect_drift(featured_data[['Date', 'ROAS']])

observation_dates = featured_data['Date']
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(observation_dates, y)
# Each drift point is used as a positional row index into the Date column.
for drift_point in drift_points:
    ax.axvline(x=observation_dates.iloc[drift_point], color='r', linestyle='--')
ax.set_title('ROAS with Concept Drift Points')
ax.set_xlabel('Date')
ax.set_ylabel('ROAS')
plt.show()

In [None]:
# 14. Recommendations
# Static summary text: the messages are fixed strings and do not depend on
# the results computed above.
recommendations = (
    "1. Focus on optimizing the top features identified by SHAP analysis.",
    "2. Monitor and investigate the detected anomalies in ROAS.",
    "3. Consider the forecasted ROAS when planning future ad spend.",
    "4. Be aware of the detected concept drift points and potentially retrain the model at these points.",
    "5. Adjust strategies based on the causality analysis between ROAS and Spend.",
)
print("\nRecommendations based on the analysis:")
for recommendation in recommendations:
    print(recommendation)

# 15. Next Steps
next_steps = (
    "1. Implement real-time monitoring of ad performance metrics.",
    "2. Develop an automated system for adjusting ad spend based on model predictions.",
    "3. Conduct A/B tests to validate the effectiveness of the model's recommendations.",
    "4. Regularly retrain the model with new data to maintain its accuracy.",
    "5. Explore additional external factors that might influence ad performance.",
)
print("\nNext steps:")
for next_step in next_steps:
    print(next_step)