In [None]:
import pandas as pd
import numpy as np
import os
from data_loader import load_raw_data
from data_preprocessing import preprocess_and_merge_data
from feature_engineering import perform_feature_engineering, split_data
from model_training import train_sentiment_models, run_clustering, train_predictive_model
from analysis import analyze_clusters, analyze_feature_importance

# End-to-end driver for the sentiment-analysis pipeline: load and preprocess
# the data, build features, train the three models, then print the analysis.
# All heavy lifting lives in the project modules imported at the top of the
# file; this script only wires their inputs and outputs together.

# --- 0. Configuration ---
# Number of K-Means clusters. Defined once so the clustering step and the
# cluster analysis below always agree on the same value.
N_CLUSTERS = 5

# --- 1. Load and Preprocess Data ---
print("--- STEP 1 & 2: Data Loading, Preprocessing, and Feature Engineering ---")
review_df, metadata_df = load_raw_data()
final_df = preprocess_and_merge_data(review_df, metadata_df)
X, y_sentiment, y_category, vectorizer = perform_feature_engineering(final_df)
# split_data returns two train/test splits: one paired with the sentiment
# target and one paired with the category target.
# NOTE(review): presumably both splits are derived from the same X -- confirm
# against feature_engineering.split_data.
(
    X_train,
    X_test,
    y_train,
    y_test,
    X_train_pred,
    X_test_pred,
    y_train_cat,
    y_test_cat,
) = split_data(X, y_sentiment, y_category)

# --- 2. Model Training and Evaluation ---
print("\n--- STEP 3.1: Sentiment Classification ---")
sentiment_results = train_sentiment_models(X_train, X_test, y_train, y_test)
# Results are indexed by model name, then by field; keep the fitted logistic
# regression for the feature-importance analysis in step 4.
lr_model = sentiment_results['Logistic Regression']['Model']

print("\n--- STEP 3.2: Customer Segmentation (K-Means) ---")
# final_df is rebound to the frame returned by run_clustering.
# NOTE(review): presumably that frame carries the cluster assignments --
# confirm against model_training.run_clustering.
kmeans_model, final_df = run_clustering(X, final_df, K=N_CLUSTERS)

print("\n--- STEP 3.3: Predictive Modeling (MLP) ---")
mlp_model, mlp_accuracy = train_predictive_model(
    X_train_pred, X_test_pred, y_train_cat, y_test_cat
)


# --- 3. Final Analysis and Interpretation ---
print("\n--- STEP 4: Results Analysis and Interpretation ---")

# a) Analyze K-Means Clusters (same cluster count as used for fitting)
analyze_clusters(kmeans_model, vectorizer, final_df, K=N_CLUSTERS)

# b) Analyze Logistic Regression Feature Importance
analyze_feature_importance(lr_model, X_train, y_train, vectorizer)

# \u2705 is the "white heavy check mark" emoji. It is written as an escape
# because the literal character had been corrupted into mojibake by a
# UTF-8 / cp1252 encoding mix-up.
print("\n\n\u2705 Sentiment Analysis METHODOLOGY EXECUTION COMPLETE.")