# In [None]:
import pandas as pd
import os
from data_cleaner import load_data, preprocess_data, CSV_PATH
from analysis import calculate_success_metrics
from visualize import (
    plot_genre_success, 
    plot_budget_vs_revenue, 
    plot_runtime_profit, 
    plot_time_trends
)

# Location of the genre summary CSV, assembled from its directory and file
# name with os.path.join so the separator matches the host platform.
_SUMMARY_DIR = "results"
_SUMMARY_FILE = "genre_summary_table.csv"
SUMMARY_PATH = os.path.join(_SUMMARY_DIR, _SUMMARY_FILE)


def main():
    """Run the end-to-end movie data analysis workflow.

    Steps, in order:

    1. Load the raw data from ``CSV_PATH`` with ``load_data``.
    2. Clean it with ``preprocess_data``.
    3. Compute the metrics with ``calculate_success_metrics``.
    4. Call the four plotting helpers.
    5. Write the ten genres with the highest ``Median_ROI`` to
       ``SUMMARY_PATH``, print that table, and print three correlation values.

    Returns:
        None. The function returns early, after printing a message, when
        ``load_data`` returns ``None`` or when cleaning leaves no rows.
    """
    # --- 1. Input/Output: Data Loading ---
    df = load_data(CSV_PATH)
    if df is None:
        print("Script terminated due to data loading failure.")
        return

    # --- 2. Data Manipulation & Cleaning ---
    df_cleaned = preprocess_data(df)
    if len(df_cleaned) == 0:
        # Nothing to analyse or plot; stop here instead of reporting success
        # and feeding an empty dataset to the later steps.
        print("Script terminated: no valid movie entries remain after cleaning.")
        return
    print(f"\nSuccessfully cleaned {len(df_cleaned)} valid movie entries.")

    # Create the output directory before anything is written. It is derived
    # from SUMMARY_PATH so the two cannot drift apart.
    # NOTE(review): the plotting helpers presumably save into this same
    # directory (see the message printed after them) -- confirm in visualize.
    output_dir = os.path.dirname(SUMMARY_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # --- 3. Scientific Computing & Analysis ---
    analysis_results = calculate_success_metrics(df_cleaned)
    genre_summary = analysis_results['genre_summary']
    correlation_matrix = analysis_results['correlation_matrix']
    time_series_data = analysis_results['time_series_data']

    # --- 4. Visualization ---
    print("\nGenerating visualizations...")
    plot_genre_success(genre_summary)
    plot_budget_vs_revenue(df_cleaned)
    plot_runtime_profit(df_cleaned)
    plot_time_trends(time_series_data)
    print("All visualizations saved to the 'results' directory.")

    # --- 5. Input/Output: Output Summary Table ---
    # Keep the ten rows with the highest Median_ROI and persist them as CSV.
    top_10_profitable_genres = genre_summary.sort_values(
        by='Median_ROI', ascending=False
    ).head(10)
    top_10_profitable_genres.to_csv(SUMMARY_PATH, index=False, float_format='%.2f')

    print(f"\n--- Top 10 Most Profitable Genres (Saved to {SUMMARY_PATH}) ---")
    try:
        table_text = top_10_profitable_genres.to_markdown(index=False)
    except ImportError:
        # to_markdown depends on the optional 'tabulate' package; fall back to
        # the plain-text rendering rather than failing after all work is done.
        table_text = top_10_profitable_genres.to_string(index=False)
    print(table_text)

    # Display Correlation Matrix findings
    print("\n--- Key Correlation Findings ---")
    correlation_findings = (
        ("Correlation of Vote Count with Revenue:", 'vote_count', 'revenue'),
        ("Correlation of Budget with Revenue:", 'budget', 'revenue'),
        ("Correlation of Vote Average with Popularity:", 'vote_average', 'popularity'),
    )
    for label, row_name, column_name in correlation_findings:
        print(label, correlation_matrix.loc[row_name, column_name])

# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()