In [None]:
import os
import pandas as pd
import visualize
# Import our local modules
# import data_preprocesso
from prep import data_preprocess
from prep.data_preprocess import load_data, preprocess_data, CSV_PATH
import analysis.analysis
import visual.visual

def main():
    """Run the movie-metadata pipeline: load, clean, analyze, report, plot.

    The function bails out early (returning ``None``) when the CSV at
    ``CSV_PATH`` does not exist or when ``load_data`` hands back ``None``.
    Otherwise it prints the five leading rows of the genre metrics table and
    the budget/revenue cell of the correlation matrix, then calls the three
    plotting routines.

    Returns:
        None in every case; results are reported via ``print`` and the
        plotting calls.
    """
    # --- Stage 1: locate and read the raw CSV -------------------------------
    # The file is expected at CSV_PATH ('movies_metadata.csv' in a 'data'
    # folder, per the message printed below).
    csv_present = os.path.exists(CSV_PATH)
    if not csv_present:
        print(f"CRITICAL ERROR: Could not find file at {CSV_PATH}")
        print("Please create a folder named 'data' and put 'movies_metadata.csv' inside it.")
        return

    raw_frame = load_data(CSV_PATH)
    if raw_frame is None:
        # presumably load_data signals (and reports) a failed read with None
        # -- verify against prep.data_preprocess
        return

    # --- Stage 2: cleaning --------------------------------------------------
    movies = preprocess_data(raw_frame)

    # --- Stage 3: aggregate statistics --------------------------------------
    stats = analysis.analysis
    genre_summary = stats.get_genre_metrics(movies)
    trends_by_year = stats.get_yearly_trends(movies)
    corr_matrix = stats.get_correlation_matrix(movies)

    # Console report: headers are printed before each table is sliced.
    print("\n--- Top 5 Most Profitable Genres (ROI) ---")
    roi_columns = ['primary_genre', 'median_roi', 'count']
    leading_genres = genre_summary[roi_columns].head(5)
    print(leading_genres)

    print("\n--- Correlation Matrix (Budget vs Revenue) ---")
    budget_vs_revenue = corr_matrix.loc[['budget'], ['revenue']]
    print(budget_vs_revenue)

    # --- Stage 4: plots -----------------------------------------------------
    # NOTE(review): the file imports both `visualize` and `visual.visual`;
    # only `visualize` is used here -- confirm which module is intended.
    visualize.plot_genre_roi(genre_summary)
    visualize.plot_budget_vs_revenue(movies)
    visualize.plot_yearly_trends(trends_by_year)

    print("\nAnalysis complete! Check the 'results' folder for plots.")

# Entry point: invoke main() only when this code runs as the top-level
# program (its __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading data from: data/movies_metadata.csv...
Data loaded successfully: 45466 rows.
Starting data preprocessing...
Parsing JSON columns...
Preprocessing complete. 5369 entries remain after cleaning.
Calculating genre success metrics...

--- Top 5 Most Profitable Genres (ROI) ---
      primary_genre  median_roi  count
11           Horror    2.027030    325
7            Family    1.829940     55
15  Science Fiction    1.579513    104
2         Animation    1.452970    146
1         Adventure    1.327112    416

--- Correlation Matrix (Budget vs Revenue) ---
         revenue
budget  0.730151


NameError: name 'visualize' is not defined