In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

def visualize_data(file_paths, output_dir):
    os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

    for file_path in file_paths:
        # Load the CSV file
        data = pd.read_csv(file_path)

        # Convert the Date column to datetime format
        data['Date'] = pd.to_datetime(data['Date'])

        # Set the Date column as the index for easier time-series visualization
        data.set_index('Date', inplace=True)

        # File-specific identifier for titles
        file_name = os.path.basename(file_path).split('.')[0]

        # Line plot for trends of economic indicators over time
        plt.figure(figsize=(12, 6))
        data[['GDP Growth', 'CPI', 'Interest Rate', 'M2 Money Supply', 'PPI', 'Unemployment Rate']].plot(figsize=(12, 6))
        plt.title(f'Trends of Economic Indicators Over Time ({file_name})')
        plt.xlabel('Date')
        plt.ylabel('Scaled Values')
        plt.legend(title='Indicators')
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, f'{file_name}_trends.png'))
        plt.close()

        # Histogram for distributions of each economic indicator
        data[['GDP Growth', 'CPI', 'Interest Rate', 'M2 Money Supply', 'PPI', 'Unemployment Rate', 'VIX_Close']].hist(
            figsize=(14, 10), bins=20, edgecolor='black')
        plt.suptitle(f'Distributions of Economic Indicators ({file_name})')
        plt.savefig(os.path.join(output_dir, f'{file_name}_distributions.png'))
        plt.close()

        # Box plots to visualize the relationship between Market_Label and economic indicators
        indicators = ['GDP Growth', 'CPI', 'Interest Rate', 'M2 Money Supply', 'PPI', 'Unemployment Rate', 'VIX_Close']
        plt.figure(figsize=(16, 12))
        for i, indicator in enumerate(indicators, start=1):
            plt.subplot(3, 3, i)
            sns.boxplot(x=data['Market_Label'], y=data[indicator], palette='coolwarm')
            plt.title(f'{indicator} by Market_Label ({file_name})')
            plt.xlabel('Market_Label (0 = Bear, 1 = Bull)')
            plt.ylabel(indicator)
            plt.grid(True)

        plt.tight_layout()
        plt.suptitle(f'Box Plots: Economic Indicators vs Market Label ({file_name})', y=1.02)
        plt.savefig(os.path.join(output_dir, f'{file_name}_boxplots.png'))
        plt.close()

# Visualize the min-max-scaled nasdaq data
file_paths = [
    '../data/min_max_scaling/cleaned_minmax_scaled_combined_data_nasdaq.csv',
    # Add other file paths here
]
output_dir = '../data/EDA'  # Specify the directory to save visualizations

visualize_data(file_paths, output_dir)

# Visualize the min-max-scaled sp500 data
file_paths = [
    '../data/min_max_scaling/cleaned_minmax_scaled_combined_data_sp500.csv',
    # Add other file paths here
]
output_dir = '../data/EDA'  # Specify the directory to save visualizations

visualize_data(file_paths, output_dir)

# Visualize the standardscaled nasdaq data
file_paths = [
    '../data/standardscaler/cleaned_normalized_combined_data_nasdaq.csv',
    # Add other file paths here
]
output_dir = '../data/EDA'  # Specify the directory to save visualizations

visualize_data(file_paths, output_dir)

# Visualize the standardscaled sp500 data
file_paths = [
    '../data/standardscaler/cleaned_normalized_combined_data_sp500.csv',
    # Add other file paths here
]
output_dir = '../data/EDA'  # Specify the directory to save visualizations

visualize_data(file_paths, output_dir)


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x=data['Market_Label'], y=data[indicator], palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x=data['Market_Label'], y=data[indicator], palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x=data['Market_Label'], y=data[indicator], palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x=data['Market_Label'], y=data[indicator], palette='coolwarm')

Passing `palette` witho

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>

<Figure size 1200x600 with 0 Axes>