In [2]:
!pwd

/Users/keyur


In [12]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Base directories
# NOTE(review): both paths are absolute and machine-specific; the notebook only
# runs unchanged on a host that has this exact layout.
base_directory = "/data/home/prabhasreddy/MS_final/output/"
dataset_directory = "/data/home/prabhasreddy/MS_final/code base/Database/"
# Forecasting methods; each name is used both as a sub-directory of
# base_directory and as a key into `styles` below.
methods = ["ARIMA", "JumpDiffusion", "MarkovChain", "MonteCarlo"]
# Date windows per dataset. load_train_test_data reads "plot_start_date",
# "end_date" and "pred_end_date"; "start_date" is not read by the code in this cell.
datasets = {
    "Dataset-A": {
        "start_date": "2015-01-02",
        "plot_start_date": "2018-01-01",  # First date shown on the plot (start of 2018)
        "end_date": "2020-02-26",
        "pred_end_date": "2021-02-26"
    },
    "Dataset-B": {
        "start_date": "2015-01-02",
        "plot_start_date": "2019-01-01",  # First date shown on the plot (start of 2019)
        "end_date": "2020-08-26",
        "pred_end_date": "2021-02-26"
    }
}
# Tickers; each one maps to a Pre_Processed_<ticker>.csv input file and a
# <ticker>_predictions.csv file per method.
companies = ["AAPL", "TSLA", "GOOG", "MSFT", "AMZN"]

# Colors and line styles for the observed series ("Train"/"Test") and for each method
styles = {
    "Train": {"color": "navy", "linestyle": "-"},  # Navy blue for training data
    "Test": {"color": "deepskyblue", "linestyle": "-"},  # Light blue for testing data
    "ARIMA": {"color": "darkred", "linestyle": "-"},       # Dark red for ARIMA
    "JumpDiffusion": {"color": "forestgreen", "linestyle": "-"},  # Forest green for Jump Diffusion
    "MarkovChain": {"color": "goldenrod", "linestyle": "-"},   # Goldenrod for Markov Chain
    "MonteCarlo": {"color": "indigo", "linestyle": "-"}       # Indigo for Monte Carlo
}


def load_train_test_data(company, dataset_params, data_dir=None):
    """Load one company's adjusted-close series and slice it for plotting.

    Parameters
    ----------
    company : str
        Ticker; the file read is ``Pre_Processed_<company>.csv``.
    dataset_params : dict
        Must provide ``"plot_start_date"``, ``"end_date"`` and
        ``"pred_end_date"`` (date strings usable as ``.loc`` slice bounds).
    data_dir : str, optional
        Directory holding the CSV files. Defaults to the module-level
        ``dataset_directory`` so existing two-argument calls are unchanged.

    Returns
    -------
    (pandas.Series, pandas.Series) or (None, None)
        ``(train, test)`` slices of the ``"Adj. Close"`` column indexed by
        date, or ``(None, None)`` when the CSV file does not exist.

    Notes
    -----
    Both slices are inclusive, so a row dated exactly ``end_date`` appears in
    both series.
    """
    if data_dir is None:
        data_dir = dataset_directory

    file_path = os.path.join(data_dir, f"Pre_Processed_{company}.csv")
    if not os.path.exists(file_path):
        print(f"Data file not found for {company}: {file_path}")
        return None, None

    # Sort by date: label-based slicing on a DatetimeIndex is only well defined
    # when the index is monotonic, and the CSV row order is not guaranteed here.
    prices = (
        pd.read_csv(file_path, parse_dates=["Date"])
        .set_index("Date")
        .sort_index()["Adj. Close"]
    )

    plot_start = dataset_params["plot_start_date"]
    train_end = dataset_params["end_date"]
    pred_end = dataset_params["pred_end_date"]

    train_data = prices.loc[plot_start:train_end]
    test_data = prices.loc[train_end:pred_end]

    return train_data, test_data

def plot_comparison(company, dataset, train_data, test_data, predictions, output_dir):
    """Draw observed prices and every method's forecast on one figure and save it.

    ``train_data`` and ``test_data`` are plotted against their own index.
    ``predictions`` maps a method name to a DataFrame; empty frames are
    skipped, the rest are plotted from their ``Predicted_Adj_Close`` column.
    Line colour and style come from the module-level ``styles`` mapping.
    The figure is written to
    ``<output_dir>/<company>_<dataset>_Comparison.png`` and then closed.
    """
    plt.figure(figsize=(12, 6))

    # Observed series are drawn first: training window, then test window.
    observed = (
        (train_data, "Train Data", "Train"),
        (test_data, "Test Data", "Test"),
    )
    for series, legend_label, style_key in observed:
        plt.plot(
            series.index,
            series,
            label=legend_label,
            color=styles[style_key]["color"],
            linestyle=styles[style_key]["linestyle"],
        )

    # One line per forecasting method that actually produced predictions.
    for method, pred_data in predictions.items():
        if pred_data.empty:
            continue
        dates = pred_data.index
        forecast = pred_data['Predicted_Adj_Close']
        look = styles[method]
        plt.plot(dates, forecast, label=method, color=look["color"], linestyle=look["linestyle"])

    # Titles, axis labels, legend and grid.
    plt.title(f"{company} Predictions Comparison ({dataset})", fontsize=16)
    plt.xlabel("Date", fontsize=12)
    plt.ylabel("Adj. Close Price", fontsize=12)
    plt.legend(loc="best", fontsize=10)
    plt.grid(True)

    # Write the PNG, then release the figure.
    output_file = os.path.join(output_dir, f"{company}_{dataset}_Comparison.png")
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()
    print(f"Graph saved for {company} ({dataset}) at {output_file}")

def generate_graphs():
    """Produce one comparison PNG per (dataset, company) pair.

    For every entry in ``datasets`` and every ticker in ``companies`` this
    loads the observed prices, reads each method's prediction CSV from
    ``<base_directory>/<method>/predictions/<dataset>/<company>_predictions.csv``
    (an empty DataFrame stands in for a missing file) and calls
    ``plot_comparison`` when at least one method has predictions. Output goes
    under ``<base_directory>/ComparsionGraphs/<dataset>/``.
    """
    root_out = os.path.join(base_directory, "ComparsionGraphs")
    os.makedirs(root_out, exist_ok=True)

    for dataset_name, date_params in datasets.items():
        dataset_out = os.path.join(root_out, dataset_name)
        os.makedirs(dataset_out, exist_ok=True)

        for ticker in companies:
            # Observed prices; (None, None) means the input CSV is missing.
            train_data, test_data = load_train_test_data(ticker, date_params)
            if train_data is None or test_data is None:
                print(f"Skipping {ticker} in {dataset_name} due to missing data.")
                continue

            # Collect every method's forecast, keyed by method name.
            forecasts = {}
            for method in methods:
                csv_path = os.path.join(
                    base_directory, method, "predictions", dataset_name,
                    f"{ticker}_predictions.csv",
                )
                forecasts[method] = (
                    pd.read_csv(csv_path, index_col="Date", parse_dates=True)
                    if os.path.exists(csv_path)
                    else pd.DataFrame()
                )

            # Nothing to compare when every method came back empty.
            if all(frame.empty for frame in forecasts.values()):
                print(f"No predictions available for {ticker} in {dataset_name}")
                continue

            plot_comparison(ticker, dataset_name, train_data, test_data, forecasts, dataset_out)

# Generate the comparison graphs.
# Runs when the cell executes: reads the CSV inputs and writes PNG files under
# <base_directory>/ComparsionGraphs/.
generate_graphs()

Graph saved for AAPL (Dataset-A) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-A/AAPL_Dataset-A_Comparison.png
Graph saved for TSLA (Dataset-A) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-A/TSLA_Dataset-A_Comparison.png
Graph saved for GOOG (Dataset-A) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-A/GOOG_Dataset-A_Comparison.png
Graph saved for MSFT (Dataset-A) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-A/MSFT_Dataset-A_Comparison.png
Graph saved for AMZN (Dataset-A) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-A/AMZN_Dataset-A_Comparison.png
Graph saved for AAPL (Dataset-B) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-B/AAPL_Dataset-B_Comparison.png
Graph saved for TSLA (Dataset-B) at /data/home/prabhasreddy/MS_final/output/ComparsionGraphs/Dataset-B/TSLA_Dataset-B_Comparison.png
Graph saved for GOOG (Dataset-B) at /data/home/prabhasreddy/MS_final/

## Metrics

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Base directory for metrics
# NOTE(review): absolute, machine-specific path. It also rebinds `base_directory`
# to a different location than the one assigned in the prediction-plot cell.
base_directory = "/Users/keyur/Downloads/Phase2 modeling and simulation/output/"
# Method names double as sub-directory names under base_directory.
methods = ["ARIMA", "JumpDiffusion", "MarkovChain", "MonteCarlo"]
# NOTE: here `datasets` is a plain list of names, whereas the prediction-plot
# cell binds the same name to a dict of date ranges.
datasets = ["Dataset-A", "Dataset-B"]
# Metric columns read from each metrics.csv; the list order fixes the order of
# the bar groups on the chart.
metrics_types = ["MSE", "MAE", "RMSE", "MAPE"]

def load_metrics(method, dataset):
    """Read the metrics table for one method and dataset.

    The file looked up is
    ``<base_directory>/<method>/metrics/<dataset>/metrics.csv``.

    Returns the parsed ``pandas.DataFrame``, or ``None`` (after printing a
    notice) when the file does not exist.
    """
    metrics_file = os.path.join(base_directory, method, "metrics", dataset, "metrics.csv")

    # Guard clause: report and bail out when there is nothing to read.
    if not os.path.exists(metrics_file):
        print(f"Metrics file not found: {metrics_file}")
        return None

    return pd.read_csv(metrics_file)

def scale_metrics(metrics_data):
    """Min-max scale each metric across methods.

    Parameters
    ----------
    metrics_data : dict[str, list[float]]
        Maps a method name to its metric values. Every list must have the
        same length; position ``k`` in each list is the same metric.

    Returns
    -------
    dict[str, list[float]]
        A new mapping with new lists, where position ``k`` of every list has
        been rescaled to ``[0, 1]`` using the minimum and maximum of position
        ``k`` across all methods. The input is left untouched.

    Raises
    ------
    ValueError
        If the per-method lists do not all have the same length.

    Notes
    -----
    When all methods share the same value for a metric (including the case of
    a single method) the range is treated as 1, so every scaled value for that
    metric is 0.
    """
    # Copy up front so the caller's lists are never modified.
    scaled = {method: list(values) for method, values in metrics_data.items()}
    if not scaled:
        return scaled

    lengths = {len(values) for values in scaled.values()}
    if len(lengths) != 1:
        raise ValueError(
            f"All methods must provide the same number of metrics, got lengths {sorted(lengths)}"
        )
    metric_count = lengths.pop()

    for metric_idx in range(metric_count):
        column = [values[metric_idx] for values in scaled.values()]
        low, high = min(column), max(column)
        # Avoid dividing by zero when every method has the same value.
        span = high - low if high > low else 1

        for values in scaled.values():
            values[metric_idx] = (values[metric_idx] - low) / span

    return scaled

def plot_metrics_comparison(metrics_data, company, dataset, output_dir):
    """Save a grouped bar chart comparing scaled metrics across methods.

    Parameters
    ----------
    metrics_data : dict[str, list[float]]
        Maps a method name to one value per entry of ``metrics_types``.
        Methods from ``methods`` that are absent from the mapping are skipped.
    company, dataset : str
        Used in the title and in the output file name.
    output_dir : str
        Directory that receives
        ``<company>_<dataset>_MetricsComparison.png``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bar_width = 0.2
        x = range(len(metrics_types))

        # One bar series per method; the offset uses the method's position in
        # `methods`, so a given method always lands in the same slot.
        for i, method in enumerate(methods):
            if method in metrics_data:
                ax.bar(
                    [pos + i * bar_width for pos in x],
                    metrics_data[method],
                    width=bar_width,
                    label=method,
                )

        ax.set_title(f"{company} Metrics Comparison ({dataset})", fontsize=16)
        ax.set_xlabel("Metrics (Scaled)", fontsize=12)
        ax.set_ylabel("Scaled Values [0, 1]", fontsize=12)

        # Bars sit at pos, pos + w, ..., pos + (n - 1) * w, so the centre of
        # each group is pos + (n - 1) * w / 2.
        group_center = bar_width * (len(methods) - 1) / 2
        ax.set_xticks([pos + group_center for pos in x])
        ax.set_xticklabels(metrics_types)
        ax.legend(loc="best", fontsize=10)
        ax.grid(True)

        # Note about scaling, placed just below the figure area.
        fig.text(0.5, -0.05, "Note: Values scaled using Min-Max normalization.", ha="center", fontsize=10)

        output_file = os.path.join(output_dir, f"{company}_{dataset}_MetricsComparison.png")
        fig.tight_layout()
        # bbox_inches="tight" grows the saved area to include the note; with
        # the default bounding box, text at y < 0 is cropped from the PNG.
        fig.savefig(output_file, bbox_inches="tight")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    print(f"Metrics comparison graph saved for {company} ({dataset}) at {output_file}")

def generate_metrics_comparison():
    """Produce one metrics bar chart per (dataset, company) pair.

    For each dataset the metrics table of every method is loaded once, then
    for each company the row whose ``"Stock"`` column matches is turned into a
    list of values (one per entry of ``metrics_types``), scaled with
    ``scale_metrics`` and plotted with ``plot_metrics_comparison``. Output
    goes under ``<base_directory>/MetricsComparisonGraphs/<dataset>/``.
    """
    metrics_output_dir = os.path.join(base_directory, "MetricsComparisonGraphs")
    os.makedirs(metrics_output_dir, exist_ok=True)

    for dataset in datasets:
        dataset_output_dir = os.path.join(metrics_output_dir, dataset)
        os.makedirs(dataset_output_dir, exist_ok=True)

        # The metrics file path depends only on (method, dataset), so read each
        # file once here instead of once per company. This also means a
        # missing file is reported once rather than once per company.
        method_tables = {}
        for method in methods:
            table = load_metrics(method, dataset)
            if table is not None:
                method_tables[method] = table

        for company in ["AAPL", "TSLA", "GOOG", "MSFT", "AMZN"]:
            metrics_data = {}
            for method, table in method_tables.items():
                # Rows for the current company; the first match is used.
                company_metrics = table[table["Stock"] == company]
                if not company_metrics.empty:
                    metrics_data[method] = [
                        company_metrics[metric].values[0] for metric in metrics_types
                    ]

            # Scale the metrics so that differently sized metrics share one axis.
            metrics_data = scale_metrics(metrics_data)

            # Plot only when at least one method has data for this company.
            if metrics_data:
                plot_metrics_comparison(metrics_data, company, dataset, dataset_output_dir)
            else:
                print(f"No metrics data available for {company} in {dataset}")

# Generate the metrics comparison graphs.
# Runs when the cell executes: reads each method's metrics.csv and writes PNG
# files under <base_directory>/MetricsComparisonGraphs/.
generate_metrics_comparison()

Metrics comparison graph saved for AAPL (Dataset-A) at /Users/keyur/Downloads/Phase2 modeling and simulation/output/MetricsComparisonGraphs/Dataset-A/AAPL_Dataset-A_MetricsComparison.png
Metrics comparison graph saved for TSLA (Dataset-A) at /Users/keyur/Downloads/Phase2 modeling and simulation/output/MetricsComparisonGraphs/Dataset-A/TSLA_Dataset-A_MetricsComparison.png
Metrics comparison graph saved for GOOG (Dataset-A) at /Users/keyur/Downloads/Phase2 modeling and simulation/output/MetricsComparisonGraphs/Dataset-A/GOOG_Dataset-A_MetricsComparison.png
Metrics comparison graph saved for MSFT (Dataset-A) at /Users/keyur/Downloads/Phase2 modeling and simulation/output/MetricsComparisonGraphs/Dataset-A/MSFT_Dataset-A_MetricsComparison.png
Metrics comparison graph saved for AMZN (Dataset-A) at /Users/keyur/Downloads/Phase2 modeling and simulation/output/MetricsComparisonGraphs/Dataset-A/AMZN_Dataset-A_MetricsComparison.png
Metrics comparison graph saved for AAPL (Dataset-B) at /Users/key