In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import numpy as np
from google.colab import drive


# Mount Google Drive inside the Colab runtime so that files under
# /content/drive (e.g. the processed_data folder used by the entry point
# at the bottom of this file) are reachable through the local filesystem.
drive.mount('/content/drive')


def extract_title_from_filename(filename):
    """Derive the plot title, parameter label and folder name from a data file name.

    The file stem (the name without directories and without its final
    extension) is expected to look like ``<distribution>_<method>_<parameter>``,
    e.g. ``Gamma_MLE_phi.csv``.

    Args:
        filename: Path (string or ``Path``) of the data file.

    Returns:
        A 3-tuple ``(title, parameter_label, folder_name)``:

        * ``title`` is the file stem.
        * ``parameter_label`` is the capitalized parameter component, or
          ``"Unknown"`` when the stem does not have exactly three
          underscore-separated components.
        * ``folder_name`` is ``"<distribution>_<method>"``, or ``"Unknown"``
          in the fallback case.
    """
    stem = Path(filename).stem  # drop directories and the .csv extension
    parts = stem.split('_')
    if len(parts) == 3:
        distribution, method, parameter = parts
        return stem, parameter.capitalize(), f"{distribution}_{method}"
    # Fall back to the bare stem rather than the raw argument: the title is
    # later used as a file name, so it must not carry directories or ".csv".
    return stem, "Unknown", "Unknown"

def plot_time_series(phi_filename, theta_filename, save_plots=False, output_dir=None, num_points=None):
    """Plot the raw series and its moving average for a phi/theta pair of CSV files.

    Each CSV must provide the columns ``time``, ``value`` and
    ``moving_average``. One figure is produced per file: the raw series in
    the top panel and the moving average in the bottom panel.

    Note: this function changes global plotting state (seaborn style and the
    default matplotlib figure size).

    Args:
        phi_filename: Path of the phi CSV file. Its name determines the
            figure title and the output sub-folder (see
            ``extract_title_from_filename``).
        theta_filename: Path of the theta CSV file. Its name determines the
            title of the theta figure.
        save_plots: If True, write each figure to a PNG file (300 dpi) and
            close it; otherwise display it with ``plt.show()``.
        output_dir: Base directory for saved figures; required when
            ``save_plots`` is True. Files go to ``output_dir/<folder_name>``,
            which is created if missing.
        num_points: If not None, only the first ``num_points`` rows of each
            file are plotted.

    Returns:
        A dict ``{'phi': Path, 'theta': Path}`` with the saved file paths
        when ``save_plots`` is True, otherwise ``None``.

    Raises:
        ValueError: If ``save_plots`` is True but ``output_dir`` is None.
    """
    # Fail early and clearly; otherwise Path(None) raises an opaque TypeError
    # only after both CSV files have already been read.
    if save_plots and output_dir is None:
        raise ValueError("output_dir must be provided when save_plots=True")

    # Read data
    phi_df = pd.read_csv(phi_filename)
    theta_df = pd.read_csv(theta_filename)

    # Extract title information (the phi file decides the output sub-folder)
    phi_title, phi_param, folder_name = extract_title_from_filename(phi_filename)
    theta_title, theta_param, _ = extract_title_from_filename(theta_filename)

    # Determine output directory
    if save_plots:
        output_dir = Path(output_dir) / folder_name
        output_dir.mkdir(parents=True, exist_ok=True)

    # Slice data if needed
    if num_points is not None:
        phi_df = phi_df.head(num_points)
        theta_df = theta_df.head(num_points)

    # Set style and default figure size (global settings, kept for
    # compatibility; the figures below pass their own explicit figsize).
    sns.set_style("whitegrid")
    plt.rcParams['figure.figsize'] = [10, 6]

    def create_subplots(df, title, param):
        """Draw one two-panel figure; return the saved path, or None if shown."""
        fig, (ax_raw, ax_avg) = plt.subplots(2, 1, figsize=(10, 8))
        plot_path = None
        try:
            fig.suptitle(title, fontsize=14, fontweight='bold')

            # Panel 1: original time series
            ax_raw.plot(df['time'], df['value'], 'b-', linewidth=1)
            ax_raw.set_title(f'Original {param} Data')
            ax_raw.set_xlabel('Time')
            ax_raw.set_ylabel(f'{param} Value')

            # Panel 2: moving average. The "21-Day" label is fixed text;
            # presumably the moving_average column was computed with a
            # 21-day window upstream -- TODO confirm.
            ax_avg.plot(df['time'], df['moving_average'], 'r-', linewidth=1)
            ax_avg.set_title(f'21-Day Moving Average of {param}')
            ax_avg.set_xlabel('Time')
            ax_avg.set_ylabel(f'{param} Value')

            # Adjust spacing between subplots
            fig.tight_layout()

            if save_plots:
                plot_path = output_dir / f"{title}.png"
                fig.savefig(plot_path, dpi=300, bbox_inches='tight')
        except Exception:
            # Do not leak the figure when plotting/saving fails (e.g. a
            # missing column); callers that loop over many files would
            # otherwise accumulate open figures.
            plt.close(fig)
            raise

        if save_plots:
            plt.close(fig)  # Close this specific figure to free memory
            return plot_path

        plt.show()
        return None

    # Create plots for both phi and theta
    all_plot_paths = {
        'phi': create_subplots(phi_df, phi_title, phi_param),
        'theta': create_subplots(theta_df, theta_title, theta_param),
    }

    return all_plot_paths if save_plots else None

def process_all_distributions(data_dir, output_dir, num_points=None,
                              distributions=('Gamma', 'InvG', 'LogN', 'Weib'),
                              methods=('LeastSquare', 'MLE')):
    """
    Create and save plots for every distribution/method pair found in a directory.

    For each combination, files named ``<dist>_<method>_*.csv`` are looked up
    in ``data_dir``; the first one whose remaining name part contains "phi"
    and the first one containing "theta" (case-insensitive, in sorted order)
    are passed to ``plot_time_series`` with ``save_plots=True``. Progress,
    warnings and errors are reported with ``print``; a failure for one pair
    does not stop the remaining pairs.

    Args:
        data_dir (str): Directory containing the data files
        output_dir (str): Directory to save the plots
        num_points (int, optional): Number of points to plot
        distributions (iterable of str, optional): Distribution name
            prefixes to process. Defaults to the four names originally
            hard-coded here.
        methods (iterable of str, optional): Estimation-method name
            components to process. Defaults to 'LeastSquare' and 'MLE'.

    Returns:
        None
    """
    data_dir = Path(data_dir)
    output_dir = Path(output_dir)

    # Process each distribution and method combination
    for dist in distributions:
        print(f"\nProcessing {dist} distribution...")
        for method in methods:
            prefix = f"{dist}_{method}_"
            # Sorted so that the chosen file is deterministic when several match.
            candidates = sorted(data_dir.glob(f"{prefix}*.csv"))

            def first_with(token):
                """Return the first candidate whose parameter part contains token."""
                # Only inspect the part after "<dist>_<method>_" so that a
                # distribution/method name containing the token cannot match.
                return next(
                    (f for f in candidates if token in f.stem[len(prefix):].lower()),
                    None,
                )

            phi_file = first_with('phi')
            theta_file = first_with('theta')

            if phi_file is None or theta_file is None:
                print(f"Warning: Could not find matching phi/theta files for {dist}_{method}")
                continue

            print(f"Processing {method} method...")
            try:
                plot_time_series(
                    str(phi_file),
                    str(theta_file),
                    save_plots=True,
                    output_dir=output_dir,
                    num_points=num_points,
                )
            except Exception as e:
                # Batch boundary: report the failure and keep going with the
                # remaining combinations instead of aborting the whole run.
                print(f"Error processing {dist}_{method}: {e}")
            else:
                print(f"Successfully created plots for {dist}_{method}")

# Script entry point: render plots for every distribution/method pair found
# in the processed-data folder on the mounted Drive, limited to 720 points
# per series, and store them under ./output_plots.
if __name__ == "__main__":
    process_all_distributions(
        "/content/drive/MyDrive/mohamed/processed_data",
        "output_plots",
        num_points=720,
    )

Mounted at /content/drive

Processing Gamma distribution...
Processing LeastSquare method...
Successfully created plots for Gamma_LeastSquare
Processing MLE method...
Successfully created plots for Gamma_MLE

Processing InvG distribution...
Processing LeastSquare method...
Successfully created plots for InvG_LeastSquare
Processing MLE method...
Successfully created plots for InvG_MLE

Processing LogN distribution...
Processing LeastSquare method...
Successfully created plots for LogN_LeastSquare
Processing MLE method...
Successfully created plots for LogN_MLE

Processing Weib distribution...
Processing LeastSquare method...
Successfully created plots for Weib_LeastSquare
Processing MLE method...
Successfully created plots for Weib_MLE
