In [9]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [3]:
# Folder that is scanned for input CSV files, and the sub-folder beneath it
# that receives the generated plot images.
input_directory = r'G:\Official_Vanda_Organizing\Spatial_Morphologic_Data\2_Data_Cleaning\4_Pinnacle_Volumes'
output_directory = os.path.join(
    input_directory,
    'Height_Volume_ScatterPlots',
)

# exist_ok=True makes re-running this cell harmless when the folder is already there.
os.makedirs(output_directory, exist_ok=True)

In [5]:
# Write one Height-vs-Volume scatter plot (PNG) for every CSV found directly
# inside input_directory. Each CSV is expected to carry 'Height_m' and
# 'Volume_cm3' columns.
csv_names = [entry for entry in os.listdir(input_directory) if entry.endswith('.csv')]

for csv_name in csv_names:
    stem = os.path.splitext(csv_name)[0]

    # Load the table and derive the height in centimetres from the metre column
    df = pd.read_csv(os.path.join(input_directory, csv_name))
    df['Height_cm'] = df['Height_m'] * 100

    # Volume goes on the x-axis, height on the y-axis
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(df['Volume_cm3'], df['Height_cm'], color='teal', alpha=0.7, edgecolor='black')
    ax.set_xlabel('Volume (cm³)')
    ax.set_ylabel('Height (cm)')
    ax.set_title(f'Height vs Volume: {csv_name}', fontsize=14)
    ax.grid(True)

    # Store the image, then release the figure so memory does not grow per file
    png_path = os.path.join(output_directory, f"{stem}_Height_vs_Volume.png")
    fig.savefig(png_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All scatter plots saved successfully to:", output_directory)

All scatter plots saved successfully to: G:\Official_Vanda_Organizing\Spatial_Morphologic_Data\2_Data_Cleaning\4_Pinnacle_Volumes\Height_Volume_ScatterPlots


In [11]:
# Log-log scatter plots of height vs. volume with a fitted power law, one PNG
# per CSV found directly inside `input_directory`.
#
# Each CSV must provide 'Height_m' and 'Volume_cm3' columns. Rows whose height
# or volume is not strictly positive are ignored (log10 is undefined there), a
# straight line is fitted to log10(height) against log10(volume), and its slope
# is reported in the legend as the exponent of height ∝ volume^slope.

# The log-scaled figures get their own folder. The output recorded for this
# cell points at 'Height_Volume_LogScatterPlots', while the only visible
# assignment of `output_directory` is 'Height_Volume_ScatterPlots'; deriving
# the folder here makes the cell write to the same place on a fresh
# Restart & Run All instead of depending on a rebinding done elsewhere.
log_output_directory = os.path.join(input_directory, 'Height_Volume_LogScatterPlots')
os.makedirs(log_output_directory, exist_ok=True)

saved_count = 0
skipped_files = []

# sorted() only makes the processing (and message) order deterministic.
for file_name in sorted(os.listdir(input_directory)):
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(input_directory, file_name)

    # Read the CSV
    df = pd.read_csv(file_path)

    # Convert height from meters to centimeters
    df['Height_cm'] = df['Height_m'] * 100

    # Keep only strictly positive rows (NaN compares False and is dropped too).
    # A separate name is used so `df` still holds the full table afterwards.
    positive = df[(df['Volume_cm3'] > 0) & (df['Height_cm'] > 0)]

    # Log-transform
    log_volume = np.log10(positive['Volume_cm3'])
    log_height = np.log10(positive['Height_cm'])

    # A line cannot be fitted to zero or one points, nor when every x value is
    # identical; linregress raises in those cases, which would abort the
    # whole batch. Skip the file and carry on with the rest.
    if log_volume.nunique() < 2:
        skipped_files.append(file_name)
        print(f"Skipping {file_name}: need at least two distinct positive volumes "
              f"to fit a line ({len(positive)} usable rows)")
        continue

    # Linear regression in log-log space
    fit = linregress(log_volume, log_height)
    line_x = np.linspace(log_volume.min(), log_volume.max(), 100)
    line_y = fit.slope * line_x + fit.intercept

    # Plot on an explicit figure so it can be closed even if saving fails
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.scatter(positive['Volume_cm3'], positive['Height_cm'], color='darkorange',
                   alpha=0.7, edgecolor='black', label='Data')
        # The fit lives in log10 space; map it back to data units for the log axes
        ax.plot(10**line_x, 10**line_y, color='blue', linestyle='--',
                label=f'Fit: height ∝ volume^{fit.slope:.2f}')
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Volume (cm³) [log scale]')
        ax.set_ylabel('Height (cm) [log scale]')
        ax.set_title(f'Log-Scaled Height vs Volume: {file_name}', fontsize=14)
        ax.grid(True, which="both", ls="--", linewidth=0.5)
        ax.legend()

        # Save figure
        output_path = os.path.join(log_output_directory, f"{os.path.splitext(file_name)[0]}_LogScaled.png")
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        saved_count += 1
    finally:
        plt.close(fig)

if skipped_files:
    print(f"{saved_count} log-scaled scatter plots with regression saved to: {log_output_directory} "
          f"({len(skipped_files)} file(s) skipped)")
else:
    print("All log-scaled scatter plots with regression saved to:", log_output_directory)

All log-scaled scatter plots with regression saved to: G:\Official_Vanda_Organizing\Spatial_Morphologic_Data\2_Data_Cleaning\4_Pinnacle_Volumes\Height_Volume_LogScatterPlots
