In [None]:
import pandas as pd

# Input tables used by every later cell; all three are read from /content.

# Yearly subsidence observations; later cells read its 'station_id', 'Year'
# and 'yearly_change' columns.
dfSubsidence = pd.read_csv('/content/SubsidenceYearlyRolling.csv')


# Per-station geological layer properties consumed by the subsidence models.
dfLayers = pd.read_csv('/content/geology_layers_by_station.csv')
# Preview of the first rows; a notebook only displays it when it is the last
# expression of the cell, so here it has no visible effect.
dfLayers.head()

# Water-level series; later cells read its 'station_id', 'year' and
# 'level_change_mean' columns.
dfWaterlevel = pd.read_csv('/content/DrawdownInterplorlated.csv')


In [None]:
import numpy as np
import pandas as pd
from scipy.integrate import odeint

class LayeredSubsidenceModel:
    """Consolidation settlement summed over the layers of one station.

    ``layer_data`` is a DataFrame with one row per layer and the columns
    ``layer_number``, ``thickness``, ``percent_coarse``, ``void_ratio``,
    ``compression_index``, ``recompression_index``, ``OCR``,
    ``hydraulic_conductivity`` and ``initial_effective_stress``.

    NOTE(review): the time argument is passed in days while the unit of
    ``hydraulic_conductivity`` is not visible here; ``cv_multiplier`` looks
    like an empirical scaling that absorbs this -- confirm against the data.
    """

    def __init__(self, layer_data):
        """Store the layers ordered by ``layer_number`` and set the constants."""
        self.layers = layer_data.sort_values('layer_number')
        self.num_layers = len(layer_data)
        self.total_thickness = self.layers['thickness'].sum()

        # set constants
        self.water_unit_weight = 9.81  # kN/m³
        self.cv_multiplier = 1e7  # Increased consolidation coefficient

    def _calculate_cv(self, layer):
        """Return the (scaled) consolidation coefficient of ``layer``.

        The value is floored at 1e-8 so the time factor never collapses to
        zero for layers with a vanishing hydraulic conductivity.
        """
        cv = (layer['hydraulic_conductivity'] * self.water_unit_weight *
              (1 + layer['void_ratio']) * self.cv_multiplier)
        return max(cv, 1e-8)  # Ensure minimum value

    def _calculate_consolidation_ratio(self, Tv):
        """Return the degree of consolidation ``U`` in [0, 1] for time factor ``Tv``.

        Uses ``2*sqrt(Tv/pi)`` for ``Tv < 0.2`` and
        ``1 - (8/pi**2) * exp(-(pi**2/4) * Tv)`` otherwise. Accepts a scalar
        or an array-like of time factors.
        """
        if isinstance(Tv, (int, float)):
            if Tv < 0.2:
                U = 2 * np.sqrt(Tv / np.pi)
            else:
                U = 1 - (8 / (np.pi**2)) * np.exp(-((np.pi**2) / 4) * Tv)
        else:
            U = np.where(Tv < 0.2,
                         2 * np.sqrt(Tv / np.pi),
                         1 - (8 / (np.pi**2)) * np.exp(-((np.pi**2) / 4) * Tv))
        return np.clip(U, 0, 1)

    def _calculate_layer_settlement(self, layer, delta_p, t):
        """Return the settlement of one layer at time ``t`` for stress change ``delta_p``.

        The ultimate settlement follows the recompression branch while the
        final stress stays at or below ``initial_effective_stress * OCR`` and
        adds the virgin-compression branch above it. It is then scaled by the
        fine-grained fraction of the layer and by the degree of consolidation
        reached at ``t``.
        """
        # Skip calculation if pressure change is zero
        if delta_p == 0:
            return 0.0

        # Calculate stresses
        initial_stress = layer['initial_effective_stress']
        precons_pressure = initial_stress * layer['OCR']
        # Floor at 10 % of the initial stress so the logarithm stays defined
        final_stress = max(initial_stress + delta_p, 0.1 * initial_stress)

        # Calculate ultimate settlement
        strain_scale = layer['thickness'] / (1 + layer['void_ratio'])
        if final_stress <= precons_pressure:
            # Recompression range
            settlement = strain_scale * \
                layer['recompression_index'] * \
                np.log10(final_stress / initial_stress)
        else:
            # Both recompression and virgin compression
            settlement = strain_scale * \
                (layer['recompression_index'] *
                 np.log10(precons_pressure / initial_stress) +
                 layer['compression_index'] *
                 np.log10(final_stress / precons_pressure))

        # time factor
        cv = self._calculate_cv(layer)
        Tv = (cv * t) / (layer['thickness']**2)
        U = self._calculate_consolidation_ratio(Tv)

        # Scale settlement based on percent coarse
        fine_fraction = (100 - layer['percent_coarse']) / 100
        settlement *= fine_fraction  # More settlement for fine-grained materials

        return settlement * U

    def predict_annual_subsidence(self, annual_head_change,
                                  duration_days=365.0, tau=30.0):
        """Predict the subsidence reached at the end of the period.

        The head change approaches ``annual_head_change`` exponentially with
        time constant ``tau``. Only the state at ``duration_days`` is
        returned, so it is evaluated directly instead of stepping through
        every intermediate day; the result is the same as the last entry of
        a day-by-day series.

        Parameters:
        annual_head_change: scalar head change applied over the period
        duration_days: length of the period in days (default 365)
        tau: time constant of the head change in days (default 30)

        Returns:
        Total settlement of all layers multiplied by 1000 (millimetres when
        layer thicknesses are in metres); 0.0 when the head change is zero.
        """
        if annual_head_change == 0:
            return 0.0

        # Head change and stress change reached at the end of the period
        final_head_change = annual_head_change * (1 - np.exp(-duration_days / tau))
        delta_p = final_head_change * self.water_unit_weight

        # Sum the settlement of every layer at the final time
        total_subsidence = 0.0
        for _, layer in self.layers.iterrows():
            total_subsidence += self._calculate_layer_settlement(
                layer, delta_p, duration_days)

        # Return final settlement in millimeters
        return total_subsidence * 1000


def load_layer_data(data_string):
    """Convert comma-separated text into a layer DataFrame.

    Each non-blank line is split on commas and the fields at fixed positions
    are converted to typed columns. Blank lines are skipped, and text with no
    data lines yields an empty DataFrame that still carries the expected
    columns.

    Parameters:
    data_string: text with one comma-separated layer record per line

    Returns:
    DataFrame with the columns station_id, layer_number, thickness,
    percent_coarse, void_ratio, compression_index, recompression_index, OCR,
    hydraulic_conductivity and initial_effective_stress.

    Raises:
    IndexError if a non-blank line has fewer than 19 fields.
    ValueError if a numeric field cannot be converted (e.g. a header line).
    """
    # (output column, index of the source field, converter)
    field_spec = (
        ('station_id', 0, str),
        ('layer_number', 9, int),
        ('thickness', 11, float),
        ('percent_coarse', 12, float),
        ('void_ratio', 13, float),
        ('compression_index', 14, float),
        ('recompression_index', 15, float),
        ('OCR', 16, float),
        ('hydraulic_conductivity', 17, float),
        ('initial_effective_stress', 18, float),
    )

    rows = []
    for line in data_string.strip().splitlines():
        if not line.strip():
            continue  # tolerate blank lines between records
        values = line.split(',')
        rows.append({name: convert(values[position])
                     for name, position, convert in field_spec})

    # Passing the columns keeps the schema even when there are no rows
    return pd.DataFrame(rows, columns=[name for name, _, _ in field_spec])

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

class GeologicalSubsidenceModel:
    """Weighted per-layer compression model driven by water-level change.

    ``layer_data`` is a DataFrame with one row per layer and the columns
    ``layer_number``, ``thickness``, ``percent_coarse``, ``void_ratio``,
    ``compression_index``, ``recompression_index``, ``OCR`` and
    ``initial_effective_stress``.
    """

    def __init__(self, layer_data):
        """Sort the layers by ``layer_number`` and precompute their weights."""
        # The index is reset so the row label matches the position in
        # ``self.layer_weights``.
        self.layers = layer_data.sort_values('layer_number').reset_index(drop=True)
        self.num_layers = len(layer_data)

        # set constants
        self.water_unit_weight = 9.81  # kN/m³
        self.min_stress = 10.0  # kPa

        self._initialize_layer_weights()

    def _initialize_layer_weights(self):
        """Compute normalized layer weights and store them in ``self.layer_weights``.

        The raw weight of a layer is compressibility * fine fraction * depth
        factor. When every raw weight is zero (for example all layers are
        100 % coarse) the layers are weighted equally instead of dividing by
        zero, which would make every prediction NaN.
        """
        weights = []
        for _, layer in self.layers.iterrows():
            # Calculate weight factors
            compressibility = layer['compression_index'] * (1 + layer['void_ratio'])
            fine_fraction = (100 - layer['percent_coarse']) / 100
            depth_factor = np.exp(-layer['layer_number'] / self.num_layers)

            # Combined weight
            weights.append(compressibility * fine_fraction * depth_factor)

        # Normalize weights
        weights = np.array(weights, dtype=float)
        total = np.sum(weights)
        if weights.size and total == 0:
            self.layer_weights = np.full(weights.size, 1.0 / weights.size)
        else:
            self.layer_weights = weights / total

    def predict_subsidence(self, current_change, prev_changes=None):
        """Predict annual subsidence.

        Parameters:
        current_change: water-level change of the year being predicted
        prev_changes: optional sequence of earlier water-level changes
            ordered most recent first (``prev_changes[0]`` is the previous
            year). Missing entries (None/NaN) are ignored.

        Returns:
        Weighted sum of the layer compressions multiplied by 1000 (mm).
        """
        total_subsidence = 0

        if prev_changes is not None:
            historical_effect = 0
            # Weight decays with age so the most recent year counts most;
            # age is 1 for the previous year, 2 for the one before, ...
            for age, change in enumerate(prev_changes, start=1):
                if pd.notna(change):
                    historical_effect += change * np.exp(-age)
            current_change += 0.3 * historical_effect

        for i, layer in self.layers.iterrows():
            # Calculate effective stress change
            depth_factor = np.exp(-layer['layer_number'] / self.num_layers)
            stress_change = current_change * self.water_unit_weight * depth_factor

            # Apply layer properties
            compression = self._calculate_compression(layer, stress_change)

            # Add to total with layer weight
            total_subsidence += compression * self.layer_weights[i]

        return total_subsidence * 1000  # mm

    def _calculate_compression(self, layer, stress_change):
        """Calculate compression for a single layer.

        Stresses are floored at ``self.min_stress`` so the logarithms stay
        defined. The result is scaled by ``0.5 + 0.5 * fine_fraction``.

        NOTE(review): this uses the natural logarithm, whereas
        LayeredSubsidenceModel in this file uses log10 with the same
        compression/recompression indices -- confirm which is intended.
        """
        if abs(stress_change) < 1e-6:
            return 0.0

        initial_stress = max(layer['initial_effective_stress'], self.min_stress)
        final_stress = max(initial_stress + stress_change, self.min_stress)

        cc = layer['compression_index']
        cr = layer['recompression_index']
        e0 = layer['void_ratio']
        ocr = layer['OCR']
        precons_pressure = initial_stress * ocr

        # dual compression calculations
        if final_stress <= precons_pressure:
            compression = (layer['thickness'] * cr / (1 + e0)) * \
                np.log(final_stress / initial_stress)
        else:
            compression = (layer['thickness'] / (1 + e0)) * \
                (cr * np.log(precons_pressure / initial_stress) +
                 cc * np.log(final_stress / precons_pressure))

        # apply fine content scaling
        fine_fraction = (100 - layer['percent_coarse']) / 100
        compression *= (0.5 + 0.5 * fine_fraction)

        return compression

In [None]:
def analyze_station(station_id, dfLayers, dfWaterlevel, dfSubsidence, lookback=2):
    """Predict yearly subsidence for one station and pair it with observations.

    For every subsidence record of the station that has both an observed
    'yearly_change' and a water-level change for the same year, a prediction
    is made from that year's change plus the changes of the ``lookback``
    preceding years (most recent first; missing years are passed as None).

    Returns a DataFrame with the columns year, predicted_subsidence,
    actual_subsidence and water_change, or None when no year qualifies.
    """
    # Slice the three tables down to this station
    layers_here = dfLayers.loc[dfLayers['station_id'] == station_id].copy()
    subsidence_here = dfSubsidence.loc[
        dfSubsidence['station_id'] == station_id
    ].sort_values('Year')
    water_here = dfWaterlevel.loc[
        dfWaterlevel['station_id'] == station_id
    ].sort_values('year')

    model = GeologicalSubsidenceModel(layers_here)

    # year -> mean water-level change
    change_by_year = water_here.set_index('year')['level_change_mean'].to_dict()
    lags = range(1, lookback + 1)

    records = []
    for _, observation in subsidence_here.iterrows():
        year = observation['Year']
        observed = observation['yearly_change']
        water_now = change_by_year.get(year)

        # Both the driver and the observation are required
        if pd.isna(water_now) or pd.isna(observed):
            continue

        history = [change_by_year.get(year - lag) for lag in lags]
        forecast = model.predict_subsidence(water_now, history)

        records.append({
            'year': year,
            'predicted_subsidence': forecast,
            'actual_subsidence': observed,
            'water_change': water_now
        })

    if not records:
        return None
    return pd.DataFrame(records)

def analyze_all_stations(dfLayers, dfWaterlevel, dfSubsidence, lookback=2):
    """Analyze all stations and compile results.

    Runs ``analyze_station`` for every unique 'station_id' in ``dfLayers``
    and computes RMSE and R² per station.

    Returns:
    (results_df, metrics_df): the concatenated per-year predictions with a
    'station_id' column, and one row of metrics (RMSE, R2, n_predictions) per
    station. Both frames are empty, with their columns present, when no
    station produced a prediction.
    """
    all_results = []
    station_metrics = {}

    for station in dfLayers['station_id'].unique():
        print(f"Processing station: {station}")

        # Analyze station
        station_results = analyze_station(station, dfLayers, dfWaterlevel, dfSubsidence, lookback)
        if station_results is None:
            continue

        # Add station ID
        station_results['station_id'] = station
        all_results.append(station_results)

        # Calculate metrics
        predictions = station_results['predicted_subsidence']
        actuals = station_results['actual_subsidence']

        station_metrics[station] = {
            'RMSE': np.sqrt(mean_squared_error(actuals, predictions)),
            'R2': r2_score(actuals, predictions),
            'n_predictions': len(station_results)
        }

    if not all_results:
        # pd.concat raises ValueError on an empty list, so return empty
        # frames that still expose the columns callers index into.
        empty_results = pd.DataFrame(columns=[
            'year', 'predicted_subsidence', 'actual_subsidence',
            'water_change', 'station_id'
        ])
        empty_metrics = pd.DataFrame(columns=['RMSE', 'R2', 'n_predictions'])
        return empty_results, empty_metrics

    results_df = pd.concat(all_results, ignore_index=True)
    metrics_df = pd.DataFrame(station_metrics).T

    return results_df, metrics_df

def plot_results(results_df, metrics_df):
    """Draw a 2x2 summary figure of the analysis and show it.

    Panels: predicted-vs-actual scatter with a fixed reference diagonal,
    histogram of prediction errors, and per-station bar charts of RMSE and
    R² (each sorted ascending).
    """
    actual = results_df['actual_subsidence']
    predicted = results_df['predicted_subsidence']

    plt.figure(figsize=(15, 10))

    # Panel 1: scatter of predictions against observations
    plt.subplot(2, 2, 1)
    plt.scatter(actual, predicted, alpha=0.5)
    diagonal = [-100, 100]
    plt.plot(diagonal, diagonal, 'r--')  # Perfect prediction line
    plt.xlabel('Actual Subsidence (mm)')
    plt.ylabel('Predicted Subsidence (mm)')
    plt.title('Predicted vs Actual Subsidence')

    # Panel 2: distribution of the prediction error
    plt.subplot(2, 2, 2)
    plt.hist(predicted - actual, bins=30)
    plt.xlabel('Prediction Error (mm)')
    plt.ylabel('Count')
    plt.title('Error Distribution')

    # Panels 3 and 4: one sorted bar chart per station metric
    metric_panels = [
        ('RMSE', 'RMSE (mm)', 'RMSE by Station'),
        ('R2', 'R²', 'R² by Station'),
    ]
    for position, (column, y_label, heading) in enumerate(metric_panels, start=3):
        plt.subplot(2, 2, position)
        metrics_df[column].sort_values().plot(kind='bar')
        plt.xlabel('Station')
        plt.ylabel(y_label)
        plt.title(heading)
        plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()


In [None]:

# Run the analysis
# NOTE(review): `dfLayerajusted` is not defined in any visible cell; unless it
# is created elsewhere in the notebook session this line raises NameError.
# Confirm whether `dfLayers` (or an adjusted copy of it) was intended.
results_df, metrics_df = analyze_all_stations(dfLayerajusted, dfWaterlevel, dfSubsidence, lookback=2)

# Print summary
print("\nModel Performance Summary")
print("========================")
print(f"\nNumber of stations analyzed: {len(metrics_df)}")
print(f"Total number of predictions: {len(results_df)}")

# Pooled metrics over the predictions of every station
overall_rmse = np.sqrt(mean_squared_error(
    results_df['actual_subsidence'],
    results_df['predicted_subsidence']
))
overall_r2 = r2_score(
    results_df['actual_subsidence'],
    results_df['predicted_subsidence']
)

print(f"\nOverall RMSE: {overall_rmse:.2f} mm")
print(f"Overall R²: {overall_r2:.3f}")

# Per-station metrics, best (lowest RMSE) first
print("\nStation Performance:")
print(metrics_df.sort_values('RMSE'))

# Plot results
plot_results(results_df, metrics_df)

# Save results
# The metrics file keeps its index because the index holds the station ids.
results_df.to_csv('geological_model_predictions.csv', index=False)
metrics_df.to_csv('geological_model_metrics.csv')

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.metrics import r2_score, mean_squared_error

class ModelOptimizer:
    """Tune per-station layer thicknesses to minimise prediction RMSE.

    Layers whose thickness falls below ``thickness_threshold`` are treated as
    removed from the station's profile.
    """

    def __init__(self, dfLayers, dfWaterlevel, dfSubsidence, lookback=2):
        """Keep references to the input tables.

        Parameters:
        dfLayers: layer table with 'station_id' and 'thickness' columns
        dfWaterlevel: table with 'station_id', 'year', 'level_change_mean'
        dfSubsidence: table with 'station_id', 'Year', 'yearly_change'
        lookback: number of preceding years passed to the model as history
        """
        self.dfLayers = dfLayers
        self.dfWaterlevel = dfWaterlevel
        self.dfSubsidence = dfSubsidence
        self.thickness_threshold = 2.0  # Minimum layer thickness
        self.lookback = lookback

    def _evaluate_model(self, params, station_layers, station_id):
        """Return the RMSE of the model for the given layer thicknesses.

        ``params`` holds one thickness per row of ``station_layers`` (any
        array-like). Returns ``inf`` when no layer reaches the threshold,
        when fewer than three predictions can be made, or when the
        evaluation raises.
        """
        # Accept lists as well as arrays; the mask below needs an ndarray
        thicknesses = np.asarray(params, dtype=float)
        active_layers = thicknesses >= self.thickness_threshold

        if not active_layers.any():
            return float('inf')

        # Keep only the active layers and give them the trial thicknesses
        station_layers = station_layers[active_layers].copy()
        station_layers['thickness'] = thicknesses[active_layers]

        try:
            # Initialize model
            model = GeologicalSubsidenceModel(station_layers)

            # Get station data
            station_subsidence = self.dfSubsidence[self.dfSubsidence['station_id'] == station_id]
            station_water = self.dfWaterlevel[self.dfWaterlevel['station_id'] == station_id]
            water_changes = station_water.set_index('year')['level_change_mean'].to_dict()

            predictions = []
            actuals = []

            # Make predictions
            for _, row in station_subsidence.iterrows():
                year = row['Year']
                actual_change = row['yearly_change']
                current_water_change = water_changes.get(year)

                if pd.notna(current_water_change) and pd.notna(actual_change):
                    prev_changes = [water_changes.get(year - i)
                                    for i in range(1, self.lookback + 1)]
                    predictions.append(
                        model.predict_subsidence(current_water_change, prev_changes))
                    actuals.append(actual_change)

            if len(predictions) < 3:
                return float('inf')

            # Calculate RMSE (to minimize)
            return np.sqrt(mean_squared_error(actuals, predictions))

        except Exception as e:
            # Best effort: a failed evaluation is reported and scored as the
            # worst possible value so the optimizer moves away from it.
            print(f"Error in model evaluation: {str(e)}")
            return float('inf')

    def optimize_station(self, station_id):
        """Optimize layer thicknesses for a station.

        Runs SLSQP from three starting points (the original thicknesses and
        0.8x / 1.2x of them) and keeps the best successful run.

        Returns None when the station has no layers, otherwise a dict with
        the keys station_id, success, original_thicknesses,
        optimized_thicknesses, active_layers, original_rmse, optimized_rmse
        and n_layers_removed. When no run succeeds, the original thicknesses
        are reported as the optimum with ``success`` False.
        """
        print(f"\nOptimizing station: {station_id}")

        # Get station layers
        station_layers = self.dfLayers[self.dfLayers['station_id'] == station_id].copy()
        if len(station_layers) == 0:
            print(f"No layer data for station {station_id}")
            return None

        initial_thicknesses = station_layers['thickness'].values

        def objective(x):
            return self._evaluate_model(x, station_layers, station_id)

        # Set bounds for thicknesses (0 to 1000m)
        bounds = [(0, 1000)] * len(initial_thicknesses)

        # Try multiple starting points
        best_result = None
        best_score = float('inf')

        # Starting points: original and slightly modified thicknesses
        starting_points = [
            initial_thicknesses,
            initial_thicknesses * 0.8,
            initial_thicknesses * 1.2
        ]

        for start_point in starting_points:
            try:
                result = minimize(
                    objective,
                    start_point,
                    method='SLSQP',
                    bounds=bounds,
                    options={'ftol': 1e-6, 'maxiter': 1000}
                )
            except Exception as e:
                print(f"Optimization error with starting point: {str(e)}")
                continue

            if result.success and result.fun < best_score:
                best_result = result
                best_score = result.fun

        # Baseline score, evaluated once and shared by both outcomes
        original_rmse = objective(initial_thicknesses)

        if best_result is None:
            return {
                'station_id': station_id,
                'success': False,
                'original_thicknesses': initial_thicknesses,
                'optimized_thicknesses': initial_thicknesses,
                'active_layers': np.ones(len(initial_thicknesses), dtype=bool),
                'original_rmse': original_rmse,
                'optimized_rmse': original_rmse,
                'n_layers_removed': 0
            }

        optimized_thicknesses = best_result.x
        active_layers = optimized_thicknesses >= self.thickness_threshold

        summary = {
            'station_id': station_id,
            'success': True,
            'original_thicknesses': initial_thicknesses,
            'optimized_thicknesses': optimized_thicknesses,
            'active_layers': active_layers,
            'original_rmse': original_rmse,
            'optimized_rmse': best_result.fun,
            'n_layers_removed': np.sum(~active_layers)
        }

        self._print_optimization_results(summary)
        return summary

    def _print_optimization_results(self, result):
        """Print optimization results for a station"""
        print(f"\nResults for station {result['station_id']}:")
        print(f"Original RMSE: {result['original_rmse']:.6f}")
        print(f"Optimized RMSE: {result['optimized_rmse']:.6f}")
        print(f"Improvement: {result['original_rmse'] - result['optimized_rmse']:.6f}")
        print(f"Layers removed: {result['n_layers_removed']}")

        print("\nLayer Status:")
        print("Layer | Original (m) | Optimized (m) | Status")
        print("-" * 45)
        layer_rows = zip(
            result['original_thicknesses'],
            result['optimized_thicknesses'],
            result['active_layers']
        )
        for number, (orig, opt, active) in enumerate(layer_rows, start=1):
            status = "Active" if active else "Removed"
            print(f"{number:5d} | {orig:11.1f} | {opt:11.1f} | {status}")

def optimize_all_stations(dfLayers, dfWaterlevel, dfSubsidence):
    """Optimize all stations and return results.

    Runs ``ModelOptimizer.optimize_station`` for every unique 'station_id'
    in ``dfLayers``, prints a summary and returns one row per station that
    produced a result. The returned frame is empty, with its columns
    present, when no station produced one.
    """
    optimizer = ModelOptimizer(dfLayers, dfWaterlevel, dfSubsidence)

    results = []
    for station in dfLayers['station_id'].unique():
        result = optimizer.optimize_station(station)
        if result is not None:
            results.append(result)

    if results:
        results_df = pd.DataFrame(results)
    else:
        # Without rows the frame would have no 'success' column and the
        # summary below would raise KeyError.
        results_df = pd.DataFrame(columns=[
            'station_id', 'success', 'original_thicknesses',
            'optimized_thicknesses', 'active_layers', 'original_rmse',
            'optimized_rmse', 'n_layers_removed'
        ])

    # Print summary statistics
    print("\nOptimization Summary")
    print("===================")
    print(f"Total stations processed: {len(results_df)}")

    if results_df.empty:
        return results_df

    successful = results_df[results_df['success']]
    if len(successful) > 0:
        improvement = successful['original_rmse'] - successful['optimized_rmse']
        print(f"Successful optimizations: {len(successful)}/{len(results_df)}")
        print(f"Average RMSE improvement: {improvement.mean():.6f}")
        print(f"Average layers removed: {successful['n_layers_removed'].mean():.1f}")

        # Distribution of removed layers
        n_removed = successful['n_layers_removed'].value_counts().sort_index()
        print("\nDistribution of removed layers:")
        for n, count in n_removed.items():
            print(f"{n} layers removed: {count} stations")

    return results_df

In [None]:
# Run the optimization
# Optimizes the layer thicknesses of every station found in dfLayers.
optimization_results = optimize_all_stations(dfLayers, dfWaterlevel, dfSubsidence)

# Save results
# The thickness and active-layer columns hold numpy arrays, which to_csv
# writes as their string form; parse_array_string / parse_boolean_array in a
# later cell turn them back into arrays.
optimization_results.to_csv('optimization_results.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
import ast

def parse_array_string(array_string):
    """Parse string representation of array into numpy array.

    Accepts the bracketed text produced by printing a numpy array
    ("[1. 2.5 3.]", possibly wrapped over several lines) as well as a
    comma-separated list ("[1.0, 2.5]").

    Returns an empty array when the input is not a string (for example a
    missing CSV cell) or contains a token that is not a number.
    """
    try:
        # Commas become separators; split() then handles spaces and newlines
        clean_string = array_string.strip().strip('[]').replace(',', ' ')
        return np.array([float(token) for token in clean_string.split()])
    except (AttributeError, TypeError, ValueError):
        return np.array([])

def parse_boolean_array(array_string):
    """Parse string representation of boolean array.

    Accepts the bracketed text produced by printing a numpy boolean array
    ("[ True False]") as well as a comma-separated list ("[True, False]").
    A token is True when it equals "true" ignoring case; any other token is
    False.

    Returns an empty array when the input is not a string (for example a
    missing CSV cell).
    """
    try:
        # Commas become separators so "True," is not misread as False
        clean_string = array_string.strip().strip('[]').replace(',', ' ')
        return np.array([token.lower() == 'true' for token in clean_string.split()])
    except (AttributeError, TypeError):
        return np.array([])

def process_optimized_layers(input_df, optimization_df):
    """
    Process layer data based on optimization results

    For every station in ``input_df`` the first matching row of
    ``optimization_df`` supplies the active-layer flags and the optimized
    thicknesses (both stored as strings). Layers are matched by position;
    active layers with a thickness above 1e-10 are kept with their thickness
    replaced, all others are dropped. Stations without an optimization row
    are skipped with a printed message.

    Parameters:
    input_df: DataFrame with original layer data
    optimization_df: DataFrame with optimization results

    Returns:
    DataFrame of the retained layers; empty with the columns of ``input_df``
    when no layer is retained.
    """
    result_rows = []

    # Process each station
    for station_id in input_df['station_id'].unique():
        # Get optimization data for this station
        matches = optimization_df[optimization_df['station_id'] == station_id]
        if matches.empty:
            # .iloc[0] on an empty selection would raise IndexError
            print(f"No optimization results for station {station_id}; skipping")
            continue
        opt_row = matches.iloc[0]

        # Get station data
        station_data = input_df[input_df['station_id'] == station_id]
        n_layers = len(station_data)

        # Parse the string representations into arrays
        active_layers = parse_boolean_array(opt_row['active_layers'])
        new_thicknesses = parse_array_string(opt_row['optimized_thicknesses'])

        # Filter and update thicknesses
        for idx, (is_active, new_thickness) in enumerate(zip(active_layers, new_thicknesses)):
            if idx >= n_layers:
                break  # more optimization entries than layers for this station
            if is_active and new_thickness > 1e-10:  # Filter out very small thicknesses
                row = station_data.iloc[idx].copy()
                row['thickness'] = new_thickness
                result_rows.append(row)

    # Create final dataframe
    if not result_rows:
        return pd.DataFrame(columns=input_df.columns)
    return pd.DataFrame(result_rows)

# Example usage:
# Read the input data (assuming this is your layer data)
# dfLayers = pd.read_csv('original_layers.csv')

# Read optimization results
# Array-valued columns come back from the CSV as strings and are parsed
# inside process_optimized_layers.
optimization_df = pd.read_csv('optimization_results.csv')

# Process the data
# Uses the dfLayers table loaded in the first cell as the original layers.
result_df = process_optimized_layers(dfLayers, optimization_df)

# Save to CSV
result_df.to_csv('optimized_layers.csv', index=False)