# Well Time Series Analysis

This notebook manages the workflow for analyzing and predicting well sensor data using our LSTM-based model.

In [None]:
# Import required libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import json
from datetime import datetime

# Import our custom modules
from src.models.well_ts_model import WellTimeSeriesModel
from src.utils.data_processor import prepare_dataloader, WellTimeSeriesDataset
from src.train import train_model, load_config

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so prefer the new name when the
# installed Matplotlib provides it and fall back to the legacy one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline

## 1. Data Loading and Preprocessing

Load the time series data from well sensors and the configuration for tag aliases.

In [None]:
# Input locations -- edit both paths so they point at your own files.
data_path = '../data/well_sensor_data.csv'  # Update this path
config_path = '../data/tag_config.json'     # Update this path

# Read the sensor CSV; on success show its shape, column names and first rows.
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    # The file is not there yet: tell the user where it is expected.
    print(f"Please place your data file at {data_path}")
else:
    print("Data shape:", data.shape)
    print("\nColumns:", data.columns.tolist())
    print("\nSample data:")
    display(data.head())

# Read the tag configuration and echo it back as indented JSON.
try:
    config = load_config(config_path)
except FileNotFoundError:
    # The configuration file is not there yet: tell the user where it is expected.
    print(f"Please place your configuration file at {config_path}")
else:
    config_text = json.dumps(config, indent=2)
    print("\nLoaded tag configuration:")
    print(config_text)

## 2. Data Visualization and Analysis

In [None]:
def plot_time_series(data, columns, figsize=(15, 8)):
    """Draw each requested column as a labelled line on one shared set of axes.

    Args:
        data: Object indexable by column name (e.g. a DataFrame); each
            ``data[col]`` is handed straight to matplotlib.
        columns: Iterable of column names; every name becomes a legend entry.
        figsize: Figure size forwarded to matplotlib.
    """
    _, axes = plt.subplots(figsize=figsize)
    for name in columns:
        axes.plot(data[name], label=name)
    axes.legend()
    axes.set_title('Time Series Data')
    axes.set_xlabel('Time Steps')
    axes.set_ylabel('Value')
    plt.show()

# Plot your sensor data
# Modify the columns list based on your actual data columns
sensor_columns = ['pressure', 'temperature', 'vibration', 'depth']  # Update these

# Guard only the lookup of `data`. Wrapping the plotting call itself in
# `except NameError` would report any NameError raised while plotting as
# "data not loaded", hiding the real problem.
try:
    data
except NameError:
    print("Please load the data first")
else:
    plot_time_series(data, sensor_columns)

## 3. Model Training

In [None]:
# Set training parameters
params = {
    'epochs': 100,
    'batch_size': 32,
    'sequence_length': 24,  # 24 time steps for prediction
    'model_save_path': '../models/well_ts_model.pth'
}

# Train the model
# Guard only the lookups of `data` and `config`. Training runs project code
# for a long time; a NameError raised in there is a genuine bug and must not
# be reported as "inputs not loaded".
try:
    data, config
except NameError:
    print("Please load the data and configuration first")
else:
    model = train_model(
        train_data=data,
        config_data=config,
        **params
    )
    print("\nModel training complete!")

## 4. Model Evaluation and Prediction

In [None]:
def make_predictions(model, data, sequence_length=24):
    """Run the trained model over batches built from ``data`` and stack the outputs.

    Args:
        model: Trained model. It must provide ``eval()`` and be callable on a
            batch; if it exposes ``parameters()``, inputs are moved to the
            device of its first parameter before the forward pass.
        data: Sensor data handed to ``prepare_dataloader``.
        sequence_length: Window length forwarded to ``prepare_dataloader``.

    Returns:
        numpy.ndarray: The per-batch model outputs stacked with ``np.vstack``,
        in the order the loader yields them.

    Raises:
        ValueError: If the loader yields no batches.
    """
    model.eval()
    # NOTE(review): presumably prepare_dataloader yields windows in time order
    # (no shuffling) -- confirm, since callers plot the rows against time.
    loader = prepare_dataloader(data, batch_size=1, sequence_length=sequence_length)

    # Find the device holding the model weights so inputs can be moved there;
    # a CPU batch fed to a GPU-resident model would fail in the forward pass.
    parameters = getattr(model, 'parameters', None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else None

    predictions = []
    with torch.no_grad():
        for batch_data, _ in loader:
            if device is not None and hasattr(batch_data, 'to'):
                batch_data = batch_data.to(device)
            output = model(batch_data)
            # .cpu() is required before .numpy() when the output lives on an
            # accelerator; .detach() keeps this safe outside no_grad too.
            predictions.append(output.detach().cpu().numpy())

    if not predictions:
        # np.vstack([]) would raise an opaque "need at least one array" error.
        raise ValueError(
            "No batches were produced; the data may be shorter than "
            f"sequence_length={sequence_length}"
        )
    return np.vstack(predictions)

# Make predictions
# Guard only the lookups of the names earlier cells must have produced, so a
# NameError raised inside prediction or plotting code is not misreported as
# "model not trained".
try:
    model, data, params, sensor_columns
except NameError:
    print("Please train the model first")
else:
    # Use the same window length the model was trained with instead of the
    # function default, so both stay in sync when `params` is edited.
    seq_len = params['sequence_length']
    predictions = make_predictions(model, data, sequence_length=seq_len)
    print("Generated predictions shape:", predictions.shape)

    # Plot actual vs predicted values
    # NOTE(review): assumes prediction column i corresponds to
    # sensor_columns[i] and row k to the sample at position seq_len + k --
    # confirm against the model's output layout.
    plt.figure(figsize=(15, 8))
    for i, col in enumerate(sensor_columns):
        plt.subplot(len(sensor_columns), 1, i+1)
        actual = data[col].iloc[seq_len:]
        # A Series is drawn against its index while a bare array is drawn
        # against 0..M-1; draw the predictions on the index of `actual` so
        # both curves share the same x positions.
        n_aligned = min(len(actual), len(predictions))
        plt.plot(actual, label='Actual')
        plt.plot(actual.index[:n_aligned], predictions[:n_aligned, i], label='Predicted')
        plt.title(f'{col} - Actual vs Predicted')
        plt.legend()
    plt.tight_layout()
    plt.show()