In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the data
file_path = '/content/ee-chart (2).csv'  # Update this path
data = pd.read_csv(file_path)

# Convert date column to datetime and CH4 concentration to float
data['system:time_start'] = pd.to_datetime(data['system:time_start'])
data['CH4_column_volume_mixing_ratio_dry_air'] = data['CH4_column_volume_mixing_ratio_dry_air'].str.replace(',', '').astype(float)

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
data['CH4_scaled'] = scaler.fit_transform(data[['CH4_column_volume_mixing_ratio_dry_air']])

# Function to create sequences for LSTM input
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data)-seq_length-1):
        x = data.iloc[i:(i+seq_length), -1]
        y = data.iloc[i+seq_length, -1]
        xs.append(np.array(x))
        ys.append(y)
    return np.array(xs), np.array(ys)

# Create sequences
seq_length = 10
X, y = create_sequences(data, seq_length)

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

# DataLoader
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Define the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])  # Select the last time-step output
        return out

# Instantiate the model
model = LSTMModel(input_dim=1, hidden_dim=100, num_layers=2, output_dim=1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training the model
def train_model(model, train_loader, criterion, optimizer, num_epochs=100):
    for epoch in range(num_epochs):
        for seq, labels in train_loader:
            optimizer.zero_grad()
            seq = seq.unsqueeze(-1)  # Adding the required extra dimension
            y_pred = model(seq)
            loss = criterion(y_pred.flatten(), labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs} Loss: {loss.item()}')

# Run training
train_model(model, train_loader, criterion, optimizer)

# Evaluation
model.eval()
with torch.no_grad():
    predictions = []
    for inputs, labels in DataLoader(TensorDataset(X_test, y_test), batch_size=64):
        inputs = inputs.unsqueeze(-1)  # Ensure correct input shape
        outputs = model(inputs)
        predictions.extend(outputs.flatten().tolist())

    # Convert predictions to numpy for evaluation
    predictions = np.array(predictions)
    true_values = y_test.numpy()

    mse = mean_squared_error(true_values, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(true_values, predictions)
    mape = np.mean(np.abs((true_values - predictions) / true_values)) * 100  # Avoid division by zero issues

    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


Epoch 1/100 Loss: 0.035688046365976334
Epoch 2/100 Loss: 0.004759605973958969
Epoch 3/100 Loss: 0.00297067710198462
Epoch 4/100 Loss: 0.0033418412785977125
Epoch 5/100 Loss: 0.003591222921386361
Epoch 6/100 Loss: 0.001556761679239571
Epoch 7/100 Loss: 0.002942205173894763
Epoch 8/100 Loss: 0.0038987204898148775
Epoch 9/100 Loss: 0.002072230912744999
Epoch 10/100 Loss: 0.0028234061319381
Epoch 11/100 Loss: 0.059303879737854004
Epoch 12/100 Loss: 0.002344263019040227
Epoch 13/100 Loss: 0.005316002760082483
Epoch 14/100 Loss: 0.000706276623532176
Epoch 15/100 Loss: 0.0030195508152246475
Epoch 16/100 Loss: 0.0013986527919769287
Epoch 17/100 Loss: 0.00221039867028594
Epoch 18/100 Loss: 0.0031271325424313545
Epoch 19/100 Loss: 0.001509130815975368
Epoch 20/100 Loss: 0.001249003573320806
Epoch 21/100 Loss: 0.0019638296216726303
Epoch 22/100 Loss: 0.0020698413718491793
Epoch 23/100 Loss: 0.0023856668267399073
Epoch 24/100 Loss: 0.001040204893797636
Epoch 25/100 Loss: 0.0613897442817688
Epoch 2

In [2]:
# Function to make a prediction
def make_prediction(model, sequence):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Convert the sequence to a tensor, add necessary dimensions (batch_size and number_of_features)
        sequence_tensor = torch.tensor(sequence).float().unsqueeze(0).unsqueeze(-1)
        # Predict
        prediction = model(sequence_tensor)
        # Flatten to get the prediction value and scale it back using the scaler used during training
        predicted_value = prediction.item()
        predicted_value_scaled_back = scaler.inverse_transform([[predicted_value]])[0][0]
    return predicted_value_scaled_back

# Select a sequence from the test set
test_index = 0  # Change this index to select different parts of the test set
test_sequence = X_test[test_index]

# Make a prediction
predicted_value = make_prediction(model, test_sequence)
print(f"Predicted value for the selected test sequence: {predicted_value:.4f}")


Predicted value for the selected test sequence: 1871.5427


  sequence_tensor = torch.tensor(sequence).float().unsqueeze(0).unsqueeze(-1)


In [3]:
# Assuming `data` is a DataFrame with a column 'CH4_scaled' for scaled methane concentrations

# Function to predict a future point given the last known sequence
def predict_future(model, last_sequence, steps_ahead):
    current_sequence = last_sequence.copy()
    for _ in range(steps_ahead):
        next_value_scaled = make_prediction(model, current_sequence)
        next_value = scaler.inverse_transform([[next_value_scaled]])[0][0]  # Rescale to original range
        current_sequence = np.roll(current_sequence, -1)  # Shift the sequence one step left
        current_sequence[-1] = next_value_scaled  # Insert the predicted value as the last item
    return next_value

# Helper function to make a prediction given a sequence
def make_prediction(model, sequence):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Ensure sequence is a numpy array and reshape appropriately
        sequence_tensor = torch.tensor(sequence).float().unsqueeze(0).unsqueeze(-1)
        prediction = model(sequence_tensor)
        return prediction.item()

# Prepare the last known sequence from the scaled data
if isinstance(data, pd.DataFrame) and 'CH4_column_volume_mixing_ratio_dry_air' in data.columns:
    # Extract the last 10 scaled values as a numpy array
    last_sequence = data['CH4_scaled'].values[-10:]
else:
    raise ValueError("Data is not in the expected format or 'CH4_scaled' column is missing.")

# Example usage
prediction = predict_future(model, last_sequence, 28)  # Predict 28 days ahead

# Print the predicted methane concentration for March 7, 2024
print(f"Predicted methane concentration : {prediction:.4f}")


Predicted methane concentration : 1909.1511


In [4]:
# prompt: plot the time series grpah in plotly

import plotly.graph_objects as go

# Create a figure
fig = go.Figure()

# Add traces for actual and predicted values
fig.add_trace(go.Scatter(x=data['system:time_start'], y=data['CH4_column_volume_mixing_ratio_dry_air'], name='Actual'))
fig.add_trace(go.Scatter(x=pd.date_range(start=data['system:time_start'].iloc[-1], periods=29), y=predictions, name='Predicted'))

# Update layout
fig.update_layout(title='Methane Concentration Time Series', xaxis_title='Date', yaxis_title='Methane Concentration (ppbv)')

# Show the plot
fig.show()
