In [None]:
# Importing necessary libraries
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# Load the MNIST dataset (train/test splits of 28x28 grayscale digit images)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel intensities from [0, 255] down to [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build a simple fully connected classifier.
# An explicit Input layer is used instead of passing `input_shape=` to the
# first layer, which is the form recommended for Sequential models.
model = models.Sequential(
    [
        layers.Input(shape=(28, 28)),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(10, activation="softmax"),
    ]
)

# Integer class labels -> sparse categorical cross-entropy
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

# Validate on a slice held out from the training data so the test set stays
# untouched until the final evaluation below.
history = model.fit(x_train, y_train, epochs=10, validation_split=0.1)

# Final, unbiased estimate on the test set
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f} - Test accuracy: {test_accuracy:.4f}")

# Plot training and validation loss per epoch
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

# The CSV lives in <parent of the working directory>/data/
root_dir = os.path.normpath(os.path.join(os.getcwd(), ".."))
dataset_path: str = os.path.join(
    root_dir,
    "data",
    "smart_compost_dataset101.csv",
)


class CompostDataset(Dataset):
    """Tabular dataset: first numeric CSV column is the target, the rest are features.

    Args:
        csv_path: Path of the CSV file to load. When omitted, the module-level
            ``dataset_path`` is used, so ``CompostDataset()`` keeps working.

    Raises:
        ValueError: If the CSV contains no numeric column at all.
    """

    def __init__(self, csv_path=None):
        if csv_path is None:
            csv_path = dataset_path

        df = pd.read_csv(csv_path, delimiter=",")

        # Keep numeric columns only: text columns cannot become float tensors
        numeric_df = df.select_dtypes(include="number")
        if numeric_df.shape[1] == 0:
            raise ValueError(f"No numeric columns found in {csv_path!r}.")

        # Target is the first numeric column, features are the remaining ones
        self.y = torch.tensor(numeric_df.iloc[:, 0].values, dtype=torch.float32)
        self.x = torch.tensor(numeric_df.iloc[:, 1:].values, dtype=torch.float32)
        self.n_samples = numeric_df.shape[0]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]


# Build the dataset and look at its first (features, target) sample
dataset = CompostDataset()
first_data = dataset[0]
features, labels = first_data[0], first_data[1]
print("Features:", features)
print("Labels:", labels)

In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader


class CompostDataset(Dataset):
    """Dataset of compost variables built from a CSV of variable descriptions.

    Each sample is ``(features, variable_idx)``: ``features`` holds the numeric
    midpoint of the row's ``OptimalRange`` and ``variable_idx`` is the integer
    id assigned to the row's ``Variable`` name.

    Args:
        csv_path: Path to the CSV file. It must provide the columns
            ``Variable``, ``OptimalRange``, ``Type``, ``Unit``,
            ``Dependencies``, ``IntroductionStage``, ``Frequency`` and ``Notes``.

    Raises:
        ValueError: If no row has a parseable ``OptimalRange``.
    """

    def __init__(self, csv_path):
        df = pd.read_csv(csv_path, delimiter=",", skip_blank_lines=True)

        # Clean column names
        df.columns = df.columns.str.strip()

        # Keep only rows that name a variable. Copy so the column assignments
        # below do not write into a filtered view of the original frame.
        df = df[df["Variable"].notna()].copy()

        # Map every variable name to a numeric id (ids follow first appearance)
        self.variable_mapping = {
            var: idx for idx, var in enumerate(df["Variable"].unique())
        }

        # Collapse each textual range to a single number
        df["OptimalRange"] = df["OptimalRange"].apply(self._range_midpoint)

        # Numeric columns that make up the feature set
        numeric_columns = ["OptimalRange"]  # Add other numeric columns as needed
        for col in numeric_columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Rows whose range could not be parsed carry NaN and are discarded
        df = df.dropna(subset=numeric_columns)

        if df.empty:
            raise ValueError(
                "Dataset is empty after preprocessing. Please check your data format."
            )

        # Store processed data
        self.variable_names = df["Variable"].values
        self.features = torch.tensor(df[numeric_columns].values, dtype=torch.float32)
        self.n_samples = len(df)

        # Descriptive columns kept row-aligned with the samples
        self.metadata = df[
            ["Type", "Unit", "Dependencies", "IntroductionStage", "Frequency", "Notes"]
        ]

    @staticmethod
    def _range_midpoint(range_str):
        """Turn a range string into one float, or ``None`` when it cannot be parsed.

        ``">7"`` yields ``7.0``; ``"45-65"`` yields the midpoint ``55.0``; for a
        ratio such as ``"25-30:1"`` only the part before the colon is used.
        """
        if not isinstance(range_str, str):
            return None
        text = range_str.strip()
        try:
            if ">" in text:
                return float(text.replace(">", ""))
            low, high = map(float, text.split(":")[0].split("-"))
            return (low + high) / 2
        except ValueError:
            # Not a number, or not exactly two bounds
            return None

    def __len__(self):
        """Return the number of usable rows."""
        return self.n_samples

    def __getitem__(self, index):
        """Return ``(features, variable_idx)`` for the row at ``index``."""
        variable_name = self.variable_names[index]
        variable_idx = torch.tensor(
            self.variable_mapping[variable_name], dtype=torch.long
        )
        return self.features[index], variable_idx

    def get_metadata(self, index):
        """Return metadata for a given index"""
        return self.metadata.iloc[index]



root_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
dataset_path = os.path.join(root_dir, "data", "smart_compost_dataset101.csv")

try:
    dataset = CompostDataset(dataset_path)

    # Metadata (type, unit, dependencies, ...) of the first variable
    metadata = dataset.get_metadata(0)

    # Batch the samples in shuffled groups of four
    dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)

    # Pull a single batch to sanity-check what the loader yields
    features, labels = next(iter(dataloader))

    print(f'features: {features}')
    print(f'Labels : {labels}')

except Exception as e:
    # Best-effort demo cell: report the problem instead of aborting the notebook
    print(f"Error: {e}")

In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader


class CompostDataset(Dataset):
    """Compost variables with standardised numeric and encoded categorical features.

    Each sample is ``(features, label)``: ``features`` is the z-scored vector
    ``[OptimalRange midpoint, Frequency code, IntroductionStage code]`` and
    ``label`` is the integer id of the row's ``Variable`` name.

    Args:
        csv_path: CSV file providing the columns ``Variable``,
            ``OptimalRange``, ``Frequency`` and ``IntroductionStage``.

    Raises:
        ValueError: If no row has a parseable ``OptimalRange``.
    """

    def __init__(self, csv_path):
        df = pd.read_csv(csv_path, delimiter=",", skip_blank_lines=True)

        # Clean column names
        df.columns = df.columns.str.strip()

        # Keep only rows that name a variable. Copy so later column
        # assignments do not write into a filtered view.
        df = df[df["Variable"].notna()].copy()

        # Map every variable name to a numeric id (ids follow first appearance)
        self.variable_mapping = {
            var: idx for idx, var in enumerate(df["Variable"].unique())
        }

        # Collapse each textual range to a single number
        df["OptimalRange"] = df["OptimalRange"].apply(self._range_midpoint)

        # Encode categorical columns as integer codes (missing values become -1)
        df["Frequency"] = df["Frequency"].astype("category").cat.codes
        df["IntroductionStage"] = df["IntroductionStage"].astype("category").cat.codes

        numeric_columns = ["OptimalRange", "Frequency", "IntroductionStage"]
        for col in numeric_columns:
            df[col] = pd.to_numeric(df[col], errors="coerce").astype("float64")

        # Discard unusable rows before computing statistics, so they do not
        # influence the mean and standard deviation.
        df = df.dropna(subset=numeric_columns).copy()

        if df.empty:
            raise ValueError(
                "Dataset is empty after preprocessing. Please check your data format."
            )

        # Standardise each column. A constant column (std == 0) or a single-row
        # dataset (std is NaN) would otherwise turn the whole column into NaN.
        col_mean = df[numeric_columns].mean()
        col_std = df[numeric_columns].std().replace(0, 1).fillna(1)
        df[numeric_columns] = (df[numeric_columns] - col_mean) / col_std

        # Store processed data
        self.variable_names = df["Variable"].values
        self.features = torch.tensor(df[numeric_columns].values, dtype=torch.float32)
        self.labels = torch.tensor(
            [self.variable_mapping[var] for var in df["Variable"].values],
            dtype=torch.long,
        )
        self.n_samples = len(df)

    @staticmethod
    def _range_midpoint(range_str):
        """Turn a range string into one float, or ``None`` when it cannot be parsed.

        ``">7"`` yields ``7.0``; ``"45-65"`` yields the midpoint ``55.0``; for a
        ratio such as ``"25-30:1"`` only the part before the colon is used.
        """
        if not isinstance(range_str, str):
            return None
        text = range_str.strip()
        try:
            if ">" in text:
                return float(text.replace(">", ""))
            low, high = map(float, text.split(":")[0].split("-"))
            return (low + high) / 2
        except ValueError:
            # Not a number, or not exactly two bounds
            return None

    def __len__(self):
        """Return the number of usable rows."""
        return self.n_samples

    def __getitem__(self, index):
        """Return ``(features, label)`` for the row at ``index``."""
        return self.features[index], self.labels[index]


root_dir = os.path.normpath(os.path.join(os.getcwd(), ".."))
dataset_path = os.path.join(root_dir, "data", "smart_compost_dataset101.csv")

try:
    dataset = CompostDataset(dataset_path)

    # Shuffled batches of four, produced by two worker processes
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

    # Fetch one batch and show it
    data_iter = iter(dataloader)
    features, labels = next(data_iter)

    print(f"Features: {features}")
    print(f"Labels: {labels}")

except Exception as err:
    print(f"Error: {err}")

In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np


class CompostDataset(Dataset):
    """Compost variables with min-max scaled range bounds and dependency masks.

    Every sample is ``(features, variable_idx, dep_mask)``:

    * ``features`` - the row's lower/upper ``OptimalRange`` bounds, min-max
      scaled per column over the whole dataset.
    * ``variable_idx`` - integer id of the row's ``Variable`` name.
    * ``dep_mask`` - multi-hot vector over all variable ids marking the entries
      of the ``|``-separated ``Dependencies`` cell that are known variables.

    Args:
        csv_path: CSV file providing the columns ``Variable``,
            ``OptimalRange``, ``Unit``, ``Type`` and ``Dependencies``.

    Raises:
        ValueError: If no row has a parseable ``OptimalRange``.
    """

    def __init__(self, csv_path):
        df = pd.read_csv(csv_path, delimiter=",", skip_blank_lines=True)

        # Clean column names
        df.columns = df.columns.str.strip()

        # Keep only rows that name a variable. Copy so the bound columns added
        # below are not written into a filtered view.
        df = df[df["Variable"].notna()].copy()

        # Map every variable name to a numeric id (ids follow first appearance)
        self.variable_mapping = {
            var: idx for idx, var in enumerate(df["Variable"].unique())
        }

        # Split each textual range into numeric lower/upper bounds
        ranges = df["OptimalRange"].apply(self._parse_range)
        df["lower_bound"] = pd.to_numeric(
            ranges.apply(lambda bounds: bounds[0]), errors="coerce"
        )
        df["upper_bound"] = pd.to_numeric(
            ranges.apply(lambda bounds: bounds[1]), errors="coerce"
        )

        # Rows without usable bounds must go: a single NaN would make the
        # column min/max NaN and thereby every normalised feature NaN.
        feature_cols = ["lower_bound", "upper_bound"]
        df = df.dropna(subset=feature_cols)
        if df.empty:
            raise ValueError(
                "Dataset is empty after preprocessing. Please check your data format."
            )

        self.features = torch.tensor(df[feature_cols].values, dtype=torch.float32)
        self.features = self.normalize_features(self.features)

        # Store variable information (row-aligned with the features)
        self.variables = df["Variable"].values
        self.units = df["Unit"].values
        self.types = df["Type"].values
        self.dependencies = df["Dependencies"].str.split("|").values
        self.n_samples = len(df)

        # Store metadata
        self.metadata = {
            "variable_names": self.variables,
            "units": self.units,
            "types": self.types,
            "dependencies": self.dependencies,
        }

    @staticmethod
    def _parse_range(range_str):
        """Return ``(low, high)`` for a range string, or ``(None, None)``.

        ``">7"`` is read as ``(7, 7 * 1.2)``; ``"45-65"`` as ``(45, 65)``; for a
        ratio such as ``"25-30:1"`` only the part before the colon is used.
        """
        if not isinstance(range_str, str):
            return None, None
        text = range_str.strip()
        try:
            if ">" in text:
                threshold = float(text.replace(">", ""))
                return threshold, threshold * 1.2
            low, high = map(float, text.split(":")[0].split("-"))
            return low, high
        except ValueError:
            # Not a number, or not exactly two bounds
            return None, None

    def normalize_features(self, features):
        """Normalize features to [0,1] range

        A column whose values are all equal has no spread to scale by; it is
        mapped to zeros instead of dividing by zero.
        """
        min_vals = features.min(dim=0)[0]
        max_vals = features.max(dim=0)[0]
        span = max_vals - min_vals
        span = torch.where(span == 0, torch.ones_like(span), span)
        return (features - min_vals) / span

    def __len__(self):
        """Return the number of usable rows."""
        return self.n_samples

    def __getitem__(self, index):
        """Return features, variable index and dependency mask for the given index"""
        features = self.features[index]
        variable_idx = torch.tensor(
            self.variable_mapping[self.variables[index]], dtype=torch.long
        )

        # Multi-hot mask over the known variables this row depends on
        dep_mask = torch.zeros(len(self.variable_mapping))
        deps = self.dependencies[index]
        if isinstance(deps, (list, np.ndarray)):
            for dep in deps:
                if dep in self.variable_mapping and dep != "All_Above_Variables":
                    dep_mask[self.variable_mapping[dep]] = 1
        return features, variable_idx, dep_mask

    def get_variable_name(self, index):
        """Get variable name for given index"""
        return self.variables[index]

    def get_unit(self, index):
        """Get unit for given index"""
        return self.units[index]

    def get_dependencies(self, index):
        """Get dependencies for given index"""
        return self.dependencies[index]

    def get_optimal_range(self, index):
        """Get the normalized (lower, upper) optimal-range bounds for given index"""
        return self.features[index].numpy()


# Example usage
def _transpose_batch(batch):
    """Regroup a list of samples into one tuple per sample field."""
    return tuple(zip(*batch))


def create_dataloader(dataset_path, batch_size=4, shuffle=True, num_workers=2):
    """Build a ``CompostDataset`` and a ``DataLoader`` over it.

    Batches are delivered as tuples of per-field tuples (not stacked tensors).

    Args:
        dataset_path: CSV file handed to ``CompostDataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples.
        num_workers: Number of loader worker processes; ``0`` loads in the
            calling process.

    Returns:
        ``(dataset, dataloader)``, or ``(None, None)`` when construction fails
        (the error is printed).
    """
    try:
        dataset = CompostDataset(dataset_path)
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            # A named function instead of a lambda: worker processes that are
            # started by pickling their arguments cannot receive a lambda.
            collate_fn=_transpose_batch,
        )
        return dataset, dataloader
    except Exception as e:
        print(f"Error creating dataloader: {e}")
        return None, None


if __name__ == "__main__":
    root_dir = os.path.normpath(os.path.join(os.getcwd(), ".."))
    dataset_path = os.path.join(root_dir, "data", "smart_compost_dataset101.csv")

    dataset, dataloader = create_dataloader(dataset_path)
    if dataloader:
        # Draw one batch; every field arrives as a tuple of per-sample values
        data_iter = iter(dataloader)
        first_batch = next(data_iter)
        features, variable_indices, dependency_masks = first_batch

        stacked_features = torch.stack(features)
        print("Batch features shape:", stacked_features.shape)
        print("Batch variable indices:", variable_indices)
        print("First item dependencies:", dataset.get_dependencies(0))

In [None]:
# Create dataset and dataloader
# NOTE: create_compost_dataloader is defined in a later cell of this notebook;
# on a fresh kernel that cell has to be run before this one.
dataset, dataloader = create_compost_dataloader(
    dataset_path, batch_size=32, sequence_length=30
)

# create_compost_dataloader signals failure by returning (None, None)
if dataloader is None:
    raise RuntimeError(
        "Could not create the compost dataloader; see the error printed above."
    )

# Training loop example
for batch_features, batch_targets in dataloader:
    # batch_features: shape [batch_size, sequence_length, n_features]
    # batch_targets: shape [batch_size, n_targets]

    # Features include:
    # - Measured values for all variables
    # - Waste type encoding
    # - Weather data

    # Targets are next-day predictions for:
    # - Temperature
    # - Moisture content

    # Your model training code here
    pass

In [None]:
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader


class WasteType:
    """Catalogue of compost waste categories and their qualitative properties."""

    TYPES = {
        "GREEN": {
            "examples": ["grass", "vegetable_scraps", "coffee_grounds", "fruit_waste"],
            "nitrogen_content": "high",
            "moisture_content": "high",
            "decomposition_rate": "fast",
        },
        "BROWN": {
            "examples": ["leaves", "straw", "paper", "cardboard", "wood_chips"],
            "nitrogen_content": "low",
            "moisture_content": "low",
            "decomposition_rate": "slow",
        },
        "MIXED": {
            "examples": ["food_scraps", "garden_waste"],
            "nitrogen_content": "medium",
            "moisture_content": "medium",
            "decomposition_rate": "medium",
        },
    }

    @staticmethod
    def get_type_encoding(waste_type: str) -> torch.Tensor:
        """Return the one-hot vector of ``waste_type``.

        The vector has one slot per key of ``TYPES`` (in declaration order).
        An unknown type produces an all-zero vector.
        """
        one_hot = torch.zeros(len(WasteType.TYPES))
        if waste_type not in WasteType.TYPES:
            return one_hot

        for position, known_type in enumerate(WasteType.TYPES):
            if known_type == waste_type:
                one_hot[position] = 1
                break
        return one_hot


class SeasonalEffects:
    """Season lookup plus per-season temperature and moisture modifiers."""

    SEASONS = {
        "SPRING": {"temp_modifier": 0.2, "moisture_modifier": 0.3},
        "SUMMER": {"temp_modifier": 0.4, "moisture_modifier": -0.2},
        "FALL": {"temp_modifier": 0.0, "moisture_modifier": 0.1},
        "WINTER": {"temp_modifier": -0.3, "moisture_modifier": 0.2},
    }

    @staticmethod
    def get_season(date: datetime) -> str:
        """Map the month of ``date`` to its season name.

        March-May is spring, June-August summer, September-November fall and
        December-February winter.
        """
        # Rotating December to position 0 lines the seasons up in blocks of three
        season_index = (date.month % 12) // 3
        return ("WINTER", "SPRING", "SUMMER", "FALL")[season_index]


class CompostTimeSeriesDataset(Dataset):
    """Synthetic daily compost time series derived from a table of optimal ranges.

    For every CSV row with a parseable ``OptimalRange`` a year of daily values
    is simulated. Each sample is ``(features, target)``:

    * ``features`` - ``sequence_length`` consecutive days; each day holds the
      simulated measurements, a one-hot waste type and three weather values.
    * ``target`` - the values of ``TARGET_VARIABLES`` on the day after the window.

    Args:
        csv_path: CSV file with at least the columns ``Variable`` and
            ``OptimalRange``.
        sequence_length: Number of days per input window.
        seed: Optional seed for NumPy's global random generator, making the
            simulated data reproducible. ``None`` leaves the generator untouched.

    Raises:
        ValueError: If a target variable has no simulated series.
    """

    # Number of simulated days per variable
    N_DAYS = 365
    # Variables predicted one day ahead
    TARGET_VARIABLES = ("Temperature", "Moisture_Content")

    def __init__(
        self, csv_path: str, sequence_length: int = 30, seed: Optional[int] = None
    ):
        super().__init__()
        self.sequence_length = sequence_length
        if seed is not None:
            np.random.seed(seed)

        self.base_data = pd.read_csv(csv_path, delimiter=",", skip_blank_lines=True)
        self.base_data.columns = self.base_data.columns.str.strip()

        # One calendar shared by measurements and weather, so both series are
        # guaranteed to describe the same days.
        start_date = datetime.now() - timedelta(days=self.N_DAYS)
        self.dates = [start_date + timedelta(days=i) for i in range(self.N_DAYS)]

        # Initialize time series data structures
        self.time_series_data = self._generate_time_series_data()
        self.waste_types = self._initialize_waste_types()
        self.weather_data = self._initialize_weather_data()

        # Process optimal ranges
        self._process_optimal_ranges()

        # Create feature matrices
        self._create_feature_matrices()

    def _generate_time_series_data(self) -> Dict[str, List[float]]:
        """Generate synthetic time series data for each variable"""
        time_series = {}
        for _, row in self.base_data.iterrows():
            optimal_range = self._parse_range(row["OptimalRange"])
            if optimal_range is None:
                continue  # no usable range -> nothing to simulate
            mid_point = (optimal_range[0] + optimal_range[1]) / 2
            time_series[row["Variable"]] = [
                self._generate_realistic_value(mid_point, optimal_range, day)
                for day in self.dates
            ]
        return time_series

    def _generate_realistic_value(
        self, mid_point: float, optimal_range: Tuple[float, float], date: datetime
    ) -> float:
        """Generate one daily value from the midpoint, a seasonal shift and noise."""
        season = SeasonalEffects.get_season(date)
        season_effect = SeasonalEffects.SEASONS[season]["temp_modifier"]

        # Independent Gaussian noise per day (5% relative standard deviation)
        daily_variation = np.random.normal(0, 0.05)
        value = mid_point * (1 + season_effect + daily_variation)

        # Clamp the value into the optimal range
        return max(optimal_range[0], min(optimal_range[1], value))

    def _initialize_waste_types(self) -> List[str]:
        """Draw a random waste type for every simulated day."""
        return np.random.choice(
            list(WasteType.TYPES.keys()), size=self.N_DAYS
        ).tolist()

    def _initialize_weather_data(self) -> Dict[str, List[float]]:
        """Generate synthetic weather data"""
        weather_data = {"temperature": [], "humidity": [], "rainfall": []}

        for date in self.dates:
            season = SeasonalEffects.get_season(date)
            season_effects = SeasonalEffects.SEASONS[season]

            # Generate weather data with seasonal effects
            weather_data["temperature"].append(
                20 + season_effects["temp_modifier"] * 30 + np.random.normal(0, 2)
            )
            weather_data["humidity"].append(
                60 + season_effects["moisture_modifier"] * 30 + np.random.normal(0, 5)
            )
            weather_data["rainfall"].append(
                max(
                    0,
                    np.random.exponential(scale=2)
                    * (1 + season_effects["moisture_modifier"]),
                )
            )

        return weather_data

    def _parse_range(self, range_str: str) -> Optional[Tuple[float, float]]:
        """Parse a range string into ``(low, high)``.

        ``">7"`` is read as ``(7, 7 * 1.2)``; ``"45-65"`` as ``(45, 65)``; for a
        ratio such as ``"25-30:1"`` only the part before the colon is used.

        Returns:
            The two bounds, or ``None`` when the input is not a string or does
            not consist of exactly two numeric bounds.
        """
        if not isinstance(range_str, str):
            return None
        text = range_str.strip()
        try:
            if ">" in text:
                val = float(text.replace(">", ""))
                return (val, val * 1.2)
            low, high = map(float, text.split(":")[0].split("-"))
            return (low, high)
        except ValueError:
            # Not a number, or not exactly two bounds
            return None

    def _process_optimal_ranges(self):
        """Process optimal ranges for all variables"""
        self.optimal_ranges = {}
        for _, row in self.base_data.iterrows():
            range_vals = self._parse_range(row["OptimalRange"])
            if range_vals is not None:
                self.optimal_ranges[row["Variable"]] = range_vals

    def _create_feature_matrices(self):
        """Create the sliding-window feature tensors and next-day targets."""
        missing = [
            name for name in self.TARGET_VARIABLES if name not in self.time_series_data
        ]
        if missing:
            raise ValueError(
                f"No time series could be generated for target variable(s): {missing}. "
                "Check the 'Variable' and 'OptimalRange' columns."
            )

        self.features = []
        self.targets = []

        # Build each day's feature row once; the windows below only slice them
        daily_rows = []
        for idx in range(self.N_DAYS):
            measurements = [series[idx] for series in self.time_series_data.values()]
            waste_encoding = WasteType.get_type_encoding(self.waste_types[idx])
            weather = [series[idx] for series in self.weather_data.values()]
            daily_rows.append(measurements + waste_encoding.tolist() + weather)

        for start in range(self.N_DAYS - self.sequence_length):
            stop = start + self.sequence_length
            self.features.append(
                torch.tensor(daily_rows[start:stop], dtype=torch.float32)
            )

            # Target is the day right after the window
            target = [
                self.time_series_data[name][stop] for name in self.TARGET_VARIABLES
            ]
            self.targets.append(torch.tensor(target, dtype=torch.float32))

    def __len__(self) -> int:
        """Return the number of windows."""
        return len(self.features)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the ``(features, target)`` pair of window ``idx``."""
        return self.features[idx], self.targets[idx]

    def get_optimal_ranges(self) -> Dict[str, Tuple[float, float]]:
        """Return optimal ranges for all variables"""
        return self.optimal_ranges

    def get_variable_metadata(self) -> pd.DataFrame:
        """Return metadata for all variables"""
        return self.base_data[
            [
                "Variable",
                "Type",
                "Unit",
                "Dependencies",
                "IntroductionStage",
                "Frequency",
                "Notes",
            ]
        ]


# Example usage
def create_compost_dataloader(
    dataset_path: str, batch_size: int = 32, sequence_length: int = 30
):
    """Build the time-series dataset and a shuffled ``DataLoader`` over it.

    Returns ``(dataset, dataloader)``. If anything goes wrong the error is
    printed and ``(None, None)`` is returned instead.
    """
    try:
        ts_dataset = CompostTimeSeriesDataset(
            dataset_path, sequence_length=sequence_length
        )
        loader = DataLoader(
            dataset=ts_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2,
        )
    except Exception as e:
        print(f"Error creating dataloader: {e}")
        return None, None
    return ts_dataset, loader


if __name__ == "__main__":
    root_dir = os.path.normpath(os.path.join(os.getcwd(), ".."))
    dataset_path = os.path.join(root_dir, "data", "smart_compost_dataset101.csv")

    dataset, dataloader = create_compost_dataloader(dataset_path)
    if dataloader:
        print('looping ...')
        # Only the first batch is inspected
        for batch_features, batch_targets in dataloader:
            first_sample = batch_features[0]
            print("Batch features shape:", batch_features.shape)
            print("Batch targets shape:", batch_targets.shape)
            print("\nFeature sequence (first day of first sample):", first_sample[0])
            print("Target values:", batch_targets[0])
            break

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the dataset from the working directory (change the file name as needed)
df = pd.read_csv(filepath_or_buffer="smart_compost_dataset101.csv")

# White-grid seaborn theme for all figures below
sns.set_style(style="whitegrid")


# Function to plot variable trends over time
def plot_variable(df, variable):
    """Draw column ``variable`` of ``df`` against its ``Date`` column as a line chart."""
    plt.figure(figsize=(12, 5))
    plt.plot(df["Date"], df[variable], color="blue", label=variable)

    # Annotate the chart
    plt.title(f"{variable} Over Time")
    plt.xlabel("Date")
    plt.ylabel(variable)
    plt.xticks(rotation=45)  # tilt the date labels

    plt.legend()
    plt.show()


# Example: Plot temperature and moisture trends
for column in ("Temperature", "Moisture_Content"):
    plot_variable(df, column)

# Correlation heatmap over everything except the date and waste-type columns
correlations = df.drop(columns=["Date", "Waste_Type"]).corr()
plt.figure(figsize=(10, 6))
sns.heatmap(correlations, annot=True, cmap="coolwarm")
plt.title("Variable Correlation Heatmap")
plt.show()

# Scatterplot: Temperature vs Moisture Content, coloured by waste type
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x="Temperature", y="Moisture_Content", hue="Waste_Type")
plt.title("Temperature vs Moisture Content")
plt.show()

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from typing import Tuple, Dict
import matplotlib.pyplot as plt


class CompostLSTM(nn.Module):
    """LSTM encoder with attention pooling and a two-output regression head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability of the regression head and, when more
            than one LSTM layer is used, between the LSTM layers.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int = 64,
        num_layers: int = 2,
        dropout: float = 0.2,
    ):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent encoder; inter-layer dropout only makes sense when stacked
        lstm_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
        )

        # Scores every time step with a single scalar
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1),
        )

        # Maps the pooled context to the two predictions (temperature, moisture)
        bottleneck = hidden_size // 2
        self.regression_head = nn.Sequential(
            nn.Linear(hidden_size, bottleneck),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(bottleneck, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return predictions of shape ``[batch, 2]`` for ``x`` of shape ``[batch, seq_len, input_size]``."""
        hidden_states, _ = self.lstm(x)  # [batch, seq_len, hidden_size]

        # Softmax over the time axis turns the scores into pooling weights
        scores = self.attention(hidden_states)  # [batch, seq_len, 1]
        weights = torch.softmax(scores, dim=1)

        # Attention-weighted sum over time -> [batch, hidden_size]
        context = (weights * hidden_states).sum(dim=1)

        return self.regression_head(context)


class CompostModelTrainer:
    """Train and validate a regression model with MSE loss, Adam and early stopping.

    Args:
        model: Network to train; it is moved to CUDA when available, else CPU.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, model: nn.Module, learning_rate: float = 0.001):
        self.model = model
        self.criterion = nn.MSELoss()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Move the model first and build the optimizer afterwards, so the
        # optimizer is created from the parameters on their final device.
        self.model.to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        # Per-epoch loss history
        self.train_losses = []
        self.val_losses = []

    @staticmethod
    def _average(total_loss: float, n_batches: int, loader_name: str) -> float:
        """Return the mean batch loss, rejecting loaders that yielded nothing."""
        if n_batches == 0:
            raise ValueError(f"{loader_name} yielded no batches.")
        return total_loss / n_batches

    def train_epoch(self, train_loader: DataLoader) -> float:
        """Run one optimisation pass over ``train_loader``.

        Returns:
            The mean of the per-batch training losses.

        Raises:
            ValueError: If ``train_loader`` yields no batches.
        """
        self.model.train()
        total_loss = 0
        n_batches = 0

        for batch_features, batch_targets in train_loader:
            batch_features = batch_features.to(self.device)
            batch_targets = batch_targets.to(self.device)

            # Forward pass
            self.optimizer.zero_grad()
            predictions = self.model(batch_features)
            loss = self.criterion(predictions, batch_targets)

            # Backward pass
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            n_batches += 1

        return self._average(total_loss, n_batches, "train_loader")

    def validate(self, val_loader: DataLoader) -> float:
        """Evaluate the model on ``val_loader`` without updating it.

        Returns:
            The mean of the per-batch validation losses.

        Raises:
            ValueError: If ``val_loader`` yields no batches.
        """
        self.model.eval()
        total_loss = 0
        n_batches = 0

        with torch.no_grad():
            for batch_features, batch_targets in val_loader:
                batch_features = batch_features.to(self.device)
                batch_targets = batch_targets.to(self.device)

                predictions = self.model(batch_features)
                loss = self.criterion(predictions, batch_targets)
                total_loss += loss.item()
                n_batches += 1

        return self._average(total_loss, n_batches, "val_loader")

    def train(
        self,
        train_loader: DataLoader,
        val_loader: DataLoader,
        epochs: int = 100,
        early_stopping_patience: int = 10,
        checkpoint_path: str = "best_compost_model.pth",
    ) -> Dict:
        """Train for up to ``epochs`` epochs with early stopping.

        Args:
            train_loader: Batches used for optimisation.
            val_loader: Batches used to monitor the validation loss.
            epochs: Maximum number of epochs.
            early_stopping_patience: Number of consecutive epochs without a
                new best validation loss after which training stops.
            checkpoint_path: File that receives the model's ``state_dict``
                whenever the validation loss improves.

        Returns:
            ``{"train_losses": [...], "val_losses": [...]}`` with one entry per
            epoch run so far by this trainer.
        """
        best_val_loss = float("inf")
        patience_counter = 0

        for epoch in range(epochs):
            # Train and validate
            train_loss = self.train_epoch(train_loader)
            val_loss = self.validate(val_loader)

            # Store losses
            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)

            # Print progress
            print(f"Epoch {epoch+1}/{epochs}")
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")

            # Early stopping check
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # Save best model
                torch.save(self.model.state_dict(), checkpoint_path)
            else:
                patience_counter += 1

            if patience_counter >= early_stopping_patience:
                print("Early stopping triggered!")
                break

        return {"train_losses": self.train_losses, "val_losses": self.val_losses}

    def plot_training_history(self):
        """Plot the recorded training and validation losses per epoch."""
        plt.figure(figsize=(10, 6))
        plt.plot(self.train_losses, label="Training Loss")
        plt.plot(self.val_losses, label="Validation Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title("Training History")
        plt.legend()
        plt.grid(True)
        plt.show()


ModuleNotFoundError: No module named 'torch'

Here's what this model implementation provides:

Model Architecture:

LSTM-based network for time series processing
Attention mechanism to focus on important time steps
Dropout for regularization
A single regression head with two outputs (temperature and moisture)


Training Infrastructure:

Comprehensive training loop with validation
Early stopping to prevent overfitting
Parameter optimization with Adam (configurable learning rate)
Loss tracking and visualization



To use this model:

First, prepare your data:
```python 
dataset_path = "path_to_your_dataset.csv"
dataset, full_dataloader = create_compost_dataloader(dataset_path)

# Hold out 20% of the samples for validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size]
)

# Shuffle the training batches only; validation order stays fixed
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


```


python

The next step would be to create a neural network model to train on this composting data. Let me create an appropriate model architecture for this task.



```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from typing import Tuple, Dict
import matplotlib.pyplot as plt

class CompostLSTM(nn.Module):
    """LSTM regressor that pools its outputs over time with learned attention weights.

    The input sequence is encoded by a (possibly stacked) LSTM. A small
    feed-forward module scores every time step, the scores are softmax-normalised
    along the time axis, and the LSTM outputs are summed with those weights.
    The pooled vector is mapped to two regression outputs by a single shared head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state (also used by the attention
            scorer; the regression head narrows it to ``hidden_size // 2``).
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability inside the regression head and, when
            ``num_layers > 1``, between LSTM layers.
    """

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 2, dropout: float = 0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Sequence encoder. Inter-layer dropout is only meaningful for stacked
        # LSTMs, so it is disabled for a single layer.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Scores each time step with one scalar (normalised in forward()).
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1)
        )

        # Single shared head producing both regression outputs.
        self.regression_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, 2)  # 2 outputs: temperature and moisture
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict the two regression targets for a batch of inputs.

        Args:
            x: Tensor of shape ``[batch, seq_len, input_size]``. A 2-D tensor of
                shape ``[batch, input_size]`` (one feature vector per sample, as
                produced by a plain tabular dataset) is also accepted and is
                treated as a batch of length-1 sequences.

        Returns:
            Tensor of shape ``[batch, 2]``.
        """
        # Without this, the LSTM would interpret a 2-D batch as ONE unbatched
        # sequence and the pooled vector would no longer match the head's input.
        if x.dim() == 2:
            x = x.unsqueeze(1)  # Shape: [batch, 1, input_size]

        # LSTM forward pass
        lstm_out, _ = self.lstm(x)  # Shape: [batch, seq_len, hidden_size]

        # One score per time step, normalised over the time axis
        attention_weights = self.attention(lstm_out)  # Shape: [batch, seq_len, 1]
        attention_weights = torch.softmax(attention_weights, dim=1)

        # Weighted sum of LSTM outputs
        context = torch.sum(attention_weights * lstm_out, dim=1)  # Shape: [batch, hidden_size]

        # Generate predictions
        predictions = self.regression_head(context)  # Shape: [batch, 2]

        return predictions

class CompostModelTrainer:
    """Training harness using MSE loss, Adam, and early stopping on validation loss.

    The model is moved to CUDA when it is available, otherwise it stays on the
    CPU. Per-epoch losses are accumulated on the instance in ``train_losses``
    and ``val_losses`` so they can be plotted afterwards.

    Args:
        model: Network to optimise; it is moved in place to the selected device.
        learning_rate: Learning rate passed to the Adam optimizer.
        checkpoint_path: File that receives the model's ``state_dict`` every time
            the validation loss improves during ``train``. The default is the
            file name that used to be hard-coded, so existing callers are
            unaffected.
    """

    def __init__(self, model: nn.Module, learning_rate: float = 0.001,
                 checkpoint_path: str = 'best_compost_model.pth'):
        self.model = model
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.checkpoint_path = checkpoint_path

        # Training history (one entry per completed epoch)
        self.train_losses = []
        self.val_losses = []

    def train_epoch(self, train_loader: DataLoader) -> float:
        """Run one optimisation pass over ``train_loader``.

        Args:
            train_loader: Sized iterable of ``(features, targets)`` tensor pairs.

        Returns:
            Mean of the per-batch losses for this epoch.
        """
        self.model.train()
        total_loss = 0.0

        for batch_features, batch_targets in train_loader:
            batch_features = batch_features.to(self.device)
            batch_targets = batch_targets.to(self.device)

            # Forward pass
            self.optimizer.zero_grad()
            predictions = self.model(batch_features)
            loss = self.criterion(predictions, batch_targets)

            # Backward pass
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(train_loader)

    def validate(self, val_loader: DataLoader) -> float:
        """Evaluate the model on ``val_loader`` without updating weights.

        Args:
            val_loader: Sized iterable of ``(features, targets)`` tensor pairs.

        Returns:
            Mean of the per-batch losses.
        """
        self.model.eval()
        total_loss = 0.0

        # Gradients are not needed for evaluation
        with torch.no_grad():
            for batch_features, batch_targets in val_loader:
                batch_features = batch_features.to(self.device)
                batch_targets = batch_targets.to(self.device)

                predictions = self.model(batch_features)
                loss = self.criterion(predictions, batch_targets)
                total_loss += loss.item()

        return total_loss / len(val_loader)

    def train(self,
              train_loader: DataLoader,
              val_loader: DataLoader,
              epochs: int = 100,
              early_stopping_patience: int = 10) -> Dict:
        """Train for up to ``epochs`` epochs, stopping early when validation stalls.

        After every epoch the validation loss is compared with the best value
        seen in this call. An improvement resets the patience counter and
        writes the weights to ``self.checkpoint_path``; otherwise the counter
        is incremented, and training stops once it reaches
        ``early_stopping_patience``.

        Note that the in-memory model keeps the weights of the last epoch run;
        the best weights are only available from the checkpoint file.

        Args:
            train_loader: Batches used for optimisation.
            val_loader: Batches used for validation.
            epochs: Maximum number of epochs.
            early_stopping_patience: Number of consecutive non-improving epochs
                tolerated before stopping.

        Returns:
            Dict with keys ``'train_losses'`` and ``'val_losses'`` holding the
            instance's loss lists (these keep growing across repeated calls).
        """
        best_val_loss = float('inf')
        patience_counter = 0

        for epoch in range(epochs):
            # Train and validate
            train_loss = self.train_epoch(train_loader)
            val_loss = self.validate(val_loader)

            # Store losses
            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)

            # Print progress
            print(f"Epoch {epoch+1}/{epochs}")
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")

            # Early stopping check
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # Save best model
                torch.save(self.model.state_dict(), self.checkpoint_path)
            else:
                patience_counter += 1

            if patience_counter >= early_stopping_patience:
                print("Early stopping triggered!")
                break

        return {
            'train_losses': self.train_losses,
            'val_losses': self.val_losses
        }

    def plot_training_history(self):
        """Plot the recorded training and validation losses against the epoch index."""
        plt.figure(figsize=(10, 6))
        plt.plot(self.train_losses, label='Training Loss')
        plt.plot(self.val_losses, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Training History')
        plt.legend()
        plt.grid(True)
        plt.show()

# Example usage
def train_compost_model(train_loader: DataLoader, val_loader: DataLoader) -> CompostLSTM:
    """Build a CompostLSTM sized to the data, train it, plot the losses, and return it.

    Args:
        train_loader: Batches used for optimisation; the first batch is also
            inspected to determine the feature dimension.
        val_loader: Batches used for validation and early stopping.

    Returns:
        The model instance that was trained.
    """
    # The last dimension of a feature batch is the number of input features
    first_batch = next(iter(train_loader))
    n_features = first_batch[0].shape[-1]

    # Model and trainer both use their default hyperparameters
    net = CompostLSTM(input_size=n_features)
    runner = CompostModelTrainer(net)

    # Up to 100 epochs, giving up after 10 epochs without validation improvement
    runner.train(
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=100,
        early_stopping_patience=10,
    )

    # Show the loss curves collected during training
    runner.plot_training_history()

    return net

if __name__ == "__main__":
    # The dataset helper lives in its own module and is only needed for script runs
    from compost_dataset import create_compost_dataloader

    # Build the dataset (a loader over the full data is returned alongside it)
    dataset_path = "path_to_your_dataset.csv"
    dataset, full_dataloader = create_compost_dataloader(dataset_path)

    # 80/20 train/validation split; validation takes whatever is left over
    n_total = len(dataset)
    train_size = int(0.8 * n_total)
    val_size = n_total - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, val_size],
    )

    # Only the training batches are shuffled
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Fit the model on the two splits
    model = train_compost_model(train_loader, val_loader)

```

Here's what this model implementation provides:

1. Model Architecture:
   - LSTM-based network for time series processing
   - Attention mechanism to focus on important time steps
   - Dropout for regularization
   - A single shared regression head with two outputs (temperature and moisture)

2. Training Infrastructure:
   - Comprehensive training loop with validation
   - Early stopping to prevent overfitting
   - Adam optimizer with a configurable learning rate
   - Loss tracking and visualization

To use this model:

1. First, prepare your data:
```python
# Build the dataset (a loader over the full data is returned alongside it)
dataset_path = "path_to_your_dataset.csv"
dataset, full_dataloader = create_compost_dataloader(dataset_path)

# 80/20 train/validation split; validation takes whatever is left over
n_total = len(dataset)
train_size = int(0.8 * n_total)
val_size = n_total - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
)

# Only the training batches are shuffled
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
```

2. Train the model:
```python
# Builds a CompostLSTM sized to the loader's feature dimension, trains it with
# early stopping, shows the loss curves, and returns the trained model.
model = train_compost_model(train_loader, val_loader)
```

3. Make predictions:
```python
model.eval()
# CompostModelTrainer moves the model to CUDA when available, while the loader
# yields CPU tensors, so the input must be moved to the model's device first.
device = next(model.parameters()).device
with torch.no_grad():
    sample_input = next(iter(val_loader))[0].to(device)
    predictions = model(sample_input)
    # predictions[:, 0] is temperature
    # predictions[:, 1] is moisture content
```

Next steps could include:
1. Hyperparameter tuning
2. Adding more prediction targets
3. Implementing real-time monitoring
4. Creating a user interface for the model
5. Adding interpretability features

Would you like me to implement any of these next steps or explain any part of the current implementation in more detail?