In [None]:
!pip install zarr
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import zarr
import os
import matplotlib.pyplot as plt
import joblib
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pickle

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Print the full path of every file found beneath the Kaggle input directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Cell 2: Set paths
# All competition inputs share one root directory; only the model lives in the working dir
_COMPETITION_DIR = "/kaggle/input/czii-cryo-et-object-identification"
train_path = f"{_COMPETITION_DIR}/train"
test_path = f"{_COMPETITION_DIR}/test"
submission_path = f"{_COMPETITION_DIR}/sample_submission.csv"
model_path = "/kaggle/working/trained_model.pkl"  # Where the saved model file is expected

In [None]:
# Cell 3: Load data function
def load_data(path):
    """Open the zarr store located at ``path`` in read-only mode and return it."""
    store = zarr.open(path, mode='r')
    return store

In [None]:
# Cell 4: Define Dataset Class
class CryoETDataset(Dataset):
    """Dataset serving max-normalised float32 tensors read from a zarr store.

    Args:
        path: Location of the zarr store; it is opened read-only on construction.

    NOTE(review): items are fetched with integer indexing (``self.data[idx]``),
    so this presumably expects an array-like store whose first axis enumerates
    the tomograms -- confirm against the actual on-disk layout.
    """

    def __init__(self, path):
        self.path = path
        self.data = self.load_data()

    def load_data(self):
        """Open the zarr store at ``self.path`` read-only and return it."""
        return zarr.open(self.path, mode='r')

    def __len__(self):
        """Return the length reported by the underlying store."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return item ``idx`` as a float32 tensor divided by its maximum value.

        An item whose maximum is exactly zero is returned unscaled: dividing
        by it would fill the tensor with NaN/inf and poison any loss computed
        from it.
        """
        tomogram = np.asarray(self.data[idx])
        peak = np.max(tomogram)
        if peak != 0:
            tomogram = tomogram / peak  # Normalize
        return torch.tensor(tomogram, dtype=torch.float32)

In [None]:
# Cell 5: Define the Model
class UNet(nn.Module):
    """Minimal 3D encoder/decoder network.

    The encoder applies one 3x3x3 convolution (1 -> 64 channels), a ReLU and a
    2x max-pool; the decoder applies one stride-2 transposed convolution
    (64 -> 1 channel) followed by a ReLU, so outputs are non-negative.
    ``forward`` returns a tensor with the same spatial size as its input.
    """

    def __init__(self):
        super().__init__()
        # Define the layers of the U-Net model
        self.encoder = nn.Sequential(
            nn.Conv3d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=2, stride=2),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(64, 1, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the encoder and decoder, restoring the input's spatial size.

        Args:
            x: Tensor whose last three dimensions are spatial and whose
                channel dimension has size 1.

        Returns:
            Tensor with the same last-three-dimension sizes as ``x``.
        """
        spatial_in = x.shape[-3:]
        x = self.encoder(x)
        x = self.decoder(x)
        # The max-pool floors odd sizes, so the upsampled output can be one
        # voxel short per axis. Zero-pad the trailing edge so the output lines
        # up with the input (needed when the input is the loss target).
        # F.pad expects (last-dim-left, last-dim-right, ...), hence reversed().
        pad = []
        for target, actual in zip(reversed(spatial_in), reversed(x.shape[-3:])):
            pad.extend((0, target - actual))
        if any(pad):
            x = nn.functional.pad(x, pad)
        return x

In [None]:
# Cell 6: Training Function
def train_model(model, dataloader, criterion, optimizer, num_epochs=10):
    """Train ``model`` to reconstruct its own inputs.

    Each batch gets a channel dimension added at position 1 and is used both
    as the model input and as the loss target.

    Args:
        model: Module to train; switched to training mode.
        dataloader: Iterable yielding input tensors (without channel dimension).
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        list[float]: Mean batch loss for each epoch (``nan`` for an epoch in
        which the dataloader yielded no batches).
    """
    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs in dataloader:
            batch = inputs.unsqueeze(1)  # Add channel dimension
            optimizer.zero_grad()
            outputs = model(batch)
            loss = criterion(outputs, batch)  # Assuming reconstruction task
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Nothing was iterated, so there is no loss to report; referencing
            # ``loss`` here would raise because it was never bound.
            epoch_losses.append(float('nan'))
            print(f'Epoch [{epoch+1}/{num_epochs}], no batches to train on')
            continue

        # Report the epoch average rather than whichever batch came last.
        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')
    return epoch_losses

In [None]:
# Cell 7: Evaluation Function
def evaluate_model(model, dataloader, criterion):
    """Compute and print the mean reconstruction loss over ``dataloader``.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding input tensors (without channel dimension).
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.

    Returns:
        float: Mean batch loss, or ``nan`` when the dataloader yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for inputs in dataloader:
            batch = inputs.unsqueeze(1)  # Add channel dimension
            outputs = model(batch)
            total_loss += criterion(outputs, batch).item()
            num_batches += 1
    # Count batches while iterating so an empty loader cannot divide by zero.
    average_loss = total_loss / num_batches if num_batches else float('nan')
    print(f'Validation Loss: {average_loss:.4f}')
    return average_loss

In [None]:
# Cell 8: Save the Model
def save_model(model, filename):
    """Write the model's state dict to ``filename`` using ``torch.save``."""
    weights = model.state_dict()
    torch.save(weights, filename)

In [None]:
# Cell 9: Visualization Function
def visualize_predictions(predictions):
    """Show the first 2D slice of the first prediction as a grayscale image.

    Args:
        predictions: Indexable collection of predictions (torch tensor or
            array-like). The first entry may have more than two dimensions,
            e.g. (channel, depth, height, width); leading axes are then
            reduced by taking index 0 until a 2D image remains.
    """
    first = predictions[0]
    if isinstance(first, torch.Tensor):
        # Ensure the tensor is detached and on the CPU before numpy conversion
        first = first.detach().cpu().numpy()
    image = np.asarray(first)
    # imshow with a colormap needs a 2D array; a stacked model output still
    # carries channel/depth axes here, so step down to the first slice.
    while image.ndim > 2:
        image = image[0]

    plt.figure(figsize=(10, 10))
    # Example: Plotting the first tomogram slice
    plt.imshow(image, cmap='gray')
    plt.title('Predicted Tomogram Slice')
    plt.axis('off')
    plt.show()

In [None]:
# Cell 10: Implementing a simple model evaluation metric - F-beta score
def f_beta_score(y_true, y_pred, beta=4):
    """Calculate the F-beta score from true and predicted values.

    Args:
        y_true: Array-like of ground-truth values (e.g. 0/1 labels).
        y_pred: Array-like of predicted values, broadcastable against ``y_true``.
        beta: Weight of recall relative to precision.

    Returns:
        The F-beta score. A small epsilon in each denominator keeps the
        result finite (0) when there are no positives.
    """
    # Coerce to arrays so plain Python lists work; ``list * list`` would raise.
    truth = np.asarray(y_true, dtype=float)
    pred = np.asarray(y_pred, dtype=float)
    eps = 1e-6

    overlap = np.sum(pred * truth)  # shared numerator of precision and recall
    precision = overlap / (np.sum(pred) + eps)
    recall = overlap / (np.sum(truth) + eps)
    beta_sq = beta ** 2
    f_beta = (1 + beta_sq) * (precision * recall) / (beta_sq * precision + recall + eps)
    return f_beta

In [None]:
# Cell 11: Main Execution
if __name__ == "__main__":
    # Training data is looked up under <train_path>/static/ExperimentRuns
    train_data_path = os.path.join(train_path, "static", "ExperimentRuns")

    # Report the path when present; otherwise stop with an error
    if os.path.exists(train_data_path):
        print(f"Training data path exists: {train_data_path}")
    else:
        raise FileNotFoundError(f"Training data path does not exist: {train_data_path}")

In [None]:
# Cell 12: Load and preprocess training data
# Default to "no loader"; it is only replaced once the dataset has loaded cleanly
train_loader = None
try:
    train_dataset = CryoETDataset(train_data_path)
    print(f"Loaded dataset with {len(train_dataset)} items.")
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
except Exception as exc:
    print(f"Error loading training data: {exc}")

In [None]:
# Cell 13: Initialize model, criterion, and optimizer
# Network, mean-squared-error loss, and Adam optimiser over the network's parameters
model = UNet()
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Cell 14: Train the model
# Training is skipped when the training data could not be loaded
if train_loader is not None:
    try:
        train_model(
            model=model,
            dataloader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
            num_epochs=10,
        )
    except Exception as exc:
        print(f"Error during training: {exc}")

In [None]:
# Cell 15: Construct the test data path
# Test data is looked up under <test_path>/static/ExperimentRuns
test_data_path = os.path.join(
    test_path,
    "static",
    "ExperimentRuns",
)

In [None]:
# Cell 16: Load test dataset and make predictions
# Start from "no loader" so test_loader is always defined, even if loading fails
test_loader = None
try:
    test_dataset = CryoETDataset(test_data_path)
    print(f"Loaded test dataset with {len(test_dataset)} items.")
    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)
except Exception as exc:
    print(f"Error loading test data: {exc}")

In [None]:
# Cell 17: Make predictions on the test set
def make_predictions(model, dataloader):
    """Run ``model`` over every batch and return the concatenated outputs.

    Args:
        model: Module to run; switched to evaluation mode.
        dataloader: Iterable yielding input tensors (without channel dimension).

    Returns:
        torch.Tensor: Outputs of all batches concatenated along dimension 0,
        held on the CPU. An empty tensor is returned when the dataloader
        yields no batches.
    """
    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs in dataloader:
            outputs = model(inputs.unsqueeze(1))  # Add channel dimension
            # Accumulate on the CPU so collected outputs do not pile up on an
            # accelerator and can be converted to numpy directly.
            predictions.append(outputs.cpu())
    if not predictions:
        # torch.cat raises on an empty list; hand back an empty tensor instead.
        return torch.empty(0)
    return torch.cat(predictions)

# Predictions are only attempted when the test data loaded successfully
if test_loader is not None:
    try:
        predictions = make_predictions(model=model, dataloader=test_loader)
    except Exception as exc:
        print(f"Error during predictions: {exc}")

In [None]:
# Cell 18: Evaluate the model
# Note: the loss reported here is computed on the training loader
if train_loader is not None:
    evaluate_model(model=model, dataloader=train_loader, criterion=criterion)

In [None]:
# Cell 19: Save the Model
# Store the model's state dict as trained_model.pth in the current directory
save_model(model=model, filename='trained_model.pth')

In [None]:
# Cell 20: Prepare Submission File
if 'predictions' in locals():
    # Ensure predictions are detached and on the CPU before numpy conversion
    submission_data = predictions.detach().cpu().numpy()
    # submission_path points into the read-only input directory, so only its
    # file name is reused; the .npy file is written next to the saved model.
    submission_npy_name = os.path.basename(submission_path).replace('.csv', '.npy')
    submission_npy_path = os.path.join(os.path.dirname(model_path), submission_npy_name)
    np.save(submission_npy_path, submission_data)
    print("Model training and predictions completed. Model saved and predictions saved as .npy file.")
else:
    print("Predictions not available for submission.")

In [None]:
# Cell 21: Visualize Predictions
# Only plot when an earlier cell actually produced predictions
if 'predictions' in locals():
    visualize_predictions(predictions=predictions)

In [None]:
# Cell 22: Example usage of the F-beta score
try:
    # "path_to_ground_truth" is a placeholder; any failure is reported below
    ground_truth = load_data("path_to_ground_truth")
    score = f_beta_score(
        y_true=ground_truth,
        y_pred=predictions.cpu().numpy(),  # move to CPU before numpy conversion
    )
    print(f'F-beta Score: {score}')
except Exception as exc:
    print(f"Error calculating F-beta score: {exc}")

In [None]:
# Cell 23: Save the model if needed
# Save to model_path so the file is written exactly where the later loading
# cell reads it from, independent of the current working directory.
# Despite the .pkl extension, the file holds a state dict written by torch.save.
save_model(model, model_path)

In [None]:
# Cell 24: Load the model from .pth file
loaded_model = UNet()  # Architecture must match the one the weights were saved from
_pth_state = torch.load('trained_model.pth')
loaded_model.load_state_dict(_pth_state)
loaded_model.eval()  # Set model to evaluation mode

In [None]:
# Cell 25: Load a pickled object from .pkl file
try:
    # The file holds a state dict written by torch.save, so it is read back
    # with torch.load rather than pickle.load
    loaded_model = UNet()  # Initialize the model class
    _pkl_state = torch.load(model_path)
    loaded_model.load_state_dict(_pkl_state)  # Load the model state
    loaded_model.eval()  # Set model to evaluation mode
except FileNotFoundError as exc:
    print(f"Error loading model file: {exc}")
except Exception as exc:
    print(f"Error loading model: {exc}")
else:
    print("Model loaded successfully.")