In [2]:
from pptx import Presentation
from pptx.util import Inches

# Create the deck object; every slide below is appended to it.
presentation = Presentation()

# Title slide (layout index 0): fill the title shape and placeholder 1.
title_layout = presentation.slide_layouts[0]
title_slide = presentation.slides.add_slide(title_layout)
title, subtitle = title_slide.shapes.title, title_slide.placeholders[1]
title.text = "Machine Learning Project"
# The "\n" separates the team-names text from the date text.
subtitle.text = "Project Team Names\nDate"

# Introduction Slide
def _apply_markdown_bold(paragraph):
    """Turn ``**text**`` markers inside ``paragraph`` into bold runs.

    python-pptx inserts assigned text verbatim, so Markdown emphasis would
    otherwise appear on the slide as literal asterisks.

    Args:
        paragraph: A python-pptx paragraph whose text may contain ``**`` pairs.

    Returns:
        None. The paragraph is rewritten in place; paragraphs without markers,
        or with an unbalanced number of markers, are left untouched.
    """
    raw_text = paragraph.text
    marker_count = raw_text.count("**")
    if marker_count == 0 or marker_count % 2:
        return
    paragraph.clear()
    # Splitting on the marker alternates plain (even index) and bold (odd index) segments.
    for index, segment in enumerate(raw_text.split("**")):
        if not segment:
            continue
        run = paragraph.add_run()
        run.text = segment
        if index % 2 == 1:
            # Only set the flag on emphasized runs so plain runs keep inheriting the theme.
            run.font.bold = True


intro_slide = presentation.slides.add_slide(presentation.slide_layouts[1])
intro_slide.shapes.title.text = "Introduction"
intro_content = intro_slide.placeholders[1]
intro_content.text = (
    "AIS data is transmitted and recorded in vast quantities, making it a convenient source for training models "
    "to perform tasks such as:\n\n"
    "- Predicting the current (or past) status of a vessel\n"
    "- Forecasting the future path or reaction of the vessel\n"
    "- Analyzing movement patterns of different vessel types\n\n"
    "Applications include safety or security evaluations (e.g., early warning for coastal authorities, detecting illegal fishing), "
    "providing additional information for mariners, or developing realistic behavior patterns for synthetic traffic in simulators.\n\n"
    "This project focuses on a specific binary classification: distinguishing between vessels engaged in fishing and vessels underway using engine. "
    "The models are trained on datasets containing either 1-min records with 10-second intervals or 2-hour records with 1-minute intervals, "
    "including only course, heading, and speed data.\n\n"
    "Datasets are sourced from the Danish Maritime Authority (http://web.ais.dk/aisdata/).\n\n"
    "**Task Type**: Time series binary classification\n"
    "**Results Summary**:\n"
    "- Best Model: LSTM_2_2 on the 2-hour dataset\n"
    "- Evaluation Metrics: Accuracy, Precision, Recall, F1-Score\n"
    "- Result: 99%, 0.99, 0.98, 0.99"
)

# Render the "**label**" markers above as real bold text instead of literal asterisks.
for intro_paragraph in intro_content.text_frame.paragraphs:
    _apply_markdown_bold(intro_paragraph)

# Literature Review Slide
lit_review_slide = presentation.slides.add_slide(presentation.slide_layouts[1])
lit_review_slide.shapes.title.text = "Literature Review"

# Slide body: one entry per source, entries separated by a blank line.
lit_review_entries = [
    (
        "1. Kaggle - AI in Maritime Industry: Predict ship types using AIS data. "
        "Methods include EDA, feature engineering, and LightGBM. "
        "Key insight: AIS data preprocessing and feature selection."
    ),
    (
        "2. Article - Ship Behavior Prediction: Predict trajectories with VRAE and sequence-to-sequence models. "
        "Key insight: Localized clustering improves predictive accuracy."
    ),
    (
        "3. Fishing watch - Vessel Scoring: Detect fishing activities using heuristic and ML models. "
        "Key insight: Multi-window, gear-specific models enhance detection."
    ),
]
lit_review_content = lit_review_slide.placeholders[1]
lit_review_content.text = "\n\n".join(lit_review_entries)

# Speaker notes: an intro line followed by one line per source.
lit_review_note_lines = [
    "The Literature Review focuses on three sources:",
    (
        "1. Kaggle - AI in Maritime Industry: This study demonstrates how AIS data can be preprocessed and "
        "used for classification tasks like ship type prediction using LightGBM."
    ),
    (
        "2. Article - Ship Behavior Prediction: This paper highlights the effectiveness of localized clustering "
        "with VRAE and sequence-to-sequence models for predicting ship trajectories."
    ),
    (
        "3. Fishing watch - Vessel Scoring: Explores detecting fishing activities with heuristic and machine learning models, "
        "emphasizing the benefits of gear-specific multi-window approaches."
    ),
]
notes = lit_review_slide.notes_slide.notes_text_frame
notes.text = "\n".join(lit_review_note_lines)

# Remaining sections: each gets a stub slide until its real content is written.
sections = [
    "Dataset Characteristics", "Baseline Model",
    "Model Definition and Evaluation", "Results",
    "Challenges and Errors", "Discussion",
    "Conclusion and Future Work", "Q&A",
]

for section_title in sections:
    stub_slide = presentation.slides.add_slide(presentation.slide_layouts[1])
    stub_slide.shapes.title.text = section_title
    # Filler body text naming the section it stands in for.
    stub_slide.placeholders[1].text = f"Content for {section_title} will go here."

# Write the deck to disk, then report the file name that was used.
output_filename = "Machine_Learning_Project_Presentation.pptx"
presentation.save(output_filename)
print(f"Presentation updated: {output_filename}")


Presentation updated: Machine_Learning_Project_Presentation.pptx


In [1]:
import os
from pptx import Presentation
from pptx.util import Inches

# (heading, body) pairs for the content slides, in presentation order.
_OVERVIEW_SLIDES = (
    (
        "Introduction",
        "This project focuses on predicting the navigational status of vessels based on time-series motion data. "
        "The key objective is to classify the status into categories such as 'Under way using engine' and 'Engaged in fishing.' "
        "The data includes features like Speed Over Ground (SOG), Course Over Ground (COG), and Heading.",
    ),
    (
        "Dataset Characteristics",
        "- Grouped by vessel and filtered by area to ensure clarity (e.g., open sea vs. anchorage areas).\n"
        "- Created time sets of equal navigational status (e.g., 15 minutes or 2 hours).\n"
        "- Key features: Timestamp, Navigational Status (target), SOG, COG, Heading.\n"
        "- Preprocessing steps included:\n"
        "  - Interpolating columns with less than 50% missing values.\n"
        "  - Standardizing datasets to the same length and interval.\n"
        "  - Subtracting the initial course from all COG and Heading values.\n"
        # No trailing "\n" here: python-pptx would turn it into an extra empty paragraph.
        "  - Cyclical encoding of circular values like COG and Heading.",
    ),
    (
        "Baseline Model",
        "- Random Forest models were tested:\n"
        "  - Model 1: Line-by-line predictions.\n"
        "  - Model 2: Aggregated time-set predictions.\n"
        "- Tested on 15-minute and 2-hour sets.\n"
        "- Accuracy ranged between 0.68 and 0.78, with some classes not recognized.\n"
        "- Visualization and feature importance analysis helped understand the model's behavior.",
    ),
    (
        "Model Definition and Evaluation",
        "- LSTM models were developed to analyze time-series data.\n"
        "- Challenges:\n"
        "  - Moderate results with positional data included.\n"
        "  - Poor results without positional data, even with class weights.\n"
        "- Improvements:\n"
        "  - Subtracting the initial course and cyclical encoding improved accuracy.\n"
        "  - Aggregating datasets into vectors for uniform time-slots showed better performance.\n"
        "  - Sliding window approach significantly enhanced accuracy (up to 0.99).",
    ),
    (
        "Results",
        "- Best-performing model achieved:\n"
        "  - Accuracy: 0.99\n"
        "  - Precision: 0.99\n"
        "  - Recall: 0.98\n"
        "  - F1 Score: 0.99\n"
        "- Key optimization: Sliding window approach.\n"
        "- Visualizations (e.g., color-coded trajectories) aided in understanding results.",
    ),
    (
        "Challenges and Errors",
        "- Data size: Processing large datasets required significant time and resources.\n"
        "- Standardization: Ensuring uniformity across datasets was complex.\n"
        "- Model architecture: Balancing aggregated vs. flattened data for optimal results.\n"
        "- Hyperparameter tuning: Limited impact despite extensive trials.\n"
        "- Dynamic time-slot modeling remains an area for future work.",
    ),
    (
        "Future Work",
        "- Expand to more vessel categories.\n"
        "- Develop dynamic time-slot modeling for real-time applications.\n"
        "- Incorporate additional features like wind data.\n"
        "- Predict vessel movements based on historical patterns.",
    ),
)


def _add_content_slide(prs, heading, body):
    """Append a slide built from layout index 1 and fill its title and placeholder 1."""
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = heading
    slide.placeholders[1].text = body
    return slide


def create_presentation(output_path):
    """Build the project overview deck and save it.

    The deck consists of one title slide (layout index 0) followed by one
    slide per entry of ``_OVERVIEW_SLIDES`` (layout index 1).

    Args:
        output_path: Destination handed unchanged to ``Presentation.save``.

    Returns:
        None.
    """
    prs = Presentation()

    # Title slide: title shape plus placeholder 1 for the subtitle line.
    cover = prs.slides.add_slide(prs.slide_layouts[0])
    cover.shapes.title.text = "Machine Learning Project Overview"
    cover.placeholders[1].text = "Predicting Navigational Status of Vessels"

    # Content slides share one layout, so a single helper builds them all.
    for heading, body in _OVERVIEW_SLIDES:
        _add_content_slide(prs, heading, body)

    prs.save(output_path)

# Build the deck under a fixed file name and report where it was written.
output_file = "machine_learning_project.pptx"
create_presentation(output_file)
print("Presentation saved to " + output_file)


Presentation saved to machine_learning_project.pptx
