# Data Analysis Template

This template provides the correct setup for using Sunstone's lineage tracking.

**Important**: Always use `from sunstone import pandas as pd` instead of `import pandas as pd` to enable lineage tracking.

In [None]:
# Standard imports for Sunstone projects
from pathlib import Path
from sunstone import pandas as pd
import sunstone

# Set project path (update this to your actual project directory).
# Defaults to the current working directory; the data-loading cell passes
# this value to pd.read_csv as `project_path`.
PROJECT_PATH = Path.cwd()
print(f"Project path: {PROJECT_PATH}")

## Load Data

Load datasets that are registered in `datasets.yaml`. All reads are automatically tracked in lineage.

In [None]:
# Read a dataset (must be in datasets.yaml inputs).
# Update the filename to match your actual input dataset.
df = pd.read_csv('input_data.csv', project_path=PROJECT_PATH)

# Preview the first rows of the underlying data
preview = df.data.head()
display(preview)

# Report how many sources and operations the lineage holds so far
source_count = len(df.lineage.sources)
operation_count = len(df.lineage.operations)
print(f"\nLineage: {source_count} source(s), {operation_count} operation(s)")

## Transform Data

All pandas operations work as normal. When you need the underlying data directly (for example, to display it with `df.data.head()`), access it via `.data`.

In [None]:
# Filter data with a boolean mask ('column_name' is a placeholder;
# replace it with a real column from your dataset)
positive_mask = df['column_name'] > 0
filtered_df = df[positive_mask]

# Or use apply_operation for complex transformations; the description is
# passed along with the callable
result_df = df.apply_operation(
    lambda data: data[data['column_name'] > 100],
    description="Filter rows where column_name > 100",
)

# Preview the transformed rows
result_preview = result_df.data.head()
display(result_preview)

## Combine Datasets

Use pandas-like functions to merge, join, or concatenate datasets while preserving lineage.

In [None]:
# Everything in this cell is commented out; uncomment the lines you need
# and replace the placeholder file and column names with your own.

# Example: Load another dataset
# df2 = pd.read_csv('other_data.csv', project_path=PROJECT_PATH)

# Merge datasets
# merged_df = pd.merge(df, df2, on='key_column', how='inner')

# Concatenate datasets
# combined_df = pd.concat([df, df2], ignore_index=True)

# Check combined lineage (requires merged_df from the merge example above)
# print(f"Combined lineage: {len(merged_df.lineage.sources)} source(s)")

## Save Results

Save your results to create output datasets. In relaxed mode, outputs are auto-registered in `datasets.yaml`.

In [None]:
# Save the result.
# This cell is commented out; uncomment it when you are ready to write the
# output, and replace the placeholder file name, slug and name with your own.
# result_df.to_csv(
#     'output_data.csv',
#     slug='output-data',
#     name='Output Data',
#     publish=False,  # Set to True when ready to publish
#     index=False
# )

# print("Data saved successfully with full lineage tracking!")

## Validate Imports

Use this cell to check if your notebook is using the correct imports.

In [None]:
# Check if this notebook uses correct imports.
# 'current_notebook.ipynb' looks like a placeholder -- point it at this
# notebook's actual file before running.
notebook_file = 'current_notebook.ipynb'
result = sunstone.check_notebook_imports(notebook_file)

# Print the summary of the check
print(result.summary())