
**File Name:** sundial_time_prediction.py

**Description:**
  This script demonstrates how to:
  1. Generate a synthetic sundial shadow dataset using the Astral library.
  2. Save the dataset as a CSV file.
  3. Train a predictive model (Random Forest) to estimate time-of-day from shadow length & angle.
  4. Evaluate the model using cross-validation and a final test split.
  5. Print performance metrics (MAE, R^2).

**Usage:**
  python sundial_time_prediction.py

**Dependencies:**
  pip install astral scikit-learn pandas


**STEP 1: IMPORT LIBRARIES**

In [27]:
import math
import datetime
import pandas as pd

# Astral for sun position calculations
from astral import LocationInfo
from astral.sun import sun, elevation, azimuth

# Scikit-learn for model building
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

**STEP 2: DEFINE LOCATION & DATE/TIME RANGE**

In [35]:
def define_location_and_time_ranges():
    city = LocationInfo(
        name="Pune",
        region="India",
        timezone="Asia/Kolkata",
        latitude=52.5200,
        longitude=13.4050
    )

    # Define date range
    start_date = datetime.date(2025, 2, 23)
    end_date   = datetime.date(2025, 2, 26)

    date_list = []
    delta = datetime.timedelta(days=1)
    current_date = start_date
    while current_date <= end_date:
        date_list.append(current_date)
        current_date += delta

    # Define times-of-day (every 15 minutes from 5:00 to 20:00)
    times = []
    for hour in range(5, 21):
        for minute in [0, 15, 30, 45]:
            times.append(datetime.time(hour, minute))

    return city, date_list, times

**STEP 3: GENERATE SUNDIAL DATA USING ASTRAL**

In [36]:
def generate_sundial_data(city, date_list, times, stick_height=1.0, noise_factor=0.0):
    data_records = []

    for date_ in date_list:
        for t_ in times:
            dt = datetime.datetime.combine(date_, t_)

            elev_deg = elevation(city.observer, dt)
            azim_deg = azimuth(city.observer, dt)

            # Only consider times when sun is above horizon
            if elev_deg > 0:
                # Convert elevation from degrees to radians
                elev_rad = math.radians(elev_deg)

                # Compute shadow length = stick_height / tan(elev)
                base_shadow_length = stick_height / math.tan(elev_rad)

                # Introduce optional noise
                shadow_length = base_shadow_length * (1 + random.uniform(-noise_factor, noise_factor))

                # Shadow angle = sun azimuth (could also add a small random offset if desired)
                base_shadow_angle = azim_deg
                shadow_angle = base_shadow_angle + random.uniform(-2, 2)*noise_factor

                # Time in decimal (e.g., 5:15 => 5.25)
                time_decimal = t_.hour + t_.minute/60.0

                # Store record
                data_records.append({
                    'date': str(date_),
                    'time_str': t_.strftime('%H:%M'),
                    'time_of_day': time_decimal,
                    'shadow_length': shadow_length,
                    'shadow_angle': shadow_angle
                })

    df = pd.DataFrame(data_records)
    return df

**STEP 4: SAVE DATA TO CSV**

In [37]:
def save_data_to_csv(df, filename='sundial_astral_data.csv'):
    """
    Saves the generated dataset to a CSV file.
    """
    df.to_csv(filename, index=False)
    print(f"[INFO] Dataset saved to {filename} with {len(df)} entries.")

**STEP 5: TRAIN AND EVALUATE MODEL WITH CROSS-VALIDATION**

In [38]:
def train_and_evaluate_model(df):
    """
    Trains a RandomForestRegressor to predict time_of_day from (shadow_length, shadow_angle).
    Demonstrates:
      - Train/Test split
      - K-Fold Cross Validation
      - Final evaluation
    """
    # Features (X) and target (y)
    X = df[['shadow_length', 'shadow_angle']].values
    y = df['time_of_day'].values

    # 5.1: Train/Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 5.2: Define Model
    model = RandomForestRegressor(n_estimators=100, random_state=42)

    # 5.3: (Optional) K-Fold Cross Validation on entire dataset
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(model, X, y, cv=kf, scoring='neg_mean_absolute_error')
    cv_mae = -cv_scores.mean()
    print(f"[INFO] 5-Fold CV MAE: {cv_mae:.2f} hours")

    # 5.4: Train on Train Split
    model.fit(X_train, y_train)

    # 5.5: Predict on Test Split
    y_pred = model.predict(X_test)

    # 5.6: Evaluate
    mae = mean_absolute_error(y_test, y_pred)
    r2  = r2_score(y_test, y_pred)

    print(f"[RESULT] Test MAE: {mae:.2f} hours")
    print(f"[RESULT] Test R^2 Score: {r2:.2f}")

    # Return the trained model in case we want to do further predictions
    return model

**STEP 6: MAIN EXECUTION FLOW**

In [39]:
def main():
    """
    Main function to tie all steps together.
    1. Define location & time ranges.
    2. Generate synthetic data.
    3. Save CSV.
    4. Train & evaluate model.
    """
    # (1) Define location and time range
    city, date_list, times = define_location_and_time_ranges()

    # (2) Generate data (optionally add noise_factor if you want realism, e.g. 0.05)
    df_sundial = generate_sundial_data(city, date_list, times, stick_height=1.0, noise_factor=0.02)

    # (3) Save to CSV
    save_data_to_csv(df_sundial, filename='sundial_astral_data.csv')

    # (4) Train & evaluate with cross-validation
    trained_model = train_and_evaluate_model(df_sundial)

    # End of main
    print("[INFO] Pipeline completed successfully!")

**STEP 7: ENTRY POINT**

In [40]:
if __name__ == "__main__":
    main()

[INFO] Dataset saved to sundial_astral_data.csv with 168 entries.
[INFO] 5-Fold CV MAE: 0.03 hours
[RESULT] Test MAE: 0.03 hours
[RESULT] Test R^2 Score: 1.00
[INFO] Pipeline completed successfully!
