In [14]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from datetime import timedelta

# Load dataset
file_path = "givenData.csv"  # Update with your actual file path
data = pd.read_csv(file_path, delimiter="\t")

# Preprocessing
data['Date & Time'] = pd.to_datetime(data['Date & Time'], format='%d-%m-%Y %H:%M')
data['Water Level'] = data['Water Level (m)'].abs()  # Use absolute values
data = data[['Telemetry_UID', 'Date & Time', 'Water Level']]

# Initialize result dataframe
future_predictions = pd.DataFrame()

# Process each Telemetry_UID
for uid in data['Telemetry_UID'].unique():
    # Filter data for the current Telemetry_UID
    telemetry_data = data[data['Telemetry_UID'] == uid].copy()
    telemetry_data['Days Since Start'] = (telemetry_data['Date & Time'] - telemetry_data['Date & Time'].min()).dt.days
    
    # Prepare features and target
    X = telemetry_data[['Days Since Start']]
    y = telemetry_data['Water Level']
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train the model
    model = XGBRegressor(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )
    model.fit(X_train, y_train)
    
    # Predict future values for 20 days
    last_day = telemetry_data['Days Since Start'].max()
    future_days = np.arange(last_day + 1, last_day + 21).reshape(-1, 1)
    future_levels = model.predict(future_days)
    
    # Append predictions to the result
    future_dates = telemetry_data['Date & Time'].max() + pd.to_timedelta(np.arange(1, 21), unit='D')
    prediction_df = pd.DataFrame({
        'Telemetry_UID': uid,
        'Date & Time': future_dates,
        'Predicted Water Level': future_levels
    })
    future_predictions = pd.concat([future_predictions, prediction_df], ignore_index=True)

# Save or display results
print(future_predictions)
future_predictions.to_csv("future_predictions.csv", index=False)


KeyError: 'Date & Time'