In [None]:
# Project: Spatio-temporal pest forecasting
# Author: Dr. Dinesh Kumar P
# Data understanding and Exploration

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
# Default size (width, height) in inches for every figure drawn below.
plt.rcParams["figure.figsize"] = (8, 5)

In [None]:
"""
Steps:
1. Load the pest incidence data
2. Load climate data
3. Inspect data structure and missing values
4. Visualize the basic temporal pattern
"""

In [None]:
# Read the sample pest/climate table from the processed-data folder.
# The path is relative to the directory the notebook is run from.
data_path = "../data/processed/pest_climate_sample.csv"
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows.
df.head(n=5)

In [None]:
# Summarise the frame: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Count the missing (NaN) entries in each column of the loaded data.
df.isna().sum()

In [None]:
# Pest count over time for Region_A
#
# At this point in the notebook the "date" column has not yet been converted
# to datetime, so plotting it directly would put the raw values on a
# categorical axis (equal spacing, in file order) rather than a real time
# axis. Parse and sort the dates on the Region_A subset only; `df` itself is
# left untouched here.
region_a = (
    df[df["region"] == "Region_A"]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values("date")
)

fig, ax = plt.subplots()
ax.plot(region_a["date"], region_a["pest_count"], marker="o")
ax.set_xlabel("Date")
ax.set_ylabel("Pest Count")
ax.set_title("Pest Trend Over Time - Region A")
# Tilt the tick labels so neighbouring dates do not overlap.
ax.tick_params(axis="x", labelrotation=45)
plt.show()


In [None]:
# Parse the "date" column into datetimes, then order the rows by region and,
# within each region, chronologically.
df = (
    df.assign(date=pd.to_datetime(df["date"]))
    .sort_values(by=["region", "date"])
)

df.head()

In [None]:
# Add pest_lag_1 ... pest_lag_5: pest_count shifted by 1 to 5 rows *within*
# each region, so values never leak across regions. The shift is by row
# position, so it is only a time lag if the rows are in chronological order
# per region and evenly spaced.
pest_by_region = df.groupby("region")["pest_count"]  # grouping is the same for every lag
for lag in range(1, 6):
    df[f"pest_lag_{lag}"] = pest_by_region.shift(lag)

df.head(10)

In [None]:
# Re-count missing values per column. The group-wise shift leaves the first
# rows of each region without a lagged value, so the pest_lag_* columns are
# expected to contain NaNs here.
df.isna().sum()

In [None]:
# Keep only fully populated rows and renumber the index from 0.
# Note: dropna() discards a row if *any* column is NaN, so besides the rows
# lacking lag values this also removes rows with gaps in other columns.
df_lagged = (
    df.dropna(how="any")
    .reset_index(drop=True)
)

df_lagged.head()

Lag features (1–5 weeks) are created to capture delayed effects of pest population dynamics, consistent with biological life-cycle behavior.