<a href="https://colab.research.google.com/github/pablex72/time-series-crop/blob/main/spline_interpolation_crop_only_fill_gaps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.interpolate import UnivariateSpline

# Load the NDVI time series; the script reads the "Date" and "NDVI" columns.
df = pd.read_csv('/content/s2-r2-2019-2022-correct-2.csv')

# Convert the "Date" column to datetime
df["Date"] = pd.to_datetime(df["Date"])

# Generate a regular date range with the desired interval, spanning the
# first to the last observation date.
grid_freq = "5D"
start_date = df["Date"].min()
end_date = df["Date"].max()
date_range = pd.date_range(start=start_date, end=end_date, freq=grid_freq)

# Reindex the DataFrame with the new date range. Grid dates without an
# observation become NaN rows. Reindexing keeps only rows whose date is
# exactly on the grid, so report any observation that is about to be dropped
# instead of losing it silently.
df = df.set_index("Date")
off_grid = ~df.index.isin(date_range)
if off_grid.any():
    warnings.warn(
        f"{int(off_grid.sum())} observation(s) do not fall on the "
        f"{grid_freq} grid starting at {start_date} and are dropped by the "
        "reindex."
    )
df = df.reindex(date_range)

# Treat zero NDVI readings as missing, like the NaN rows created by the
# reindex above (presumably zeros are no-data fill values -- confirm against
# the data source).
df["NDVI"] = df["NDVI"].replace(0, np.nan)


# Fit a smoothing spline through the valid observations for each smoothing
# factor in s_values. x is the position on the regular grid, so one unit of
# x corresponds to one grid step.
x = np.arange(len(df))
mask = ~np.isnan(df["NDVI"])
s_values = [0.35]
interpolated_values = []

# A spline of degree k needs more than k data points; fail with a clear
# message rather than an opaque error from the fitting routine.
spline_degree = 3
n_valid = int(mask.sum())
if n_valid <= spline_degree:
    raise ValueError(
        f"Need at least {spline_degree + 1} valid NDVI observations to fit a "
        f"degree-{spline_degree} spline, found {n_valid}."
    )

for s in s_values:
    spline = UnivariateSpline(x[mask], df["NDVI"][mask], k=spline_degree, s=s)
    # The spline is evaluated at every grid position, so the resulting curve
    # covers observed dates as well as the gaps.
    interpolated_values.append(spline(x))

# Reset the index and rename the columns so the grid dates are available as
# a regular "Date" column again.
df = df.reset_index()
df = df.rename(columns={"index": "Date"})

# Plot the original and interpolated datasets for different values of s
plt.figure(figsize=(12, 6))

# Plot interpolated datasets for different values of s
for i, s in enumerate(s_values):
    plt.plot(df["Date"], interpolated_values[i], "o-", label=f"s={s}")
# Plot original data (NaN rows leave breaks in the line)
plt.plot(df["Date"], df["NDVI"], "x-", label="Original", color="yellow")
plt.xlabel("Date")
plt.ylabel("NDVI")
plt.title("Original and Interpolated NDVI Dataset")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
