<a href="https://colab.research.google.com/github/thomaslu678/gee-test/blob/main/clean/4_create_sine_fit_df" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NOTE: Requires `export_clean.csv` (the cleaned-up export CSV, sorted and with time columns added)

In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
from datetime import timedelta
import scipy.stats as stats
import rasterio
from rasterio.transform import from_origin
from rasterio.features import rasterize
import geopandas as gpd
from shapely.geometry import Point
import requests

# Read the cleaned-up export CSV

In [5]:
# Load the cleaned export; later cells read its lat, long, year, time and
# LST_K columns.
export_clean_df = pd.read_csv(
    filepath_or_buffer='/content/sample_data/export_clean.csv',
)

# Build the sine-fit DataFrame

In [6]:
# Column layout of the sine-fit table: the location and year of each fit,
# followed by the four parameters of A * sin(2*pi*f*t + phi) + C.
sine_columns = [
    "lat",
    "long",
    "year",
    "A",    # amplitude
    "f",    # frequency
    "phi",  # phase offset
    "C",    # vertical offset
]

In [7]:
# Empty frame that carries only the column layout of the sine-fit results.
sine_df = pd.DataFrame(data=None, columns=sine_columns)

## Populate rows of the sine df

Approximate runtime: ~15 min with `min_count = 5`, ~6 min with `min_count = 10`.

In [8]:
def sine_model(t, A, f, phi, C):
    """Evaluate the sinusoid ``A * sin(2*pi*f*t + phi) + C``.

    Parameters
    ----------
    t : scalar or numpy array
        Point(s) at which to evaluate the curve.
    A : float
        Amplitude.
    f : float
        Frequency, in cycles per unit of ``t``.
    phi : float
        Phase offset in radians.
    C : float
        Constant vertical offset.

    Returns
    -------
    The model value(s), with the same shape as ``t``.
    """
    angle = 2 * np.pi * f * t + phi
    return A * np.sin(angle) + C

In [27]:
def populate_rows(rows, min_count, df=None):
    """Fit ``sine_model`` to each (lat, long, year) group and collect the parameters.

    For every coordinate and year with enough observations, the ``LST_K``
    values are fitted against time (in days since the group's first row) and
    a dict with keys ``lat``, ``long``, ``year``, ``A``, ``f``, ``phi``, ``C``
    is appended to ``rows``. The running coordinate index is printed every
    100 coordinates as a progress indicator.

    Parameters
    ----------
    rows : list
        Output list, extended in place with one dict per successful fit.
    min_count : int
        Minimum number of observations a year must have to be fitted. Years
        with fewer points than the model has parameters (4) are always
        skipped, whatever ``min_count`` is.
    df : pandas.DataFrame, optional
        Frame with ``lat``, ``long``, ``year``, ``time`` and ``LST_K``
        columns. Defaults to the notebook-level ``export_clean_df``.

    Returns
    -------
    int
        Number of fits that raised ``RuntimeError`` and were skipped.
    """
    if df is None:
        # Fall back to the frame loaded earlier in the notebook.
        df = export_clean_df

    # curve_fit's default (unbounded) method raises TypeError when there are
    # fewer data points than parameters, so never attempt such a fit.
    n_params = 4
    required_points = max(min_count, n_params)

    runtime_error_count = 0
    for coordinate_count, ((lat, lon), coord_df) in enumerate(
        df.groupby(['lat', 'long'])
    ):
        # Lightweight progress indicator.
        if coordinate_count % 100 == 0:
            print(coordinate_count)

        for year, year_df in coord_df.groupby('year'):
            if len(year_df) < required_points:
                continue

            test_dates = np.array(year_df['time'].to_numpy(), dtype="datetime64[s]")
            test_values = year_df['LST_K'].to_numpy()

            # Elapsed days since the group's first row (seconds / 86400),
            # computed in a single vectorized step.
            t = (test_dates - test_dates[0]) / np.timedelta64(1, 's') / 86400

            # Initial guesses: half the value range, one cycle per 365 days,
            # zero phase, and the mean as the offset.
            A_guess = (test_values.max() - test_values.min()) / 2
            C_guess = test_values.mean()
            f_guess = 1 / 365
            phi_guess = 0
            p0 = [A_guess, f_guess, phi_guess, C_guess]

            # NOTE(review): no bounds are passed, so f is free to move far
            # from the annual guess; consider screening implausible fits
            # downstream.
            try:
                params, _cov = curve_fit(
                    sine_model,
                    t,
                    test_values,
                    p0=p0,
                    maxfev=10000
                )
            except RuntimeError:
                # The optimizer did not converge; skip this year but keep count.
                runtime_error_count += 1
                continue

            A, f, phi, C = params
            rows.append({
                "lat": lat,
                "long": lon,
                "year": year,
                "A": A,
                "f": f,
                "phi": phi,
                "C": C
            })

    if runtime_error_count:
        print(f"{runtime_error_count} fits failed to converge and were skipped")
    return runtime_error_count

In [29]:
# Accumulate plain dicts and build the DataFrame once at the end; this is
# much faster than appending to a DataFrame row by row.
rows = []
min_count = 5  # minimum observations a (coordinate, year) group needs to be fitted
populate_rows(rows, min_count)
# Passing the column list keeps the expected schema even when no fit
# succeeded and `rows` is empty.
sine_df = pd.DataFrame(rows, columns=sine_columns)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600


In [30]:
# Inspect the fitted parameters: one row per (lat, long, year) fit.
sine_df

Unnamed: 0,lat,long,year,A,f,phi,C
0,37.565193,126.987004,2000,32885.355593,0.000046,1.524055,-32572.872374
1,37.565193,126.987004,2001,65698.941448,0.000032,1.539390,-65391.174033
2,37.565193,126.987004,2002,20.084628,0.002772,-1.631377,298.591862
3,37.565193,126.987004,2003,22836.352706,0.000060,1.524318,-22523.744092
4,37.565193,126.987004,2004,20.242575,0.002671,-1.691777,299.666486
...,...,...,...,...,...,...,...
131074,37.574882,127.041636,2021,24.414483,0.002203,5.248312,289.834540
131075,37.574882,127.041636,2022,37.725811,0.001623,-0.284917,275.273167
131076,37.574882,127.041636,2023,21.067508,0.002452,-1.030775,292.311670
131077,37.574882,127.041636,2024,22.473501,0.002433,-1.175560,295.178531


In [32]:
# Save the fit parameters; the file name records the min_count used for this run.
sine_df.to_csv(
    path_or_buf=f"/content/sample_data/sine_years_n{min_count}.csv",
    index=False,
)