# Goal

Our goal in this notebook is to take a time series and fill its gaps (missing values). We will do this in the following ways.
   
   1. Let $g_1$, $g_2$, $\cdots$, $g_5$ be 5 consecutive missing values (a gap), and let $T_0$ and $T_6$ be the data points immediately before $g_1$ and immediately after $g_5$, respectively. A straight line is drawn from $T_0$ to $T_6$, and the missing values are filled with the points on that line.
   2. Figure out what time series linear regression is and implement it (or, perhaps, it is already implemented).

In [1]:
import csv
import numpy as np
import pandas as pd
# import geopandas as gpd
from IPython.display import Image
# from shapely.geometry import Point, Polygon
from math import factorial
import scipy
import scipy.signal
import os, os.path

from datetime import date
import datetime
import time

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.linear_model import LinearRegression
from patsy import cr

# from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sb


import sys

sys.path.append('/Users/hn/Documents/00_GitHub/Ag/remote_sensing/python/')
import remote_sensing_core as rc
import remote_sensing_core as rcp


In [2]:
# Folder that holds the sample regularized time-series CSV used in this notebook.
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
data_dir = "/Users/hn/Documents/01_research_data/remote_sensing/test_data/"

In [101]:
# Read the sample series and order its rows by image year, then by day of year.
f_name = "a_regularized_TS.csv"
a_regularized_TS = (
    pd.read_csv(data_dir + f_name, low_memory=False)
      .sort_values(by=['image_year', 'doy'])
)

# Parameters

In [102]:
# indeks : name of the column whose values are gap-filled below.
# SF_year: year handed to the x-axis helper as its SF_yr argument.
indeks, SF_year = "EVI", 2017

# Linear Line

In [103]:
# Peek at the first two rows to check the available columns and the row order.
a_regularized_TS.head(2)

Unnamed: 0,ID,Acres,county,CropGrp,CropTyp,DataSrc,ExctAcr,IntlSrD,Irrigtn,LstSrvD,Notes,RtCrpTy,Shap_Ar,Shp_Lng,TRS,image_year,SF_year,doy,EVI
0,115184_WSDA_SF_2017,72.0,Grant,Orchard,apple,wsda,72.371199,2006/08/31,micro-sprinkler,2017/04/24,,,292875.850894,3308.620365,T13R24E4,2016,2017,274,0.417561
1,115184_WSDA_SF_2017,72.0,Grant,Orchard,apple,wsda,72.371199,2006/08/31,micro-sprinkler,2017/04/24,,,292875.850894,3308.620365,T13R24E4,2016,2017,284,-2.0


In [104]:
# x positions of the samples, as produced by the project helper.
x_axis = rc.extract_XValues_of_RegularizedTS_3Yrs(
    regularized_TS=a_regularized_TS,
    SF_yr=SF_year,
)

# Take the index column as a NumPy array, copied first so that edits to the
# array do not touch the DataFrame column.
TS_array = a_regularized_TS[indeks].copy().values
TS_array

array([ 0.4175614 , -2.        ,  0.74173785,  0.72328165,  0.24971688,
       -2.        ,  0.45789181, -2.        ,  0.37367702, -2.        ,
       -2.        , -2.        , -2.        , -2.        ,  0.24103474,
        0.22458873, -2.        , -2.        ,  0.24738865,  0.41210467,
        0.49268638,  0.58170796,  0.54144885,  0.62809484,  0.75182253,
        0.72722519,  0.8272405 ,  0.88276165,  0.86827636,  0.73957461,
        0.66953682,  0.72046171,  0.63967863,  0.64954225,  0.69626907,
        0.72598694,  0.72678947, -2.        ,  0.73320505,  0.7036713 ,
        0.70267974,  0.48580156,  0.54348966, -2.        ,  0.4343211 ,
        0.36825969, -2.        ,  0.32663178,  0.21325563,  0.30671939,
       -2.        ,  0.30430575,  0.3123942 ,  0.26554537])

In [33]:
# Scratch check: mark the first sample and the last three samples as missing (-2),
# presumably to exercise the leading/trailing-gap handling -- TODO confirm.
TS_array[[0, 51, 52, 53]] = -2
TS_array.shape

(54,)

In [105]:
# Positions holding the -2 marker (treated as missing) and positions holding
# any other value.
missing_indicies = np.flatnonzero(TS_array == -2)
Notmissing_indicies = np.flatnonzero(TS_array != -2)
print(missing_indicies)
print(Notmissing_indicies)

[ 1  5  7  9 10 11 12 13 16 17 37 43 46 50]
[ 0  2  3  4  6  8 14 15 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 34 35 36 38 39 40 41 42 44 45 47 48 49 51 52 53]


In [107]:
# Locate the -2 markers (missing samples) and the remaining samples.
missing_indicies = np.flatnonzero(TS_array == -2)
Notmissing_indicies = np.flatnonzero(TS_array != -2)

# A run of missing samples that touches either end of the series has a
# neighbour on one side only.  Pad such runs with the nearest available value
# so that every remaining gap is bounded on both sides.
left_pointer, right_pointer = Notmissing_indicies[0], Notmissing_indicies[-1]

if left_pointer > 0:
    # Leading run: copy the first available value backwards.
    TS_array[:left_pointer] = TS_array[left_pointer]

if right_pointer < (len(TS_array) - 1):
    # Trailing run: copy the last available value forwards.
    TS_array[right_pointer:] = TS_array[right_pointer]

# Padding may have changed which positions are missing; recompute both sets.
missing_indicies = np.flatnonzero(TS_array == -2)
Notmissing_indicies = np.flatnonzero(TS_array != -2)

# Index of the last available sample, and the position right after the first one.
# NOTE(review): the consumer of `stop` is not in this cell -- confirm it is still needed.
stop = right_pointer
right_pointer = left_pointer + 1


In [109]:
# Fill every interior gap (run of -2 values) with points on the straight line
# that joins the sample just before the gap to the sample just after it, then
# write the filled series back into the DataFrame column.
missing_indicies = np.flatnonzero(TS_array == -2)

# A gap touching either end of the series has a neighbour on one side only:
# on the left, index -1 would silently wrap around to the last sample; on the
# right, the scan below would run past the end of the array.  Fail loudly.
if len(missing_indicies) > 0 and (
    missing_indicies[0] == 0 or missing_indicies[-1] == len(TS_array) - 1
):
    raise ValueError(
        "TS_array starts or ends with a missing value (-2); "
        "fill the leading/trailing gaps before interpolating."
    )

while len(missing_indicies) > 0:
    # Sample immediately to the left of the first remaining gap.
    left_pointer = missing_indicies[0] - 1
    left_value = TS_array[left_pointer]

    # Walk right until the first non-missing sample after the gap.
    right_pointer = missing_indicies[0]
    while TS_array[right_pointer] == -2:
        right_pointer += 1
    right_value = TS_array[right_pointer]

    if (right_pointer - left_pointer) == 2:
        # Single missing sample: use the midpoint of its two neighbours and
        # skip the line fit.
        # NOTE(review): this equals the point on the line only when x_axis is
        # evenly spaced around the gap -- confirm.
        TS_array[left_pointer + 1] = 0.5 * (left_value + right_value)
    else:
        # Line y = slope * x + b through the two bounding samples.
        slope = (right_value - left_value) / (x_axis[right_pointer] - x_axis[left_pointer])
        b = right_value - (slope * x_axis[right_pointer])
        TS_array[left_pointer + 1 : right_pointer] = slope * x_axis[left_pointer + 1 : right_pointer] + b

    # Refresh after EVERY fill (both branches) so the next pass starts at the
    # next gap rather than revisiting the sample that was just filled.
    missing_indicies = np.flatnonzero(TS_array == -2)

a_regularized_TS[indeks] = TS_array

# Time Series Linear Regression