In [None]:
import datetime
import geopandas
import numpy as np
import pandas as pd
import copy

#from sklearn.datasets import fetch_mldata
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

 # to enable ipympl interactive interface for plots
%matplotlib widget

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
import time
import matplotlib
from distinctipy import distinctipy # generate N distinct colors


In [None]:
# Names of the band columns (B2 ... B12) read from the samples table.
BANDS = [
    'B2', 'B3', 'B4', 'B5', 'B6',
    'B7', 'B8', 'B8A', 'B11', 'B12',
]
# Columns fed to the box plot, PCA and t-SNE below: every band plus NDVI.
NUMERIC_COLS = [*BANDS, 'NDVI']  #[ 'NDVI_diff']

In [None]:
# Read the merged samples file and expose its 'is_within_period' column under
# the shorter name 'har_evnt', which every later cell uses.
DF = geopandas.read_file('../data/merged_samples_gdf.geojson').rename(
    columns={'is_within_period': 'har_evnt'}
)
DF.shape

In [None]:
# Number of distinct sample identifiers in the full table, then a preview of it.
NUM_SAMPLES = np.unique(DF['sample_idx']).size
DF.head()

In [None]:
# Collect the point_idx of every row flagged har_evnt, then keep all rows that
# belong to one of those points (flagged or not).
indices = DF.loc[DF['har_evnt'] == True, 'point_idx']
# .copy() makes the subset an independent frame. Later cells add columns to it
# (PCA / t-SNE coordinates); doing that on a bare selection of DF makes pandas
# emit SettingWithCopyWarning.
points_with_change_df = DF.loc[DF['point_idx'].isin(indices)].copy()
df = points_with_change_df  # same object under the shorter name used below
df.shape  # original note: 366 * NUM_SAMPLES rows

In [None]:
# Reshape the selected rows into long form for the grouped box plot: one row
# per (input row, numeric column), with the column name stored in 'class' and
# the har_evnt flag carried along as a boolean.
#
# The per-column frames are collected in a list and concatenated once.
# Growing the result from an empty, float-typed seed frame re-copied all rows
# on every iteration and let the seed's dtypes leak into 'class'/'har_evnt'
# (recent pandas also warns about concatenating empty frames).
long_frames = []
for col_name in NUMERIC_COLS:
    curr_df = pd.DataFrame({
        'value': df[col_name],
        'class': col_name,  # scalar is broadcast over the rows of df
        'har_evnt': df['har_evnt'] == True,
    })
    long_frames.append(curr_df)

# ignore_index gives the stacked frame a fresh 0..n-1 index instead of
# repeating df's index labels once per numeric column.
stretch_numeric_df = pd.concat(long_frames, ignore_index=True)
stretch_numeric_df

In [None]:
# One box per numeric column, split by the har_evnt flag.
fig = plt.figure(figsize=(16, 10))
sns.boxplot(
    x="value",
    y="class",
    hue="har_evnt",
    data=stretch_numeric_df,
    ax=fig.gca(),
)

In [None]:
# Project the numeric columns onto three principal components and store the
# resulting coordinates as new columns of df.
numeric_col_values = df[NUMERIC_COLS].copy().to_numpy()
pca = PCA(n_components=3)
pca_result = pca.fit_transform(numeric_col_values)

# Each row of the transposed result is one component across all samples.
for component_name, component_values in zip(('pca_one', 'pca_two', 'pca_three'), pca_result.T):
    df[component_name] = component_values

print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))

In [None]:
# First two principal components, one colour per sample_idx value.
fig = plt.figure(figsize=(16, 10))
sample_palette = sns.color_palette("brg", NUM_SAMPLES)
sns.scatterplot(
    data=df,
    x="pca_one",
    y="pca_two",
    hue='sample_idx',
    palette=sample_palette,
    legend="full",
    alpha=0.4,
)
plt.show()

In [None]:
# 3-D view of the PCA coordinates: red where har_evnt is truthy, blue otherwise.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')

har_evnt_colors = df['har_evnt'].map(lambda flag: "red" if flag else "blue")
p = ax.scatter(
    df["pca_one"],
    df["pca_two"],
    df["pca_three"],
    color=har_evnt_colors,
    alpha=0.5,
)

ax.set(xlabel='pca-one', ylabel='pca-two', zlabel='pca-three')
plt.show()

In [None]:
# Same 3-D PCA view, coloured per sample. The digits after the first character
# of each sample_idx label (e.g. the 3 in 's3') select the entry in `colors`.
colors = distinctipy.get_colors(NUM_SAMPLES)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')

sample_colors = df['sample_idx'].map(lambda label: colors[int(label[1:])])
p = ax.scatter(
    df["pca_one"],
    df["pca_two"],
    df["pca_three"],
    color=sample_colors,
    alpha=0.5,
)

ax.set(xlabel='pca-one', ylabel='pca-two', zlabel='pca-three')

plt.show()

In [None]:

# 3-D PCA view with each point coloured by its NDVI value ('brg' colormap);
# the colorbar maps colours back to NDVI.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')

p = ax.scatter(
    df["pca_one"],
    df["pca_two"],
    df["pca_three"],
    c=df["NDVI"],
    cmap='brg',
)

ax.set(xlabel='pca-one', ylabel='pca-two', zlabel='pca-three')

fig.colorbar(p, ax=ax)
plt.show()

In [None]:
# 2-D t-SNE embedding of the feature matrix built in the PCA cell; the two
# coordinates are stored as new columns of df.
time_start = time.time()
tsne_params = dict(
    n_components=2,
    verbose=1,
    perplexity=40,
    random_state=0,  # fixed seed so the embedding is the same on every run
)
try:
    # scikit-learn 1.5 renamed `n_iter` to `max_iter`; the old keyword is
    # removed in 1.7, where passing it raises TypeError.
    tsne = TSNE(max_iter=300, **tsne_params)
except TypeError:
    # Older scikit-learn releases only know the original keyword.
    tsne = TSNE(n_iter=300, **tsne_params)
tsne_results = tsne.fit_transform(numeric_col_values)

print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

df['tsne_2d_one'] = tsne_results[:,0]
df['tsne_2d_two'] = tsne_results[:,1]

In [None]:
# 2-D t-SNE coordinates, one colour per sample_idx value.
fig = plt.figure(figsize=(16, 10))

sample_palette = sns.color_palette("hls", NUM_SAMPLES)
sns.scatterplot(
    data=df,
    x="tsne_2d_one",
    y="tsne_2d_two",
    hue="sample_idx",
    palette=sample_palette,
    alpha=0.5,
    legend="full",
)

# Same 2-D t-SNE coordinates, coloured by the har_evnt flag (two-colour palette).
fig = plt.figure(figsize=(16, 10))
har_evnt_palette = sns.color_palette("hls", 2)
sns.scatterplot(
    data=df,
    x="tsne_2d_one",
    y="tsne_2d_two",
    hue="har_evnt",
    palette=har_evnt_palette,
    alpha=0.5,
    legend="full",
)

In [None]:
# 3-D t-SNE embedding of a copy of the feature matrix built in the PCA cell;
# the three coordinates are stored as new columns of df.
time_start = time.time()
tsne_params = dict(
    n_components=3,
    verbose=1,
    perplexity=40,
    random_state=0,  # fixed seed so the embedding is the same on every run
)
try:
    # scikit-learn 1.5 renamed `n_iter` to `max_iter`; the old keyword is
    # removed in 1.7, where passing it raises TypeError.
    tsne = TSNE(max_iter=300, **tsne_params)
except TypeError:
    # Older scikit-learn releases only know the original keyword.
    tsne = TSNE(n_iter=300, **tsne_params)
tsne_results = tsne.fit_transform(numeric_col_values.copy())

print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

df['tsne_3d_one'] = tsne_results[:,0]
df['tsne_3d_two'] = tsne_results[:,1]
df['tsne_3d_three'] = tsne_results[:,2]

In [None]:
# 3-D t-SNE coordinates with each point coloured by its NDVI value
# ('brg' colormap); the colorbar maps colours back to NDVI.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

p = ax.scatter(
    df["tsne_3d_one"],
    df["tsne_3d_two"],
    df["tsne_3d_three"],
    c=df["NDVI"],
    cmap='brg',
)

ax.set(xlabel='tsne_3d_one', ylabel='tsne_3d_two', zlabel='tsne_3d_three')

fig.colorbar(p, ax=ax)
plt.show()



# 3-D t-SNE coordinates with the har_evnt column passed as the colour value,
# mapped through the 'Paired' colormap.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')

p = ax.scatter(
    df["tsne_3d_one"],
    df["tsne_3d_two"],
    df["tsne_3d_three"],
    c=df["har_evnt"],
    cmap='Paired',
)

ax.set(xlabel='tsne_3d_one', ylabel='tsne_3d_two', zlabel='tsne_3d_three')

fig.colorbar(p, ax=ax)
plt.show()

In [None]:
# Scratch arrays with one slot per sample (not filled anywhere in this cell).
x = np.empty(NUM_SAMPLES)
y = np.zeros(NUM_SAMPLES)

for sample_idx in range(NUM_SAMPLES):
    # sample_idx is an int here, so it must be converted before building the
    # 's<number>' label; the previous 's' + sample_idx raised TypeError on the
    # first iteration.
    curr_df = df.loc[df['sample_idx'] == 's' + str(sample_idx)]
    # FIXME(review): the column name below is empty, so this lookup raises
    # KeyError unless df really has a column named ''. It also rebinds `x`,
    # discarding the array allocated above. The intended column cannot be
    # determined from this notebook - fill it in before relying on this cell.
    x = curr_df['']

In [None]:

# Histogram of finHarvDat for the rows currently held in curr_df. Bin edges
# are placed every three weeks between the smallest start_date and the largest
# end_date found in df, converted to matplotlib date numbers.
title = "finHarvDat By Sample's Date Range"
range_start = min(df.start_date)
range_end = max(df.end_date)
my_bins = pd.date_range(start=range_start, end=range_end, freq='3W')
bin_edges = matplotlib.dates.date2num(my_bins)
grid = sns.displot(data=curr_df, x="finHarvDat", bins=bin_edges)
grid.set(title=title)


In [None]:
# Same finHarvDat histogram for curr_df, this time with 40 bins.
sns.displot(curr_df, x="finHarvDat", bins=40)