In [1]:
# Copyright (c) Microsoft Corporation. All rights reserved
# Licensed under the MIT License.
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")

import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import StandardScaler

from temporal_cluster_matching import utils, DataInterface, algorithms

## NAIP / Poultry barn data

In [2]:
# Load the poultry barn geometries from ../data/ through the project helper.
# The trailing expression displays how many geometries were loaded.
geoms = utils.get_poultry_barn_geoms("../data/")
len(geoms)

6013

In [3]:
# NAIP data loader; used below to fetch the image/mask stack for each geometry.
dataloader = DataInterface.NAIPDataLoader()

In [None]:
# For every poultry barn geometry, compute the Euclidean distance between the
# mean footprint colors of each pair of consecutive images in its stack
# (one image per year, judging by the loader's `years` return value), then
# cache the resulting table on disk.
all_distances = []
for i, geom in enumerate(geoms):
    if i % 100 == 0:
        print(i, len(geoms))
    data_images, masks, years = dataloader.get_data_stack_from_geom(geom, buffer=0)

    previous_year_footprint_color = None
    # Start with an empty list: N images yield N-1 distances. Seeding this with
    # a `None` placeholder would make every row one element longer, defeat the
    # `len(distances) == 3` padding below, and turn the saved array into an
    # object array that `np.load` cannot read back without `allow_pickle=True`.
    distances = []
    for image, mask in zip(data_images, masks):

        # Mean color over the pixels where the mask equals 1.
        average_footprint_color = image[mask == 1].mean(axis=0)
        if previous_year_footprint_color is not None:
            distances.append(np.linalg.norm(
                previous_year_footprint_color - average_footprint_color
            ))
        previous_year_footprint_color = average_footprint_color

    all_distances.append(distances)

## One of the geoms only intersects with 4 years
# Such a geometry has 3 distances instead of 4; prepend a 0 so that every row
# has the same length and the list converts to a rectangular numeric array.
for i, distances in enumerate(all_distances):
    if len(distances) == 3:
        all_distances[i] = [0] + distances

all_distances = np.array(all_distances)
np.save("../results/poultry_barn_inter_year_color_distances.npy", all_distances)

In [5]:
# Reload the cached poultry barn distances written by the previous cell, so the
# cells below can run without recomputing them.
all_distances = np.load("../results/poultry_barn_inter_year_color_distances.npy")

In [6]:
# Labeled subset of the poultry barns: `labeled_idxs` are used below as row
# indices into `all_distances`, and `labeled_years` are the matching targets
# (presumably the year associated with each labeled barn; confirm in utils).
labeled_idxs, labeled_years = utils.get_poultry_barn_labels("../data/")

In [7]:
# Build the supervised dataset: one row of inter-year distances per labeled
# index, paired with that index's year label.
labeled_pairs = list(zip(labeled_idxs, labeled_years))

x_all = np.array([all_distances[idx] for idx, _ in labeled_pairs])
y_all = np.array([year for _, year in labeled_pairs])

In [8]:
# Repeated hold-out evaluation: 50 random 80/20 train/test splits of the
# labeled poultry barn data, fitting a logistic regression on standardized
# inter-year distances to predict the year label. Reports mean and standard
# deviation of accuracy and of mean absolute error across the repeats.
accs = []
maes = []
for repeat in range(50):

    # Seed each split with the repeat index: the 50 splits still differ from
    # each other, but a fresh Restart & Run All reproduces the same numbers.
    x_train, x_test, y_train, y_test = train_test_split(
        x_all, y_all, test_size=0.2, random_state=repeat
    )

    # Fit the scaler on the training split only, then apply it to the test
    # split, so no test statistics leak into training.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = LogisticRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # Predictions are class labels; rounding and casting keeps them as
    # integers for the error metrics below.
    y_pred = np.round(y_pred).astype(int)

    acc = accuracy_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    accs.append(acc)
    maes.append(mae)

print(np.mean(accs), np.std(accs))
print(np.mean(maes), np.std(maes))

0.9075 0.017557049866079442
0.39289999999999997 0.08057971208685223


## S2 / Solar farm data

In [9]:
# Load the solar farm geometries from ../data/. This rebinds `geoms`, replacing
# the poultry barn geometries loaded earlier in the notebook.
# The trailing expression displays how many geometries were loaded.
geoms = utils.get_solar_farm_geoms("../data/")
len(geoms)

935

In [10]:
# S2 data loader; rebinds `dataloader`, replacing the NAIP loader used earlier.
dataloader = DataInterface.S2DataLoader()

In [None]:
# For every solar farm geometry, compute the Euclidean distance between the
# mean footprint colors of each pair of consecutive images in its stack, and
# cache the resulting table on disk. Rebinds `all_distances`.
all_distances = []
for i, geom in enumerate(geoms):
    if i % 10 == 0:
        print(i, len(geoms))

    data_images, masks, years = dataloader.get_data_stack_from_geom(geom, buffer=0.004)

    distances = []
    prev_color = None
    for image, mask in zip(data_images, masks):
        # Keep only the first 12 channels of the image.
        image = image[:, :, :12]

        # If the image and mask disagree on height or width, the footprint
        # pixels cannot be selected: record an all-zero row for this geometry
        # and stop processing its stack.
        if image.shape[0] != mask.shape[0] or image.shape[1] != mask.shape[1]:
            distances = [0, 0, 0, 0]
            break

        # Mean color over the pixels where the mask equals 1.
        mean_color = image[mask == 1].mean(axis=0)
        if prev_color is not None:
            distances.append(np.linalg.norm(prev_color - mean_color))
        prev_color = mean_color

    all_distances.append(distances)

all_distances = np.array(all_distances)
np.save("../results/solar_farm_inter_year_color_distances.npy", all_distances)

In [11]:
# Reload the cached solar farm distances written by the previous cell, so the
# cells below can run without recomputing them.
all_distances = np.load("../results/solar_farm_inter_year_color_distances.npy")

In [13]:
# Labeled subset of the solar farms: `labeled_idxs` are used below as row
# indices into `all_distances`, and `labeled_years` are the matching targets.
# A label of -1 is filtered out by the following cells.
labeled_idxs, labeled_years = utils.get_solar_farm_labels("../data/")

In [18]:
# Build the supervised dataset from the labeled solar farms, skipping every
# entry whose year label is -1.
labeled_pairs = [
    (idx, year)
    for idx, year in zip(labeled_idxs, labeled_years)
    if year != -1
]

x_all = np.array([all_distances[idx] for idx, _ in labeled_pairs])
y_all = np.array([year for _, year in labeled_pairs])

In [19]:
# Keep only the samples whose label is not -1, then replace NaN feature values
# with 0 so that scaling and model fitting do not see NaNs.
mask = y_all != -1
x_all, y_all = x_all[mask], y_all[mask]

# In-place NaN -> 0 replacement on the filtered feature matrix.
np.putmask(x_all, np.isnan(x_all), 0)

In [22]:
# Repeated hold-out evaluation on the labeled solar farm data: 50 random
# splits with 20% of the samples used for training and 80% held out for
# testing. A logistic regression on standardized inter-year distances predicts
# the year label; mean and standard deviation of accuracy and of mean absolute
# error across the repeats are printed.
accs = []
maes = []
for repeat in range(50):

    # Seed each split with the repeat index: the 50 splits still differ from
    # each other, but a fresh Restart & Run All reproduces the same numbers.
    x_train, x_test, y_train, y_test = train_test_split(
        x_all, y_all, test_size=0.8, random_state=repeat
    )

    # Fit the scaler on the training split only, then apply it to the test
    # split, so no test statistics leak into training.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = LogisticRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # Predictions are class labels; rounding and casting keeps them as
    # integers for the error metrics below.
    y_pred = np.round(y_pred).astype(int)

    # Score only samples whose label is not -1. The dataset cells filter these
    # out already, so this acts as a safeguard.
    mask = y_test != -1
    acc = accuracy_score(y_test[mask], y_pred[mask])
    mae = mean_absolute_error(y_test[mask], y_pred[mask])

    accs.append(acc)
    maes.append(mae)

print(np.mean(accs), np.std(accs))
print(np.mean(maes), np.std(maes))

0.7907565789473685 0.01304345795255656
0.28638157894736843 0.020847686652987744
