In [183]:
import sklearn as skl
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import pipeline, metrics
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn import linear_model, ensemble
from sklearn.metrics import make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_validate, ShuffleSplit, GridSearchCV
import numpy as np
import json
from functools import reduce
import os
from sklearn.base import BaseEstimator, TransformerMixin
from breizhcrops.models import TransformerModel
import torch



In [326]:
dataset_path = '/workspaces/BreizhCrops/data/serbia'
fold = 4

folds = {
    1: {'train' : ['data_serbia_01', 'data_serbia_02', 'data_serbia_03'], 'test': ['data_serbia_04']},
    2: {'train' : ['data_serbia_01', 'data_serbia_02', 'data_serbia_04'], 'test': ['data_serbia_03']},
    3: {'train' : ['data_serbia_01', 'data_serbia_03', 'data_serbia_04'], 'test': ['data_serbia_02']},
    4: {'train' : ['data_serbia_02', 'data_serbia_03', 'data_serbia_04'], 'test': ['data_serbia_01']},
}


fold_train = folds[fold]['train']
fold_test= folds[fold]['test']

X_train = []
y_train = []
X_test = []
y_test = []

for dataset_name in fold_train:
    num_instances = len(os.listdir(os.path.join(dataset_path,dataset_name))) - 1
    for i in range(num_instances):
        x = np.load(os.path.join(dataset_path,dataset_name,f"{i}.npy"))
        X_train.append(x)
    y_train += list(np.load(os.path.join(dataset_path,dataset_name,"y.npy")))
    
for dataset_name in fold_test:
    num_instances = len(os.listdir(os.path.join(dataset_path,dataset_name))) - 1
    for i in range(num_instances):
        x = np.load(os.path.join(dataset_path,dataset_name,f"{i}.npy"))
        X_test.append(x)
    y_test += list(np.load(os.path.join(dataset_path,dataset_name,"y.npy")))
    
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test= np.array(y_test)
    

In [328]:
weights = '/workspaces/BreizhCrops/results/france/2/TransformerEncoder/TransformerEncoder_input-dim=38_num-classes=9_d-model=64_d-inner=128_n-layers=5_n-head=2_dropout=0.017998950510888446_learning-rate=0.00017369201853408445_weight-decay=3.5156458637523697e-06.pth'

In [329]:
class TransformerFeatures(BaseEstimator, TransformerMixin):
    def __init__(self, weights):
        self.weights = weights
        self.model = TransformerModel(input_dim=38, num_classes=9, d_model=64, d_inner=128, n_layers=5, n_head=2, dropout=0.017998950510888446)
        self.model.load_state_dict(torch.load(weights, map_location=torch.device('cpu'))['model_state'])
        self.feature_names_in_ = []
        self.n_features_in_ = 0
    def fit(self, X, y=None):
        self.feature_names_in_ = np.array( [f"feature_{i}" for i in range(X.shape[2])], dtype = object)
        self.n_features_in_ = X.shape[2]
        return self
    def transform(self, X, y=None):
        X_transformed = []
        for x in X:
            x = torch.from_numpy(np.array([x])).type(torch.FloatTensor)
            x = self.model.inlinear(x)
            x = self.model.relu(x)
            x = x.transpose(0, 1) # N x T x D -> T x N x D
            x = self.model.transformerencoder(x)
            x = x.transpose(0, 1) # T x N x D -> N x T x D
            x = x.max(1)[0]
            x = self.model.relu(x)
            X_transformed.append(x.reshape(-1).detach().numpy())
        X_transformed = np.array(X_transformed)
        X_t = pd.DataFrame(columns = [f'feature_{i}' for i in range(64)])
        X_t[[f'feature_{i}' for i in range(64)]] = X_transformed
        return X_t 
    
    def get_feature_names_out(self, input_features = None):
        return np.array([f'feature_{i}' for i in range(64)], dtype = object)


In [330]:
transformer = TransformerFeatures(weights)
transformer.fit(X_train)
X_train_t = transformer.transform(X_train)
X_test_t = transformer.transform(X_test)

In [331]:
estimator= ensemble.RandomForestClassifier(bootstrap=True, max_features=int(64/3), min_samples_split = 2, min_samples_leaf=2, oob_score=True, n_jobs=-1, random_state = 0, n_estimators=500,  max_samples = 0.7, class_weight='balanced')

In [332]:
gs_all_metric_results = estimator.fit(X_train_t, y_train)

In [333]:
gs_all_metric_results.score(X_test_t, y_test)

y_pred = gs_all_metric_results.predict(X_test_t)

y_score = gs_all_metric_results.predict_proba(X_test_t)
field_ids = np.array(list(range(len(y_test))))

In [336]:
logdir = f"/workspaces/BreizhCrops/results/serbia/{fold}/TransformerEncoderRF"

print(metrics.classification_report(y_test, y_pred),
          file=open(os.path.join(logdir, "classification_report.txt"), "w"))
np.save(os.path.join(logdir,"y_pred.npy"), y_pred)
np.save(os.path.join(logdir, "y_true.npy"), y_test)
np.save(os.path.join(logdir, "y_score.npy"), y_score)
np.save(os.path.join(logdir, "field_ids.npy"), field_ids)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [337]:
print(f"Test set accuracy: {gs_all_metric_results.score(X_test_t,y_test)}")

Test set accuracy: 0.8290909090909091


In [338]:
model = TransformerModel(input_dim=38, num_classes=9, d_model=64, d_inner=128, n_layers=5, n_head=2, dropout=0.017998950510888446)
model.load_state_dict(torch.load(weights, map_location=torch.device('cpu'))['model_state'])

<All keys matched successfully>

In [340]:
for param in model.parameters():
    param.requires_grad = False

In [341]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.0124, -0.1171,  0.0790,  ...,  0.0614,  0.0485,  0.0673],
        [-0.0134, -0.1975, -0.0741,  ..., -0.1090, -0.0673,  0.0851],
        [ 0.0334,  0.0656, -0.1366,  ..., -0.0906,  0.0578,  0.0067],
        ...,
        [-0.1708, -0.0688, -0.0212,  ...,  0.1266, -0.1107,  0.0827],
        [ 0.1776, -0.1023, -0.0364,  ..., -0.1165, -0.0023, -0.0486],
        [-0.1048, -0.0877,  0.0147,  ..., -0.1536,  0.0669,  0.0832]])
Parameter containing:
tensor([ 0.0229, -0.1043,  0.0119, -0.1681, -0.0313,  0.0760,  0.0971, -0.0218,
         0.0164,  0.0329,  0.0855, -0.0355,  0.0537,  0.0866,  0.1069,  0.0937,
         0.0522,  0.0140, -0.0917,  0.0339, -0.0914,  0.0476, -0.1136, -0.0874,
         0.0461,  0.0759, -0.0323,  0.0610,  0.0787,  0.0433,  0.0864, -0.0235,
        -0.0778,  0.0966,  0.1567, -0.0188, -0.0356, -0.0260, -0.0805, -0.0565,
        -0.0173, -0.0359, -0.0624, -0.1068,  0.1289, -0.0092,  0.1243, -0.0008,
         0.0077, -0.1144,  0.0559,  0.0535