In [None]:
pip install rasterio

In [None]:
pip install rioxarray

In [None]:
pip install xarray-spatial

In [None]:
pip install pystac-client

In [None]:
pip install planetary-computer

In [None]:
pip install odc-stac

In [None]:
pip install odc-algo

In [34]:
# Supress Warnings 
import warnings
warnings.filterwarnings('ignore')

# Import common GIS tools
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import rasterio.features
import rioxarray as rio
import xrspatial.multispectral as ms

# Import Planetary Computer tools
import pystac_client
import planetary_computer as pc
pc.settings.set_subscription_key('99d0028b4d864147958f0b6b44663e5f')
#import odc
from odc.stac import stac_load
from odc.algo import to_rgba
from tqdm import tqdm

# For finetuning ResNet-18
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

import time

In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
crop_presence_data = pd.read_csv("/content/drive/MyDrive/Crop_Location_Data_20221201.csv")

In [11]:
crop_presence_data["Latitude"] = crop_presence_data["Latitude and Longitude"].apply(lambda x: float(x[1:-1].split(",")[0]))
crop_presence_data["Longitude"] = crop_presence_data["Latitude and Longitude"].apply(lambda x: float(x[1:-1].split(",")[1]))

In [12]:
crop_presence_data["Class"] = crop_presence_data["Class of Land"].apply(lambda x: 1 if x == "Rice" else 0)

In [13]:
crop_presence_data.head()

Unnamed: 0,Latitude and Longitude,Class of Land,Latitude,Longitude,Class
0,"(10.323727047081501, 105.2516346045924)",Rice,10.323727,105.251635,1
1,"(10.322364360592521, 105.27843410554115)",Rice,10.322364,105.278434,1
2,"(10.321455902933202, 105.25254306225168)",Rice,10.321456,105.252543,1
3,"(10.324181275911162, 105.25118037576274)",Rice,10.324181,105.25118,1
4,"(10.324635504740822, 105.27389181724476)",Rice,10.324636,105.273892,1


In [14]:
box_size_deg = 0.10
resolution = 20  # meters per pixel 
scale = resolution / 111320.0 # degrees per pixel for CRS:4326 
stac = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

In [15]:
def get_data_latlong(lat, long, time_window = "2020-03-20/2020-03-21"):
    min_lon = long-box_size_deg/2
    min_lat = lat-box_size_deg/2
    max_lon = long+box_size_deg/2
    max_lat = lat+box_size_deg/2
    bounds = (min_lon, min_lat, max_lon, max_lat)
    search = stac.search(collections=["sentinel-2-l2a"], bbox=bounds, datetime=time_window)
    items = list(search.get_all_items())
    xx = stac_load(
        items,
        bands=["red", "green", "blue", "nir", "SCL"],
        crs="EPSG:4326", # Latitude-Longitude
        resolution=scale, # Degrees
        chunks={"x": 2048, "y": 2048},
        dtype="uint16",
        patch_url=pc.sign,
        bbox=bounds
    )
    return xx

In [16]:
lat_arr = crop_presence_data["Latitude"]
long_arr = crop_presence_data["Longitude"]

In [27]:
sample_lst = [0] * 20 
stac = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
for inx in tqdm(range(20), desc = "Running"):
    xx = get_data_latlong(lat_arr[inx], long_arr[inx])
    sample_lst[inx] = xx

Running: 100%|██████████| 20/20 [00:05<00:00,  3.71it/s]


In [28]:
sample = [0] * 20
for inx in tqdm(range(20), desc = "Running"):
    temp = sample_lst[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    sample[inx] = temp.data.compute()

Running: 100%|██████████| 20/20 [00:23<00:00,  1.19s/it]


In [29]:
for inx in range(20):
  sample[inx] = np.resize(sample[inx], (3, 557, 557))
# Representing sample input

In [30]:
transformations = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [31]:
# Preprocessing data
for i in range(20):
  sample[i] = transformations(sample[i].reshape(557, 557, 3).astype('uint8'))

In [35]:
model = models.resnet18(pretrained=True)   #load resnet18 model
num_features = model.fc.in_features     #extract fc layers features
model.fc = nn.Linear(num_features, 2) #(num_of_class == 2)
model = model.to(device) 
criterion = nn.CrossEntropyLoss()  #(set loss function)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [134]:
sample_train = torch.stack(sample[0:16])
sample_test = torch.stack(sample[16:])

In [135]:
train_label = torch.tensor(np.array(crop_presence_data["Class"].iloc[0:16]))
test_label = torch.tensor(np.array(crop_presence_data["Class"].iloc[16:20]))

In [136]:
sample_train = torch.FloatTensor(sample_train)
train_label = torch.LongTensor(train_label)
sample_test = torch.FloatTensor(sample_test)
test_label = torch.LongTensor(test_label)

In [139]:
train_dataset = TensorDataset(sample_train, train_label)
test_dataset = TensorDataset(sample_test, test_label)

In [149]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=8)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=8)

In [None]:
num_epochs = 60   #(set no of epochs)
start_time = time.time() #(for showing time)
for epoch in range(num_epochs): #(loop for every epoch)
    print("Epoch {} running".format(epoch)) #(printing message)
    """ Training Phase """
    model.train()    #(training model)
    running_loss = 0.   #(set loss 0)
    running_corrects = 0 
    # load a batch data of images
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device) 
        # forward inputs and get output
        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        # get loss value and update the network weights
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(sample_train)
    epoch_acc = running_corrects / len(sample_train) * 100.
    print('[Train #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() -start_time))
    
    """ Testing Phase """
    model.eval()
    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        epoch_loss = running_loss / len(sample_test)
        epoch_acc = running_corrects / len(sample_test) * 100.
        print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time()- start_time))

In [17]:
xx_lst_1 = [0]*len(crop_presence_data)
stac = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
for inx in tqdm(range(len(crop_presence_data)), desc = "Running"):
    xx = get_data_latlong(lat_arr[inx], long_arr[inx])
    xx_lst_1[inx] = xx

Running: 100%|██████████| 600/600 [02:47<00:00,  3.58it/s]


In [18]:
xx_lst_np_11 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_1[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    xx_lst_np_11[inx] = temp.data.compute()

Running: 100%|██████████| 200/200 [02:51<00:00,  1.17it/s]


In [19]:
xx_lst_np_12 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_1[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    xx_lst_np_12[inx] = temp.data.compute()

Running: 100%|██████████| 200/200 [03:01<00:00,  1.10it/s]


In [21]:
xx_lst_np_13 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_1[400+inx].isel(time=0)[["red", "green", "blue"]].to_array()
    xx_lst_np_13[inx] = temp.data.compute()

Running: 100%|██████████| 200/200 [04:18<00:00,  1.29s/it]


In [22]:
for inx in range(200):
    xx_lst_np_11[inx] = np.resize(xx_lst_np_11[inx], (3, 557, 557))
    xx_lst_np_12[inx] = np.resize(xx_lst_np_12[inx], (3, 557, 557))
    xx_lst_np_13[inx] = np.resize(xx_lst_np_13[inx], (3, 557, 557))

Code for Resnet-18

In [37]:
data = xx_lst_np_11 + xx_lst_np_12 + xx_lst_np_13

In [38]:
transformations = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [40]:
for i in range(600):
  data[i] = transformations(data[i].reshape(557,557,3).astype('uint8'))

In [41]:
model = models.resnet18(pretrained=True)   #load resnet18 model
num_features = model.fc.in_features     #extract fc layers features
model.fc = nn.Linear(num_features, 2) #(num_of_class == 2)
model = model.to(device) 
criterion = nn.CrossEntropyLoss()  #(set loss function)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [44]:
import random
# get sample index
num_index = 600
random_sample_80 = random.sample(range(num_index), int(num_index * 0.8))
train_arr = []
test_arr = []
train_label = []
test_label = []
for i in range(num_index):
  if i in random_sample_80:
    train_arr.append(data[i])
    train_label.append(crop_presence_data["Class"].iloc[i])
  else:
    test_arr.append(data[i])
    test_label.append(crop_presence_data["Class"].iloc[i])

In [45]:
train_arr = torch.stack(train_arr)
test_arr = torch.stack(test_arr)

In [46]:
train_label = torch.tensor(np.array(train_label))
test_label = torch.tensor(np.array(test_label))

In [47]:
train_arr = torch.FloatTensor(train_arr)
train_label = torch.LongTensor(train_label)
test_arr = torch.FloatTensor(test_arr)
test_label = torch.LongTensor(test_label)

In [50]:
train_dataset = TensorDataset(train_arr, train_label)
test_dataset = TensorDataset(test_arr, test_label)

In [51]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=20, shuffle=True, num_workers=8)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=8)

In [None]:
num_epochs = 60   #(set no of epochs)
start_time = time.time() #(for showing time)
for epoch in range(num_epochs): #(loop for every epoch)
    print("Epoch {} running".format(epoch)) #(printing message)
    """ Training Phase """
    model.train()    #(training model)
    running_loss = 0.   #(set loss 0)
    running_corrects = 0 
    # load a batch data of images
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device) 
        # forward inputs and get output
        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        # get loss value and update the network weights
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects / len(train_dataset) * 100.
    print('[Train #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() -start_time))
    
    """ Testing Phase """
    model.eval()
    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        epoch_loss = running_loss / len(test_dataset)
        epoch_acc = running_corrects / len(test_dataset) * 100.
        print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time()- start_time))

In [55]:
save_path = '/content/drive/MyDrive/resnet18-eychallenge.pth'
torch.save(model.state_dict(), save_path)

In [None]:
model = models.resnet18(pretrained=True)   #load resnet18 model
num_features = model.fc.in_features #extract fc layers features
model.fc = nn.Linear(num_features, 2)#(num_of_class == 2)
model.load_state_dict(torch.load('/content/drive/MyDrive/resnet18-eychallenge.pth'))
model.to(device)

In [57]:
model.eval()
start_time = time.time()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
with torch.no_grad():
    running_loss = 0.
    running_corrects = 0
    for i, (inputs, labels) in enumerate(test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(test_dataset)
    epoch_acc = running_corrects / len(test_dataset) * 100.
    print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.
          format(epoch, epoch_loss, epoch_acc, time.time() - start_time))

[Test #59] Loss: 1.0771 Acc: 69.1667% Time: 2.1985s


Code for Decision Tree and Logistic Regression

In [None]:
# Now convert to 2D array to use
for inx in range(200):
    xx_lst_np_11[inx] = xx_lst_np_11[inx].reshape(3*557*557)
    xx_lst_np_12[inx] = xx_lst_np_12[inx].reshape(3*557*557)
    xx_lst_np_13[inx] = xx_lst_np_13[inx].reshape(3*557*557)

In [None]:
x = np.concatenate([np.array(xx_lst_np_11), np.array(xx_lst_np_12), np.array(xx_lst_np_13)], axis = 0)
# np.array(xx_lst_np_21), np.array(xx_lst_np_22), np.array(xx_lst_np_23)

In [None]:
from sklearn.preprocessing import MinMaxScaler

Scaler = MinMaxScaler()
x = Scaler.fit_transform(x)

In [None]:
y = np.concatenate([np.array(crop_presence_data["Class"])])

In [None]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 101)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
LR = LogisticRegression()

In [None]:
LR.fit(x_train, y_train)

In [None]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier()

In [None]:
clf.fit(x_train, y_train)

In [None]:
pred = clf.predict(x_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.64      0.78      0.70       166
           1       0.77      0.62      0.69       194

    accuracy                           0.69       360
   macro avg       0.70      0.70      0.69       360
weighted avg       0.71      0.69      0.69       360



In [58]:
test_data = pd.read_csv("/content/drive/MyDrive/challenge_1_submission_template_correct_columns_fixed.csv")

In [59]:
test_data.head()

Unnamed: 0,id,target
0,"(10.18019073690894, 105.32022315786804)",
1,"(10.561107033461816, 105.12772097986661)",
2,"(10.623790611954897, 105.13771401411867)",
3,"(10.583364246115156, 105.23946127195805)",
4,"(10.20744446668854, 105.26844107128906)",


In [60]:
test_data["Latitude"] = test_data["id"].apply(lambda x: float(x[1:-1].split(",")[0]))
test_data["Longitude"] = test_data["id"].apply(lambda x: float(x[1:-1].split(",")[1]))

In [61]:
test_data.head()

Unnamed: 0,id,target,Latitude,Longitude
0,"(10.18019073690894, 105.32022315786804)",,10.180191,105.320223
1,"(10.561107033461816, 105.12772097986661)",,10.561107,105.127721
2,"(10.623790611954897, 105.13771401411867)",,10.623791,105.137714
3,"(10.583364246115156, 105.23946127195805)",,10.583364,105.239461
4,"(10.20744446668854, 105.26844107128906)",,10.207444,105.268441


In [62]:
test_lst = [0]*250
lat_arr_test = test_data["Latitude"]
long_arr_test = test_data["Longitude"]
for inx in tqdm(range(250), desc="Running"):
    xx = get_data_latlong(lat_arr_test[inx], long_arr_test[inx], "2022-03-20/2022-03-21")
    test_lst[inx] = xx

Running: 100%|██████████| 250/250 [01:09<00:00,  3.59it/s]


In [63]:
test_lst_np_1 = [0] * 125
for inx in tqdm(range(125), desc="Running"):
    temp = test_lst[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    test_lst_np_1[inx] = temp.data.compute()

Running: 100%|██████████| 125/125 [02:12<00:00,  1.06s/it]


In [64]:
test_lst_np_2 = [0] * 125
for inx in tqdm(range(125), desc="Running"):
    temp = test_lst[125+inx].isel(time=0)[["red", "green", "blue"]].to_array()
    test_lst_np_2[inx] = temp.data.compute()

Running: 100%|██████████| 125/125 [01:57<00:00,  1.06it/s]


In [65]:
for inx in range(125):
    test_lst_np_1[inx] = np.resize(test_lst_np_1[inx], (3, 557, 557))
    test_lst_np_2[inx] = np.resize(test_lst_np_2[inx], (3, 557, 557))

Test for Resnet-18

In [78]:
my_test_data = test_lst_np_1 + test_lst_np_2

In [80]:
transformations = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [81]:
for i in range(250):
  my_test_data[i] = transformations(my_test_data[i].reshape(557,557,3).astype('uint8'))

In [82]:
my_test_arr = torch.stack(my_test_data)
my_test_arr = torch.FloatTensor(my_test_arr)

In [91]:
my_dummy_label = [0] * 250
my_dummy_label = torch.LongTensor(np.array(my_dummy_label))

In [92]:
my_test_dataset = TensorDataset(my_test_arr, my_dummy_label)

In [93]:
my_test_dataloader = torch.utils.data.DataLoader(my_test_dataset, batch_size=20, shuffle=False, num_workers=8)

In [94]:
# Loop over the DataLoader and pass the data through the model
predictions = []
with torch.no_grad():
    for i, (inputs, labels) in enumerate(my_test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        predictions += preds.tolist()

# Save the predictions to a file
df = pd.DataFrame({'target': predictions})

In [95]:
df

Unnamed: 0,target
0,1
1,0
2,1
3,1
4,1
...,...
245,0
246,1
247,1
248,1


In [97]:
test_data["target"] = df["target"]

Test for DT and LR

In [None]:
for inx in range(125):
    test_lst_np_1[inx] = np.resize(test_lst_np_1[inx], (3, 557, 557)).reshape(3*557*557)
    test_lst_np_2[inx] = np.resize(test_lst_np_2[inx], (3, 557, 557)).reshape(3*557*557)

In [None]:
my_test = np.concatenate([np.array(test_lst_np_1), np.array(test_lst_np_2)], axis = 0)

In [None]:
pred_test = clf.predict(my_test)

In [None]:
len(pred_test)

In [None]:
test_data["target"] = pd.DataFrame({"target": pred_test})

In [None]:
test_data.head()

In [98]:
export = test_data[["id", "target"]]

In [99]:
export["target"] = export["target"].apply(lambda x: "Rice" if 1 else "Non Rice")

In [100]:
export.to_csv("/content/drive/MyDrive/Test_result_colab.csv", index = False)

In [None]:
"""
Code for getting data for another date
xx_lst_2 = [0]*len(crop_presence_data)
stac = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
for inx in tqdm(range(len(crop_presence_data)), desc = "Running"):
    xx = get_data_latlong(lat_arr[inx], long_arr[inx], "2021-03-20/2021-03-21")
    xx_lst_2[inx] = xx
xx_lst_np_21 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_2[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    xx_lst_np_21[inx] = temp.data.compute()
xx_lst_np_22 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_2[inx].isel(time=0)[["red", "green", "blue"]].to_array()
    xx_lst_np_22[inx] = temp.data.compute()
xx_lst_np_23 = [0] * 200
for inx in tqdm(range(200), desc = "Running"):
    temp = xx_lst_2[inx].isel(time=0)[["red", "green", "blue"]].to_array()
xx_lst_np_23[inx] = temp.data.compute()xx_lst_np_21[inx] = np.resize(xx_lst_np_21[inx], (3, 557, 557)).reshape(3*557*557)
xx_lst_np_22[inx] = np.resize(xx_lst_np_22[inx], (3, 557, 557)).reshape(3*557*557)
xx_lst_np_23[inx] = np.resize(xx_lst_np_23[inx], (3, 557, 557)).reshape(3*557*557)
"""

Running: 100%|██████████| 600/600 [02:18<00:00,  4.32it/s]
