In [1]:
from google.colab import userdata
username = userdata.get('KAGGLE_USER')
key = userdata.get('KAGGLE_KEY')
# Echo the credentials into the kaggle.json file
!mkdir -p ~/.kaggle
!echo '{{"username":"{username}","key":"{key}"}}' > ~/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json

In [2]:
!kaggle competitions download -c forest-type-classification-spai
!unzip forest-type-classification-spai

Downloading forest-type-classification-spai.zip to /content
  0% 0.00/463k [00:00<?, ?B/s]
100% 463k/463k [00:00<00:00, 37.0MB/s]
Archive:  forest-type-classification-spai.zip
  inflating: metaData.csv            
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


# NN

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim
import joblib

In [4]:
data = pd.read_csv('train.csv')

In [5]:
data = data.drop(columns=['id'])

In [6]:
X = data.drop(columns=['nforest_type'])
y = data['nforest_type']

In [7]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler,'scaler.pkl')

['scaler.pkl']

In [10]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [11]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [12]:

class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=128)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.2)
        self.linear2 = nn.Linear(in_features=128, out_features=128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.2)
        self.linear3 = nn.Linear(in_features=128, out_features=128)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(p=0.2)
        self.linear4 = nn.Linear(in_features=128, out_features=128)
        self.relu4 = nn.ReLU()
        self.dropout4 = nn.Dropout(p=0.2)
        self.linear5 = nn.Linear(in_features=128, out_features=output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu1(self.linear1(x))
        x = self.dropout1(x)
        x = self.relu2(self.linear2(x))
        x = self.dropout2(x)
        x = self.relu3(self.linear3(x))
        x = self.dropout3(x)
        x = self.relu4(self.linear4(x))
        x = self.dropout4(x)
        x = self.softmax(self.linear5(x))
        return x

In [13]:
input_size = X_train.shape[1]  # Number of features
output_size = len(label_encoder.classes_)  # Number of classes
learning_rate = 0.001
num_epochs = 200

In [14]:
model = NeuralNetwork(input_size=input_size, output_size=output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
label_encoder.classes_

array(['DDF', 'DEF', 'MDF'], dtype=object)

In [16]:
# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for features, labels in train_loader:
        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training complete")

Epoch [10/200], Loss: 1.1371
Epoch [20/200], Loss: 0.8886
Epoch [30/200], Loss: 0.8860
Epoch [40/200], Loss: 0.6809
Epoch [50/200], Loss: 0.8740
Epoch [60/200], Loss: 0.9895
Epoch [70/200], Loss: 0.7246
Epoch [80/200], Loss: 0.8259
Epoch [90/200], Loss: 0.8787
Epoch [100/200], Loss: 0.8176
Epoch [110/200], Loss: 0.9602
Epoch [120/200], Loss: 0.9173
Epoch [130/200], Loss: 0.6516
Epoch [140/200], Loss: 0.7602
Epoch [150/200], Loss: 0.9198
Epoch [160/200], Loss: 0.8583
Epoch [170/200], Loss: 0.6152
Epoch [180/200], Loss: 0.9093
Epoch [190/200], Loss: 0.7629
Epoch [200/200], Loss: 0.8769
Training complete


In [17]:
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [18]:
# Evaluation
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for features, labels in test_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test set: {100 * correct / total} %')


Accuracy of the model on the test set: 72.46265798544619 %


In [19]:
torch.save(model, 'model.pth')

In [20]:
# Load the new data
new_data = pd.read_csv('test.csv')

# Extract the 'id' column
ids = new_data['id']

# Drop the 'id' column
new_data = new_data.drop(columns=['id'])

# Load the saved scaler and transform the new data
X_new = scaler.transform(new_data)

# Convert to PyTorch tensor
X_new_tensor = torch.tensor(X_new, dtype=torch.float32)


In [21]:
with torch.no_grad():
    outputs = model(X_new_tensor)
    _, predicted = torch.max(outputs, 1)

In [22]:
input_size = X_new.shape[1]
output_size = len(label_encoder.classes_)
model = NeuralNetwork(input_size=input_size, output_size=output_size)
model.eval()

NeuralNetwork(
  (linear1): Linear(in_features=12, out_features=128, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (linear2): Linear(in_features=128, out_features=128, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.2, inplace=False)
  (linear3): Linear(in_features=128, out_features=128, bias=True)
  (relu3): ReLU()
  (dropout3): Dropout(p=0.2, inplace=False)
  (linear4): Linear(in_features=128, out_features=128, bias=True)
  (relu4): ReLU()
  (dropout4): Dropout(p=0.2, inplace=False)
  (linear5): Linear(in_features=128, out_features=3, bias=True)
  (softmax): Softmax(dim=1)
)

In [23]:
with torch.no_grad():
    outputs = model(X_new_tensor)
    _, predicted = torch.max(outputs, 1)

# Convert numerical predictions back to original class labels
predicted_labels = label_encoder.inverse_transform(predicted.numpy())

# Create a DataFrame with 'id' and 'predicted_nforest_type'
results = pd.DataFrame({
    'id': ids,
    'predicted_nforest_type': predicted_labels
})

# Save the results to a new CSV file
results.to_csv('yay.csv', index=False)

print("yay.csv")


yay.csv


# Adding feature engineering

In [24]:
def ften(df):
  df['NDWI'] = (df['b3'] - df['b8']) / (df['b3'] + df['b8'])

  df['NDSI'] = (df['b3'] - df['b11']) / (df['b3'] + df['b11'])

  df['BSI'] = ((df['b11'] + df['b4']) - (df['b8'] + df['b2'])) / ((df['b11'] + df['b4']) + (df['b8'] + df['b2']))

  df['NBR1'] = (df['b8'] - df['b11']) / (df['b8'] + df['b11'])

  df['NBR3'] = (df['b8'] - df['b12']) / (df['b8'] + df['b12'])

  df['NBR4'] = (df['b8_a'] - df['b12']) / (df['b8_a'] + df['b12'])

  df['AFRI1'] = (df['b8'] - (0.66 * df['b11'])) / (df['b8'] + (0.66 * df['b11']))

  df['AFRI2'] = (df['b8_a'] - (0.66 * df['b11'])) / (df['b8_a'] + (0.66 * df['b11']))

  df['AFRI3'] = (df['b8'] - (0.66 * df['b12'])) / (df['b8'] + (0.66 * df['b12']))

  df['AFRI4'] = (df['b8_a'] - (0.66 * df['b12'])) / (df['b8_a'] + (0.66 * df['b12']))

  df['BNDVI1'] = (df['b8'] - df['b2']) / (df['b8'] + df['b2'])

  df['BNDVI2'] = (df['b8_a'] - df['b2']) / (df['b8_a'] + df['b2'])

  df['BWDRVI1'] = ((0.1 * df['b8']) - df['b2']) / ((0.1 * df['b8']) + df['b2'])

  df['BWDRVI2'] = ((0.1 * df['b8_a']) - df['b2']) / ((0.1 * df['b8_a']) + df['b2'])

  df['NDVI1'] = (df['b8'] - df['b4']) / (df['b8'] + df['b4'])

  df['NDVI2'] = (df['b8_a'] - df['b4']) / (df['b8_a'] + df['b4'])

  df['WDRVI1'] = ((0.1 * df['b8']) - df['b4']) / ((0.1 * df['b8']) + df['b4'])

  df['WDRVI2'] = ((0.1 * df['b8_a']) - df['b4']) / ((0.1 * df['b8_a']) + df['b4'])

  df['SAVI1'] = ((1 + 0.5) * (df['b8'] - df['b4'])) / (df['b8'] + df['b4'] + 0.5)

  df['SAVI2'] = ((1 + 0.5) * (df['b8_a'] - df['b4'])) / (df['b8_a'] + df['b4'] + 0.5)

  df['GNDVI1'] = (df['b8'] - df['b3']) / (df['b8'] + df['b3'])

  df['GNDVI2'] =(df['b8_a'] - df['b3']) / (df['b8_a'] + df['b3'])

  df['NDRE1'] =(df['b8'] - df['b5']) / (df['b8'] + df['b5'])

  df['NDRE2'] =(df['b8'] - df['b6']) / (df['b8'] + df['b6'])

  df['NDRE3'] =(df['b8'] - df['b7']) / (df['b8'] + df['b7'])

  df['NDRE4'] =(df['b8_a'] - df['b5']) / (df['b8_a'] + df['b5'])

  df['NDRE5'] =(df['b8_a'] - df['b6']) / (df['b8_a'] + df['b6'])

  df['NDRE6'] =(df['b8_a'] - df['b7']) / (df['b8_a'] + df['b7'])

  df['VIgreen'] =(df['b3'] - df['b4']) / (df['b3'] + df['b4'])

  df['CIgreen1'] =(df['b8'] / df['b3']) - 1

  df['CIgreen2'] =(df['b8_a'] / df['b3']) - 1

  df['CIrededge1'] =(df['b8'] / df['b5']) - 1

  df['CIrededge2'] =(df['b8'] / df['b6']) - 1

  df['CIrededge3'] =(df['b8'] / df['b7']) - 1

  df['CIrededge4'] =(df['b8_a'] / df['b5']) - 1

  df['CIrededge5'] =(df['b8_a'] / df['b6']) - 1

  df['CIrededge6'] =(df['b8_a'] / df['b7']) - 1

  df['CI'] =(df['b4'] - df['b2']) / df['b4']

  df['CVI1'] =df['b8'] * (df['b4'] / (df['b3']^2))

  df['CVI2'] = df['b8_a'] * (df['b4'] / (df['b3']^2))

  df['CCCI1'] = ((df['b8'] - df['b5']) / (df['b8'] + df['b5'])) / ((df['b8'] - df['b4']) / (df['b8'] + df['b4']))

  df['CCCI2'] = ((df['b8'] - df['b6']) / (df['b8'] + df['b6'])) / ((df['b8'] - df['b4']) / (df['b8'] + df['b4']))

  df['CCCI3'] = ((df['b8'] - df['b7']) / (df['b8'] + df['b7'])) / ((df['b8'] - df['b4']) / (df['b8'] + df['b4']))

  df['CCCI4'] = ((df['b8_a'] - df['b5']) / (df['b8_a'] + df['b5'])) / ((df['b8_a'] - df['b4']) / (df['b8_a'] + df['b4']))

  df['CCCI5'] = ((df['b8_a'] - df['b6']) / (df['b8_a'] + df['b6'])) / ((df['b8_a'] - df['b4']) / (df['b8_a'] + df['b4']))

  df['CCCI6'] = ((df['b8_a'] - df['b7']) / (df['b8_a'] + df['b7'])) / ((df['b8_a'] - df['b4']) / (df['b8_a'] + df['b4']))

  df['EVI1'] = 2.5 * ((df['b8'] - df['b4']) / (df['b8'] + (6 * df['b4']) - (7.5 * df['b2']) + 1))

  df['EVI2'] = 2.5 * ((df['b8_a'] - df['b4']) / (df['b8_a'] + (6 * df['b4']) - (7.5 * df['b2']) + 1))

  df['GARI1'] = (df['b8'] - (df['b3'] - (df['b2'] - df['b4']))) / (df['b8'] - (df['b3'] + (df['b2'] - df['b4'])))

  df['GARI2'] = (df['b8_a'] - (df['b3'] - (df['b2'] - df['b4']))) / (df['b8_a'] - (df['b3'] + (df['b2'] - df['b4'])))

  df['GLT'] = ((2 * df['b3']) - (df['b4'] + df['b2'])) / ((2 * df['b3']) + (df['b4'] + df['b2']))

  df['GBNDVI1'] = (df['b8'] - (df['b3'] + df['b2'])) / (df['b8'] + (df['b3'] + df['b2']))

  df['GBNDVI2'] = (df['b8_a'] - (df['b3'] + df['b2'])) / (df['b8_a'] + (df['b3'] + df['b2']))

  df['GRNDVI1'] = (df['b8'] - (df['b3'] + df['b4'])) / (df['b8'] + (df['b3'] + df['b4']))

  df['GRNDVI2'] = (df['b8_a'] - (df['b3'] + df['b4'])) / (df['b8_a'] + (df['b3'] + df['b4']))

  df['SLAVI1'] = df['b8'] / (df['b4'] + df['b11'])

  df['SLAVI2'] = df['b8_a'] / (df['b4'] + df['b11'])

  df['SLAVI3'] = df['b8'] / (df['b4'] + df['b12'])

  df['SLAVI4'] = df['b8_a'] / (df['b4'] + df['b12'])

  df = df.drop(['b1','b12','b2','b3','b4','b5','b6','b7','b8','b8_a'],axis=1)
  return df

In [25]:
add_ft_data = ften(data.copy())
add_ft_data

Unnamed: 0,b11,b9,nforest_type,NDWI,NDSI,BSI,NBR1,NBR3,NBR4,AFRI1,...,GARI2,GLT,GBNDVI1,GBNDVI2,GRNDVI1,GRNDVI2,SLAVI1,SLAVI2,SLAVI3,SLAVI4
0,1927,3039,MDF,-0.600000,-0.604496,0.044318,-0.007055,0.293397,0.385980,0.198050,...,0.828683,0.130280,0.432341,0.513566,0.343706,0.432589,0.798319,0.984454,1.274313,1.571429
1,1598,2690,DDF,-0.733180,-0.643188,-0.146928,0.170301,0.527618,0.587818,0.362477,...,0.977167,0.235975,0.608851,0.660996,0.593496,0.647239,1.234392,1.470427,2.436757,2.902703
2,1975,2683,MDF,-0.402110,-0.318865,-0.087510,0.095489,0.397604,0.443004,0.294547,...,1.165246,0.052089,0.088757,0.143163,0.120900,0.174841,0.844931,0.943483,1.267621,1.415474
3,1560,2955,MDF,-0.762238,-0.585366,-0.298707,0.319372,0.628872,0.626963,0.492007,...,1.010840,0.383051,0.670257,0.668517,0.676740,0.675028,1.742939,1.731988,3.500000,3.478009
4,1944,2043,MDF,-0.519643,-0.566479,0.081646,-0.066374,0.201553,0.294888,0.140353,...,0.849760,0.117922,0.308224,0.395364,0.248258,0.339136,0.700123,0.854381,1.051916,1.283684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13048,1940,2100,DDF,-0.577412,-0.548902,-0.010753,0.041739,0.333544,0.369806,0.244468,...,0.874050,0.124378,0.380236,0.415071,0.329760,0.366130,0.865053,0.939705,1.358892,1.476160
13049,3602,6053,DDF,-0.375394,-0.338039,-0.016309,0.042785,0.181216,0.201995,0.245453,...,0.882879,0.025316,0.070961,0.092388,0.050321,0.071812,0.730999,0.763227,0.874721,0.913286
13050,2007,2170,DDF,-0.285242,-0.300292,0.009844,-0.016460,0.256958,0.323711,0.188997,...,1.012069,0.052888,-0.028271,0.044630,-0.026566,0.046333,0.652773,0.755294,0.917769,1.061909
13051,2312,3380,MDF,-0.460728,-0.344968,-0.130517,0.137635,0.438679,0.481029,0.333065,...,1.217880,0.059017,0.153555,0.205538,0.204344,0.255221,0.952827,1.060918,1.467051,1.633478


In [45]:
X = add_ft_data.drop(columns=['nforest_type'])

In [46]:
X

Unnamed: 0,b11,b9,NDWI,NDSI,BSI,NBR1,NBR3,NBR4,AFRI1,AFRI2,...,GARI2,GLT,GBNDVI1,GBNDVI2,GRNDVI1,GRNDVI2,SLAVI1,SLAVI2,SLAVI3,SLAVI4
0,1927,3039,-0.600000,-0.604496,0.044318,-0.007055,0.293397,0.385980,0.198050,0.296330,...,0.828683,0.130280,0.432341,0.513566,0.343706,0.432589,0.798319,0.984454,1.274313,1.571429
1,1598,2690,-0.733180,-0.643188,-0.146928,0.170301,0.527618,0.587818,0.362477,0.435952,...,0.977167,0.235975,0.608851,0.660996,0.593496,0.647239,1.234392,1.470427,2.436757,2.902703
2,1975,2683,-0.402110,-0.318865,-0.087510,0.095489,0.397604,0.443004,0.294547,0.344068,...,1.165246,0.052089,0.088757,0.143163,0.120900,0.174841,0.844931,0.943483,1.267621,1.415474
3,1560,2955,-0.762238,-0.585366,-0.298707,0.319372,0.628872,0.626963,0.492007,0.489615,...,1.010840,0.383051,0.670257,0.668517,0.676740,0.675028,1.742939,1.731988,3.500000,3.478009
4,1944,2043,-0.519643,-0.566479,0.081646,-0.066374,0.201553,0.294888,0.140353,0.236295,...,0.849760,0.117922,0.308224,0.395364,0.248258,0.339136,0.700123,0.854381,1.051916,1.283684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13048,1940,2100,-0.577412,-0.548902,-0.010753,0.041739,0.333544,0.369806,0.244468,0.282970,...,0.874050,0.124378,0.380236,0.415071,0.329760,0.366130,0.865053,0.939705,1.358892,1.476160
13049,3602,6053,-0.375394,-0.338039,-0.016309,0.042785,0.181216,0.201995,0.245453,0.265616,...,0.882879,0.025316,0.070961,0.092388,0.050321,0.071812,0.730999,0.763227,0.874721,0.913286
13050,2007,2170,-0.285242,-0.300292,0.009844,-0.016460,0.256958,0.323711,0.188997,0.258253,...,1.012069,0.052888,-0.028271,0.044630,-0.026566,0.046333,0.652773,0.755294,0.917769,1.061909
13051,2312,3380,-0.460728,-0.344968,-0.130517,0.137635,0.438679,0.481029,0.333065,0.379949,...,1.217880,0.059017,0.153555,0.205538,0.204344,0.255221,0.952827,1.060918,1.467051,1.633478


In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [48]:
X_train

Unnamed: 0,b11,b9,NDWI,NDSI,BSI,NBR1,NBR3,NBR4,AFRI1,AFRI2,...,GARI2,GLT,GBNDVI1,GBNDVI2,GRNDVI1,GRNDVI2,SLAVI1,SLAVI2,SLAVI3,SLAVI4
3689,1436,2122,-0.699713,-0.549083,-0.208517,0.244608,0.563264,0.576960,0.427985,0.444416,...,0.972048,0.233948,0.562232,0.575951,0.547417,0.561470,1.383626,1.440936,2.530481,2.635294
8798,1860,3313,-0.749020,-0.634446,-0.193050,0.218323,0.555258,0.582788,0.405031,0.438503,...,0.971821,0.328013,0.653736,0.676439,0.635543,0.659193,1.387081,1.504785,2.737488,2.969783
6698,1426,3270,-0.736383,-0.493194,-0.347064,0.381881,0.693043,0.699272,0.544140,0.552591,...,1.029916,0.249839,0.600803,0.608471,0.617453,0.624876,1.879717,1.925708,3.759434,3.851415
10471,2186,2535,-0.617938,-0.538353,-0.093875,0.119259,0.422791,0.435512,0.316351,0.330313,...,0.974978,0.113752,0.409437,0.422333,0.399496,0.412518,1.020573,1.052902,1.670475,1.723391
8355,2448,2721,-0.542393,-0.512979,0.012791,0.040752,0.275390,0.315336,0.243539,0.284255,...,0.757702,0.064505,0.330995,0.369391,0.240542,0.281326,0.808278,0.882228,1.131657,1.235194
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11964,1499,3165,-0.707245,-0.580390,-0.189903,0.215183,0.535561,0.556678,0.402273,0.427190,...,0.998067,0.230294,0.564543,0.584699,0.563489,0.583681,1.327044,1.409377,2.438025,2.589286
5191,1874,2420,-0.566105,-0.586119,0.055129,-0.029953,0.206013,0.287772,0.175945,0.258898,...,0.845449,0.108215,0.368217,0.441015,0.300184,0.377253,0.755889,0.899786,1.087492,1.294516
5390,2123,2326,-0.605054,-0.590262,0.014776,0.023010,0.291691,0.289627,0.226760,0.224621,...,0.807922,0.116896,0.427746,0.425902,0.350957,0.348979,0.840772,0.836989,1.277586,1.271839
860,1440,3148,-0.762963,-0.578947,-0.297161,0.329609,0.648485,0.673983,0.500631,0.533821,...,0.997815,0.251834,0.647059,0.672640,0.645635,0.671299,1.709156,1.871335,3.400000,3.722619


In [49]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, 'scaler_for_add_data.pkl')

['scaler_for_add_data.pkl']

In [50]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [51]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [52]:
label_encoder.classes_

array(['DDF', 'DEF', 'MDF'], dtype=object)

In [53]:
input_size = X_train.shape[1]  # Number of features
output_size = len(label_encoder.classes_)  # Number of classes
learning_rate = 0.001
num_epochs = 2000

In [54]:
model = NeuralNetwork(input_size=input_size, output_size=output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [55]:
# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for features, labels in train_loader:
        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training complete")

Epoch [10/2000], Loss: 1.0117
Epoch [20/2000], Loss: 0.7180
Epoch [30/2000], Loss: 0.9783
Epoch [40/2000], Loss: 0.7775
Epoch [50/2000], Loss: 0.7992
Epoch [60/2000], Loss: 0.5747
Epoch [70/2000], Loss: 0.9169
Epoch [80/2000], Loss: 0.5604
Epoch [90/2000], Loss: 0.7591
Epoch [100/2000], Loss: 0.9606
Epoch [110/2000], Loss: 0.9792
Epoch [120/2000], Loss: 0.7978
Epoch [130/2000], Loss: 0.8609
Epoch [140/2000], Loss: 0.8971
Epoch [150/2000], Loss: 0.8670
Epoch [160/2000], Loss: 0.7493
Epoch [170/2000], Loss: 0.6825
Epoch [180/2000], Loss: 0.8970
Epoch [190/2000], Loss: 0.6514
Epoch [200/2000], Loss: 1.0262
Epoch [210/2000], Loss: 0.7957
Epoch [220/2000], Loss: 0.9384
Epoch [230/2000], Loss: 0.8464
Epoch [240/2000], Loss: 0.8518
Epoch [250/2000], Loss: 0.8512
Epoch [260/2000], Loss: 1.0506
Epoch [270/2000], Loss: 0.5621
Epoch [280/2000], Loss: 0.8550
Epoch [290/2000], Loss: 0.8952
Epoch [300/2000], Loss: 0.5514
Epoch [310/2000], Loss: 0.7543
Epoch [320/2000], Loss: 0.7514
Epoch [330/2000],

In [56]:
joblib.dump(label_encoder, 'label_encoder_add_data.pkl')

['label_encoder_add_data.pkl']

In [57]:
# Evaluation
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for features, labels in test_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test set: {100 * correct / total} %')


Accuracy of the model on the test set: 69.59019532746075 %


In [58]:
torch.save(model, 'model_add_data.pth')

In [41]:
X_train.shape

(10442, 12)

In [59]:
# Load the new data
new_data = pd.read_csv('test.csv')

# Extract the 'id' column
ids = new_data['id']

# Drop the 'id' column
new_data = new_data.drop(columns=['id'])

new_data = ften(new_data)

# Load the saved scaler and transform the new data
X_new = scaler.transform(new_data)

# Convert to PyTorch tensor
X_new_tensor = torch.tensor(X_new, dtype=torch.float32)


In [60]:
new_data

Unnamed: 0,b11,b9,NDWI,NDSI,BSI,NBR1,NBR3,NBR4,AFRI1,AFRI2,...,GARI2,GLT,GBNDVI1,GBNDVI2,GRNDVI1,GRNDVI2,SLAVI1,SLAVI2,SLAVI3,SLAVI4
0,1425,2595,-0.638870,-0.462288,-0.197951,0.250592,0.548681,0.580508,0.433178,0.470365,...,0.940493,0.207373,0.479776,0.514941,0.450885,0.487326,1.320378,1.449750,2.224509,2.442470
1,1514,2582,-0.651303,-0.487230,-0.209972,0.240341,0.563073,0.546885,0.424275,0.404903,...,1.020902,0.220339,0.481570,0.463400,0.490054,0.472075,1.344940,1.283460,2.435468,2.324138
2,2354,3149,-0.699521,-0.595932,-0.139879,0.177642,0.501894,0.530548,0.369034,0.402290,...,0.925482,0.258712,0.583001,0.608206,0.540324,0.567405,1.219609,1.318741,2.206152,2.385471
3,2013,2345,-0.598174,-0.557447,-0.018347,0.061101,0.334702,0.337617,0.262633,0.265690,...,0.820880,0.188571,0.443070,0.445707,0.369657,0.372490,0.914389,0.920418,1.413922,1.423244
4,1450,2193,-0.668300,-0.534392,-0.154839,0.208299,0.513162,0.510989,0.396215,0.393728,...,0.901675,0.130379,0.502376,0.500170,0.457359,0.455026,1.206652,1.199564,2.019161,2.007299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,1686,3312,-0.635511,-0.436728,-0.229069,0.275150,0.570559,0.588011,0.454364,0.474960,...,0.987102,0.208410,0.463968,0.484330,0.458210,0.478713,1.394452,1.469676,2.369010,2.496805
3996,2694,2856,-0.549899,-0.551843,0.045179,-0.002792,0.281205,0.362460,0.202143,0.287364,...,0.791682,0.119827,0.364400,0.440359,0.272684,0.354417,0.777198,0.931825,1.187500,1.423759
3997,1486,3087,-0.762140,-0.568338,-0.305075,0.341895,0.661184,0.666307,0.510936,0.517670,...,0.991815,0.232856,0.643167,0.648504,0.638281,0.643675,1.735395,1.767468,3.451025,3.514806
3998,1840,3161,-0.763221,-0.613327,-0.260357,0.281811,0.618531,0.658730,0.460075,0.511936,...,1.008467,0.298969,0.654408,0.691547,0.660263,0.696888,1.585707,1.816514,3.267662,3.743284


In [61]:
with torch.no_grad():
    outputs = model(X_new_tensor)
    _, predicted = torch.max(outputs, 1)

In [62]:
input_size = X_new.shape[1]
output_size = len(label_encoder.classes_)
model = NeuralNetwork(input_size=input_size, output_size=output_size)
model.eval()

NeuralNetwork(
  (linear1): Linear(in_features=61, out_features=128, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (linear2): Linear(in_features=128, out_features=128, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.2, inplace=False)
  (linear3): Linear(in_features=128, out_features=128, bias=True)
  (relu3): ReLU()
  (dropout3): Dropout(p=0.2, inplace=False)
  (linear4): Linear(in_features=128, out_features=128, bias=True)
  (relu4): ReLU()
  (dropout4): Dropout(p=0.2, inplace=False)
  (linear5): Linear(in_features=128, out_features=3, bias=True)
  (softmax): Softmax(dim=1)
)

In [63]:
with torch.no_grad():
    outputs = model(X_new_tensor)
    _, predicted = torch.max(outputs, 1)

# Convert numerical predictions back to original class labels
predicted_labels = label_encoder.inverse_transform(predicted.numpy())

# Create a DataFrame with 'id' and 'predicted_nforest_type'
results = pd.DataFrame({
    'id': ids,
    'predicted_nforest_type': predicted_labels
})

# Save the results to a new CSV file
results.to_csv('NN_feature_eng_2000ep.csv', index=False)

print("NN_feature_eng_2000ep.csv")


NN_feature_eng_2000ep.csv


In [64]:
results

Unnamed: 0,id,predicted_nforest_type
0,13467,MDF
1,12719,MDF
2,1054,MDF
3,13747,MDF
4,9453,MDF
...,...,...
3995,115,MDF
3996,10654,MDF
3997,5718,MDF
3998,13054,MDF


In [65]:

yay = pd.read_csv('yay.csv')
yay

Unnamed: 0,id,predicted_nforest_type
0,13467,DDF
1,12719,DDF
2,1054,DDF
3,13747,DDF
4,9453,DDF
...,...,...
3995,115,DDF
3996,10654,DDF
3997,5718,DDF
3998,13054,DDF
