In [1]:
import pandas as pd
from torch.utils.data import Dataset
from imblearn.over_sampling import RandomOverSampler
import lightgbm as lgb
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import trange

## Bayesian Optimization
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events


# DEL Mini-Challenge 1
Es sollen Immobilienobjekte klassifiziert werden. Von der FHNW haben wir einen Datensatz erhalten (siehe beiliegendes EDA-Notebook), mit dem wir voraussagen sollen, um was für ein Wohnobjekt es sich handelt: Ist es ein Zimmer? Eine Wohnung? Ein Haus? Diese Challenge hatten wir bereits gelöst, allerdings ohne Deep Learning.
## Einführung
### Lösung der Mini-Challenge 1 für DEL
Wir benutzen ein neuronales Netz (Neural Network). Zur Erstellung wird das Deep-Learning-Framework PyTorch benutzt.
### Ground Truth
Als Ground Truth benutzen wir das Modell, welches wir in der Challenge Immobilienrechner benutzt haben. Mit diesem haben wir den Macro-F1-Score maximiert.


## Erstellung Modell Ground Truth

In [None]:
# Load the raw real-estate dataset from a CSV file located relative to the
# working directory.
csv_data = pd.read_csv('immo_dev_data.csv')
# Display the first rows as a quick sanity check of the loaded frame.
csv_data.head()

In [None]:
# Work on a copy so the raw frame stays untouched when this cell is re-run.
csv_data_fe = csv_data.copy()

# Coarsen the zip code: divide by 100 and truncate to a small integer
# (e.g. 8001 -> 80) so it can be treated as a categorical region code.
csv_data_fe['Zip'] = csv_data_fe['Zip'].div(100).astype('int8')

# NOTE(review): `preprocessing` is a project helper that is not imported in the
# visible import cell -- confirm it is in scope before this cell runs.
prepro = preprocessing.preprocessor(
    csv_data_fe,
    y_var='GroupNameDe',
    method_to_encode='onehot_encode',
    cols_to_drop=[
        'Id',
        'LastUpdate',
        'Locality',
        'StreetAndNr',
        'Longitude',
        'Latitude',
        'HouseObject',
        'RealEstateTypeId',
    ],
    numbers_to_encode=['Zip'],
    test_frac=0.1,
)
prepro.preprocess()

# Expose the feature and target splits under short names for the cells below.
X_train, X_test = prepro.X_train, prepro.X_test
y_train, y_test = prepro.y_train, prepro.y_test

In [None]:
# Balance the classes by randomly duplicating rows of the under-represented
# classes in the training split.
ros = RandomOverSampler(random_state = 69)
X_random_, y_random_ = ros.fit_resample(X_train, y_train)

# Draw a random subset of the balanced data that has the same number of rows
# as the original training split.
X_random = X_random_.sample(len(X_train), random_state = 69)

# `X_random.index` holds index *labels*, so the matching targets must be looked
# up by label (`.loc`). The previous positional lookup (`.iloc`) was only
# correct as long as the resampled index happened to be a fresh 0..n-1 range.
y_random = y_random_.loc[X_random.index]

In [None]:
# LightGBM classifier with fixed hyper-parameters.
# NOTE(review): the values look like the result of a hyper-parameter search
# (presumably the Bayesian optimisation imported above) -- confirm the origin.
lgbc_best = lgb.LGBMClassifier(
    n_jobs=-1,
    seed=42,
    learning_rate=0.07517,
    max_depth=340,
    n_estimators=344,
    num_leaves=90,
    reg_alpha=1.136,
    reg_lambda=4.348,
)

# Train on the resampled training data, then predict the held-out test split.
lgbc_best.fit(X_random, y_random)
lgbc_y_pred_best = lgbc_best.predict(X_test)

In [None]:
 # Per-class precision / recall / F1 of the LightGBM model on the test split.
 # The report must use `lgbc_y_pred_best` (the prediction computed in the cell
 # above); the previously referenced `knn_y_pred_smote` is never defined here.
 # NOTE(review): `sklearn.metrics` is not imported in the visible import cell --
 # confirm it is in scope before this cell runs.
 print(sklearn.metrics.classification_report(y_test, lgbc_y_pred_best))

In [2]:
# Pick the compute device once. The CPU fallback guarantees that `device` is
# always defined, also on machines without a CUDA-capable GPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
  print("Cuda Device Available")
  print("Name of the Cuda Device: ", torch.cuda.get_device_name())
  print("GPU Computational Capablity: ", torch.cuda.get_device_capability())
else:
  device = torch.device("cpu")
  print("No Cuda Device Available, using CPU")

Cuda Device Available
Name of the Cuda Device:  GeForce RTX 3060 Ti
GPU Computational Capablity:  (8, 6)


In [None]:
# Parameter for the model
# Size of the input layer: one unit per column of the feature matrix `X`.
# NOTE(review): `X` is not defined in the cells shown above -- confirm which
# frame it refers to and that it is created before this cell runs.
input_dim = X.shape[1]
# Size of the output layer (number of target classes).
output_dim = 3
# Width of the hidden layers, as returned by the helper `calc_nr_neuros`.
# NOTE(review): the helper is defined elsewhere; the meaning of the trailing
# argument `2` cannot be determined from this cell -- confirm.
nr_of_neuros = calc_nr_neuros(X, output_dim, 2)


class Model(nn.Module):
    """Fully connected classifier with five hidden blocks and a softmax output.

    Every hidden block is ``Linear -> LeakyReLU -> BatchNorm1d -> Dropout``.
    The final linear layer is followed by a softmax over the class dimension,
    so ``forward`` returns class probabilities, not raw logits.

    NOTE(review): because probabilities are returned, the loss function must
    expect probabilities. ``nn.CrossEntropyLoss`` expects raw logits and would
    apply a second softmax -- confirm the loss used with this model.

    Args:
        in_features: Number of input features. When omitted, the
            notebook-level ``input_dim`` is used.
        hidden_features: Width of every hidden layer. When omitted, the
            notebook-level ``nr_of_neuros`` is used.
        out_features: Number of classes. When omitted, the notebook-level
            ``output_dim`` is used.
        dropout: Dropout probability applied after every hidden block.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None, dropout=0.1):
        super().__init__()

        # Fall back to the notebook-level settings so that a plain `Model()`
        # call behaves exactly as before.
        in_features = input_dim if in_features is None else in_features
        hidden_features = nr_of_neuros if hidden_features is None else hidden_features
        out_features = output_dim if out_features is None else out_features

        # Attribute names and creation order are kept unchanged so existing
        # state dicts and seeded initialisations stay compatible.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.b1 = nn.BatchNorm1d(hidden_features)
        self.d1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.b2 = nn.BatchNorm1d(hidden_features)
        self.d2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.b3 = nn.BatchNorm1d(hidden_features)
        self.d3 = nn.Dropout(dropout)
        self.fc4 = nn.Linear(hidden_features, hidden_features)
        self.b4 = nn.BatchNorm1d(hidden_features)
        self.d4 = nn.Dropout(dropout)
        self.fc5 = nn.Linear(hidden_features, hidden_features)
        self.b5 = nn.BatchNorm1d(hidden_features)
        self.d5 = nn.Dropout(dropout)
        self.fc6 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return class probabilities for a batch ``x`` of shape (batch, in_features)."""
        hidden_blocks = (
            (self.fc1, self.b1, self.d1),
            (self.fc2, self.b2, self.d2),
            (self.fc3, self.b3, self.d3),
            (self.fc4, self.b4, self.d4),
            (self.fc5, self.b5, self.d5),
        )
        for linear, norm, drop in hidden_blocks:
            x = drop(norm(F.leaky_relu(linear(x))))

        # Normalise over the class dimension explicitly; calling softmax
        # without `dim` is deprecated and relies on an implicit choice.
        return F.softmax(self.fc6(x), dim=1)

# Instantiate the network; the layer sizes are taken from the notebook-level
# settings `input_dim`, `nr_of_neuros` and `output_dim`.
model = Model()

In [None]:
# To run on the GPU, PyTorch has to be told explicitly to move the model and
# the required data onto that device. The device is chosen here so the cell
# also works on CPU-only machines: the former `use_cuda` flag was never
# defined, and the `else` branch only named `NotImplementedError` without
# raising it, which silently left all tensors undefined.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# One-hot encode the target and split without shuffling (keeps row order).
# NOTE(review): `train_test_split` (presumably sklearn.model_selection) is not
# imported in the visible import cell -- confirm it is in scope.
# NOTE(review): this overwrites X_train / X_test / y_train / y_test created for
# the LightGBM model further up -- confirm that this is intended.
y_ohc = pd.get_dummies(y)
X_train, X_test, y_train, y_test = train_test_split(X, y_ohc, test_size=0.2, random_state=42, shuffle = False)

## To tensors and put them on the selected device:
y_ohc_train_t = torch.tensor(y_train.astype('int8').values).to(device)
X_train_t = torch.tensor(X_train.astype('float32').values).to(device)
y_ohc_test_t = torch.tensor(y_test.astype('int8').values).to(device)
X_test_t = torch.tensor(X_test.astype('float32').values).to(device)

In [None]:
# Number of full-batch optimisation steps and the loss history (one entry per step).
EPOCHS = 15000
aggregated_losses = []

# tqdm progress bar over the epochs; its description shows the current loss.
t = trange(EPOCHS)

for i in t:
    # Clear the gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass over the complete training set and loss computation.
    y_pred = model(X_train_t)
    single_loss = one_hot_ce_loss(y_pred, y_ohc_train_t)

    # Record the loss as a NumPy value detached from the autograd graph.
    aggregated_losses.append(single_loss.detach().cpu().numpy())
    t.set_description(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

    # Backward pass and parameter update.
    single_loss.backward()
    optimizer.step()

# Report the loss of the final epoch with higher precision.
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')