In [1]:

from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler,QuantileTransformer ,FunctionTransformer, LabelEncoder, RobustScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd 
from torch.utils.data import DataLoader, TensorDataset,Dataset
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn as nn
import os
import torch.optim as optim


# Load the damage-assessment and building-structure CSV files.
damage_assessment = pd.read_csv('csv_building_damage_assessment.csv')
building_structure = pd.read_csv('csv_building_structure.csv')

# Join both tables on the building / district / municipality / ward identifiers.
# pandas uses an inner join by default, so only rows whose key combination
# appears in both tables are kept.
merge = pd.merge(
    damage_assessment,
    building_structure,
    on=['building_id', 'district_id', 'vdcmun_id', 'ward_id'],
)

# Discard every row that has a missing value (NaN) in any of the columns below.
merge = merge.dropna(subset=[
    # identifiers
    'building_id', 'district_id', 'vdcmun_id', 'ward_id',
    # geotechnical risk flags
    'has_geotechnical_risk_land_settlement',
    'has_geotechnical_risk',
    'has_geotechnical_risk_fault_crack',
    'has_geotechnical_risk_liquefaction',
    'has_geotechnical_risk_flood',
    'has_geotechnical_risk_landslide',
    'has_geotechnical_risk_rock_fall',
    'has_geotechnical_risk_other',
    # building age, size and land surface
    'age_building',
    'count_floors_pre_eq',
    'plinth_area_sq_ft',
    'land_surface_condition',
    # superstructure material flags
    'has_superstructure_adobe_mud',
    'has_superstructure_mud_mortar_stone',
    'has_superstructure_stone_flag',
    'has_superstructure_cement_mortar_stone',
    'has_superstructure_mud_mortar_brick',
    'has_superstructure_cement_mortar_brick',
    'has_superstructure_timber',
    'has_superstructure_bamboo',
    'has_superstructure_rc_non_engineered',
    'has_superstructure_rc_engineered',
    # floor counts / heights and the damage grade
    'count_floors_post_eq',
    'height_ft_pre_eq',
    'height_ft_post_eq',
    'damage_grade_y',
])


# Keep only the identifier columns, the geotechnical-risk / land / building
# columns (pre-earthquake floor count and height) and the damage grade.
merge = merge.loc[:, [
    'building_id', 'district_id', 'vdcmun_id', 'ward_id',
    'has_geotechnical_risk',
    'has_geotechnical_risk_land_settlement',
    'has_geotechnical_risk_fault_crack',
    'has_geotechnical_risk_liquefaction',
    'has_geotechnical_risk_flood',
    'has_geotechnical_risk_landslide',
    'has_geotechnical_risk_rock_fall',
    'has_geotechnical_risk_other',
    'age_building',
    'plinth_area_sq_ft',
    'land_surface_condition',
    'has_superstructure_adobe_mud',
    'has_superstructure_mud_mortar_stone',
    'has_superstructure_stone_flag',
    'has_superstructure_cement_mortar_stone',
    'has_superstructure_mud_mortar_brick',
    'has_superstructure_cement_mortar_brick',
    'has_superstructure_timber',
    'has_superstructure_bamboo',
    'has_superstructure_rc_non_engineered',
    'has_superstructure_rc_engineered',
    'count_floors_pre_eq',
    'foundation_type',
    'ground_floor_type',
    'height_ft_pre_eq',
    'damage_grade_y',
]]



# Shorten some column names: the superstructure flags lose their
# 'has_superstructure_' prefix and 'damage_grade_y' becomes 'damage_grade'.
merge = merge.rename(columns={
    **{
        f'has_superstructure_{material}': material
        for material in (
            'adobe_mud',
            'mud_mortar_stone',
            'stone_flag',
            'cement_mortar_brick',
            'cement_mortar_stone',
            'mud_mortar_brick',
            'timber',
            'bamboo',
            'rc_non_engineered',
            'rc_engineered',
        )
    },
    'damage_grade_y': 'damage_grade',
})



# Convert the textual damage grade to an integer by taking its last character,
# then derive the binary target: severe_damage is 1 when the grade is above 3.
# NOTE(review): the original comment described the threshold as "grade 3 and
# above", while the comparison is strictly greater than 3 - confirm the intent.
merge["damage_grade"] = merge["damage_grade"].str.get(-1).astype(int)
merge["severe_damage"] = merge["damage_grade"].gt(3).astype(int)

# The identifiers and the raw grade are not used as model inputs.
merge = merge.drop(
    columns=['building_id', 'damage_grade', 'district_id', 'vdcmun_id', 'ward_id']
)


# Replace the categorical columns with integer class codes.
#
# Each column is handed to the encoder as a 1-D Series: LabelEncoder expects a
# 1-D array of labels, and passing an (n, 1) column vector (the former
# `.values.reshape(-1, 1)`) makes scikit-learn emit a DataConversionWarning.
#
# The same encoder object is refitted for every column, so after this block
# `label_encoder` only holds the classes of the last column encoded
# ('ground_floor_type').
label_encoder = LabelEncoder()

merge['land_surface_condition'] = label_encoder.fit_transform(merge['land_surface_condition'])
merge['foundation_type'] = label_encoder.fit_transform(merge['foundation_type'])
merge['ground_floor_type'] = label_encoder.fit_transform(merge['ground_floor_type'])


# Split the data into 90 % training and 10 % test rows. The split is stratified
# on the target column and uses a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    merge.drop('severe_damage', axis=1),
    merge['severe_damage'],
    test_size=0.1,
    random_state=9,
    stratify=merge['severe_damage'],
)


class DepremDataset(Dataset):
    """Dataset that pairs each feature row with its label by position.

    Args:
        X_data: indexable container of feature rows.
        y_data: indexable container of labels, indexed like ``X_data``.
    """

    def __init__(self, X_data, y_data):
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X_data[index]
        label = self.y_data[index]
        return features, label


# Training hyper-parameters.
EPOCHS = 5
# BATCH_SIZE used to be declared as 8 but was never used, while both loaders
# hard-coded 32. It now holds the value actually in effect and is passed to the
# loaders, so the constant and the real batch size can no longer drift apart.
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_FEATURES = len(X_train.columns)  # number of input features
NUM_CLASSES = len(y_train.unique())  # number of distinct target classes

# Wrap the splits as tensors: features as float, labels as long
# (the integer type CrossEntropyLoss expects for class-index targets).
train_dataset = DepremDataset(
    torch.from_numpy(X_train.to_numpy()).float(),
    torch.from_numpy(y_train.to_numpy()).long(),
)
test_dataset = DepremDataset(
    torch.from_numpy(X_test.to_numpy()).float(),
    torch.from_numpy(y_test.to_numpy()).long(),
)

# Only the training loader shuffles; the test loader keeps the row order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define the model architecture

class DepremModel(nn.Module):
    """Fully connected classifier with three hidden layers.

    The layer widths are ``num_feature -> 512 -> 128 -> 64 -> num_class``.
    Every hidden layer is followed by batch normalisation and ReLU; dropout
    (p=0.2) is applied after the second and third hidden layers only.

    Args:
        num_feature: size of each input row.
        num_class: number of output scores per row.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()

        # Linear layers.
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, num_class)

        # Shared activation / regularisation modules.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        # One batch-norm module per hidden layer, sized to that layer's output.
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        # First hidden layer: no dropout.
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))
        # Second and third hidden layers: dropout after the activation.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))
        return self.layer_out(hidden)




# Build the network sized to the data.
model = DepremModel(NUM_FEATURES, NUM_CLASSES)

# Cross-entropy loss on the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def multi_acc(y_pred, y_test):
    """Return the accuracy of a batch as a percentage rounded to a whole number.

    Args:
        y_pred: class scores; the predicted class is the arg-max along dim 1.
        y_test: target class indices, compared element-wise with the predictions.

    Returns:
        A 0-dim tensor holding ``round(100 * matches / number_of_rows)``.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    predicted_classes = torch.max(log_probs, dim=1).indices

    hits = (predicted_classes == y_test).float()
    accuracy = hits.sum() / len(hits)

    return torch.round(accuracy * 100)


# History containers for accuracy and loss, one list per data split.
accuracy_stats = {split: [] for split in ('train', 'val')}
loss_stats = {split: [] for split in ('train', 'val')}

print("Eğitim Başlıyor")
for e in tqdm.tqdm(range(1, EPOCHS+1)):

    # Running sums of the per-batch loss and accuracy for this epoch.
    train_epoch_loss = 0
    train_epoch_acc = 0

    # Training mode: dropout is active and batch norm uses batch statistics.
    model.train()
    for X_train_batch, y_train_batch in train_dataloader:

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        y_train_pred = model(X_train_batch)

        train_loss = criterion(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)

        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    # Average the per-batch values over the epoch. They are stored in the
    # history dictionaries (previously declared but never filled) so the
    # training curve can be inspected after the loop finishes.
    num_batches = len(train_dataloader)
    epoch_loss = train_epoch_loss / num_batches
    epoch_acc = train_epoch_acc / num_batches
    loss_stats['train'].append(epoch_loss)
    accuracy_stats['train'].append(epoch_acc)

    # Report the loss and accuracy of this epoch.
    print(f'Epoch {e:03}: | Train Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc:.3f}')



  exec(code_obj, self.user_global_ns, self.user_ns)
  y = column_or_1d(y, warn=True)


Eğitim Başlıyor


 20%|████████████████▊                                                                   | 1/5 [01:14<04:56, 74.16s/it]

Epoch 001: | Train Loss: 0.56421 | Train Acc: 72.259


 40%|█████████████████████████████████▌                                                  | 2/5 [02:37<03:57, 79.30s/it]

Epoch 002: | Train Loss: 0.55256 | Train Acc: 72.898


 60%|██████████████████████████████████████████████████▍                                 | 3/5 [04:16<02:57, 88.59s/it]

Epoch 003: | Train Loss: 0.54960 | Train Acc: 73.027


 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [06:07<01:37, 97.43s/it]

Epoch 004: | Train Loss: 0.54807 | Train Acc: 73.117


100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [08:23<00:00, 100.74s/it]

Epoch 005: | Train Loss: 0.54695 | Train Acc: 73.186



