In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 

In [3]:
# Load the building-structure table from the CSV next to this notebook.
structure_csv_path = 'csv_building_structure.csv'
raw_structure_data = pd.read_csv(structure_csv_path)

In [4]:
# Discard every row that contains at least one missing value, then print a
# summary (row count, columns, dtypes) of what is left.
raw_structure_data = raw_structure_data.dropna()
raw_structure_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 762094 entries, 0 to 762105
Data columns (total 31 columns):
 #   Column                                  Non-Null Count   Dtype 
---  ------                                  --------------   ----- 
 0   building_id                             762094 non-null  int64 
 1   district_id                             762094 non-null  int64 
 2   vdcmun_id                               762094 non-null  int64 
 3   ward_id                                 762094 non-null  int64 
 4   count_floors_pre_eq                     762094 non-null  int64 
 5   count_floors_post_eq                    762094 non-null  int64 
 6   age_building                            762094 non-null  int64 
 7   plinth_area_sq_ft                       762094 non-null  int64 
 8   height_ft_pre_eq                        762094 non-null  int64 
 9   height_ft_post_eq                       762094 non-null  int64 
 10  land_surface_condition                  762094 non-null 

In [13]:
# Ward / vdcmun / district lookup table; joined onto the building rows through
# the three shared ID columns (presumably to bring in the human-readable
# names -- confirm against the CSV header).
distrct_map = pd.read_csv("ward_vdcmun_district_name_mapping.csv")
structure_data = pd.merge(
    raw_structure_data,
    distrct_map,
    on=['ward_id', 'vdcmun_id', 'district_id'],
    how='left',
)

# A left join keeps every building row, but it silently *multiplies* rows when
# the lookup table repeats a (ward_id, vdcmun_id, district_id) key. Fail fast
# here instead of training on duplicated buildings later.
assert len(structure_data) == len(raw_structure_data), (
    "merge changed the row count: the name mapping contains duplicate keys"
)

In [14]:
# Remove the ID columns, the vdcmun name, and the columns named for
# post-earthquake observations / proposed solutions so they are not used as
# model features.
structure_data = structure_data.drop(
    columns=[
        'building_id', 'district_id', 'vdcmun_id', 'ward_id',
        'count_floors_post_eq', 'height_ft_post_eq', 'condition_post_eq',
        'technical_solution_proposed', 'vdcmun_name',
    ],
)

In [15]:
# Number of buildings per damage grade (class balance of the target).
damage_grade_counts = structure_data['damage_grade'].value_counts()
damage_grade_counts

Grade 5    275766
Grade 4    183844
Grade 3    136412
Grade 2     87257
Grade 1     78815
Name: damage_grade, dtype: int64

In [16]:
# Ordinal encoding of the target: text label 'Grade N' -> integer N.
severity_mapping = {'Grade 1': 1, 'Grade 2': 2, 'Grade 3': 3, 'Grade 4': 4, 'Grade 5': 5}

# Series.map turns every value that is not a key of the dict into NaN, so
# running this cell a second time (on the already-encoded integers) would wipe
# the whole target column. Only encode while the column still holds labels.
if not pd.api.types.is_numeric_dtype(structure_data['damage_grade']):
    encoded_grade = structure_data['damage_grade'].map(severity_mapping)

    # Surface unexpected labels now rather than as NaN targets at fit time.
    unmapped_rows = encoded_grade.isna()
    if unmapped_rows.any():
        unknown_labels = structure_data.loc[unmapped_rows, 'damage_grade'].unique()
        raise ValueError(f"unmapped damage_grade labels: {list(unknown_labels)}")

    structure_data['damage_grade'] = encoded_grade

In [23]:
# Categorical columns that get expanded into one indicator column per category.
columns_to_encode = [
    'land_surface_condition',
    'foundation_type',
    'roof_type',
    'ground_floor_type',
    'other_floor_type',
    'position',
    'plan_configuration',
]

# The listed columns are replaced by their dummy columns; all other columns
# pass through unchanged.
structure_data = pd.get_dummies(data=structure_data, columns=columns_to_encode)

In [24]:
# Number of buildings recorded in each district.
district_counts = structure_data['district_name'].value_counts()
district_counts

Kavrepalanchok    98019
Makwanpur         90994
Dhading           89122
Sindhupalchok     88741
Gorkha            78074
Nuwakot           77148
Sindhuli          68749
Dolakha           60639
Ramechhap         58612
Okhaldhunga       39352
Rasuwa            12644
Name: district_name, dtype: int64

In [34]:
# Split the table by district; iterating the GroupBy yields
# (district_name, sub-frame) pairs, which dict() collects into a lookup.
grouped_dataframes = structure_data.groupby('district_name')
district_dataframes = dict(iter(grouped_dataframes))

In [45]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Set to True to also print the confusion matrix and the classification
# report for every district (off by default to keep the cell output short).
SHOW_DETAILED_REPORT = False

# One scaler object is reused; fit_transform() re-fits it on each district's
# training split, so no statistics carry over between districts.
scaler = StandardScaler()

# Hold-out accuracy per district, kept so the scores can be compared or
# plotted afterwards instead of existing only as printed text.
district_accuracy = {}

# Train and evaluate one independent random-forest classifier per district.
# NOTE(review): the overall grade distribution is imbalanced and the split is
# not stratified, so plain accuracy may flatter districts dominated by a single
# grade -- consider stratify=y and a per-class metric.
for name, group in grouped_dataframes:
    # Features are every remaining column except the grouping key and target.
    X = group.drop(['district_name', 'damage_grade'], axis=1)
    y = group['damage_grade']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only, then apply it to the test split.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    rf_classifier = RandomForestClassifier(random_state=42, n_estimators=1000, max_depth=3, n_jobs=-1)
    rf_classifier.fit(X_train_scaled, y_train)

    y_pred = rf_classifier.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    district_accuracy[name] = accuracy
    print(f'Accuracy of {name}: {accuracy:.2f}')

    # Optionally print the confusion matrix and the classification report.
    if SHOW_DETAILED_REPORT:
        conf_matrix = confusion_matrix(y_test, y_pred)
        class_report = classification_report(y_test, y_pred)

        print('Confusion Matrix:')
        print(conf_matrix)

        print('\nClassification Report:')
        print(class_report)

Accuracy of Dhading: 0.43
Accuracy of Dolakha: 0.59
Accuracy of Gorkha: 0.39
Accuracy of Kavrepalanchok: 0.36
Accuracy of Makwanpur: 0.42
Accuracy of Nuwakot: 0.54
Accuracy of Okhaldhunga: 0.32
Accuracy of Ramechhap: 0.37
Accuracy of Rasuwa: 0.75
Accuracy of Sindhuli: 0.34
Accuracy of Sindhupalchok: 0.82
