In [1]:
import pandas as pd
import numpy as np

data1 = pd.read_csv('train_values.csv')
data2 = pd.read_csv('train_labels.csv')
  
# using merge function by setting how='inner'
df = pd.merge(data1, data2, on='building_id', how='inner')
df.drop(['building_id'],axis=1,inplace=True)

df['plan_configuration'] = df['plan_configuration'].map({'d':0, 'u':1, 's':3, 'q':4, 'm':5, 'c':6, 'a':7, 'n':8, 'f':9, 'o':10}).astype(int) 
df['foundation_type'] = df['foundation_type'].map({'r':0, 'w':1, 'i':2, 'u':3, 'h':4}).astype(int)
df['ground_floor_type'] = df['ground_floor_type'].map({'f':0, 'x':1, 'v':3, 'z':4, 'm':5}).astype(int)
df['other_floor_type'] = df['other_floor_type'].map({'q':0, 'x':1, 'j':2, 's':3}).astype(int)
df['position'] = df['position'].map({'t':0, 's':1, 'j':2, 'o':3}).astype(int)
df['legal_ownership_status'] = df['legal_ownership_status'].map({'v':0, 'a':2, 'r':3, 'w':4})

cols = pd.get_dummies(df['land_surface_condition'])
df[cols.columns] = cols
df.drop('land_surface_condition', axis = 1, inplace = True)

cols = pd.get_dummies(df['roof_type'])
df[cols.columns] = cols
df.drop('roof_type', axis = 1, inplace = True)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

y = df['damage_grade']
X = df.drop('damage_grade', axis =1)


x_train_orginal, x_test_orginal, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2)
scaler = MinMaxScaler()

x_train = scaler.fit_transform(x_train_orginal)
x_test = scaler.fit_transform(x_test_orginal)

# knn

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=35) 

model.fit(x_train,y_train)

confidence = model.score(x_test,y_test)


print(confidence)

# GradientBoost

In [3]:
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(max_depth=15, n_estimators=100, learning_rate=0.1, random_state=42)
clf.fit(x_train,y_train)

confidence = clf.score(x_test,y_test)


print(confidence)

0.7329483317664665


# Prediction

In [4]:
df1 = pd.read_csv('test_values.csv') 
df1.drop(['building_id'],axis=1,inplace=True)

df1['plan_configuration'] = df1['plan_configuration'].map({'d':0, 'u':1, 's':3, 'q':4, 'm':5, 'c':6, 'a':7, 'n':8, 'f':9, 'o':10}).astype(int) 
df1['foundation_type'] = df1['foundation_type'].map({'r':0, 'w':1, 'i':2, 'u':3, 'h':4}).astype(int)
df1['ground_floor_type'] = df1['ground_floor_type'].map({'f':0, 'x':1, 'v':3, 'z':4, 'm':5}).astype(int)
df1['other_floor_type'] = df1['other_floor_type'].map({'q':0, 'x':1, 'j':2, 's':3}).astype(int)
df1['position'] = df1['position'].map({'t':0, 's':1, 'j':2, 'o':3}).astype(int)
df1['legal_ownership_status'] = df1['legal_ownership_status'].map({'v':0, 'a':2, 'r':3, 'w':4})

cols = pd.get_dummies(df1['land_surface_condition'])
df1[cols.columns] = cols
df1.drop('land_surface_condition', axis = 1, inplace = True)

cols = pd.get_dummies(df1['roof_type'])
df1[cols.columns] = cols
df1.drop('roof_type', axis = 1, inplace = True)

scaler.transform(df1)

array([[0.56666667, 0.41795231, 0.89988062, ..., 1.        , 0.        ,
        0.        ],
       [0.2       , 0.09887798, 0.9539992 , ..., 1.        , 0.        ,
        0.        ],
       [0.73333333, 0.01332398, 0.79936331, ..., 1.        , 0.        ,
        0.        ],
       ...,
       [0.73333333, 0.79663394, 0.6137684 , ..., 1.        , 0.        ,
        0.        ],
       [0.2       , 0.73001403, 0.07258257, ..., 1.        , 0.        ,
        0.        ],
       [0.86666667, 0.02524544, 0.51221647, ..., 1.        , 1.        ,
        0.        ]])

# submission

In [5]:
df2 = pd.read_csv('test_values.csv')

final = clf.predict(df1)
submission = pd.DataFrame({
    'building_id': np.asarray(df2.building_id), 
    'damage_grade': final.astype(int)
})
submission.to_csv('submission_.csv', index=False)