In [42]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import numpy as np
import pandas as pd
import random

In [0]:
X=pd.read_csv("/content/drive/My Drive/Colab Data/Earthquake/X.csv")
Y=pd.read_csv("/content/drive/My Drive/Colab Data/Earthquake/Y.csv")

In [0]:
X=X.iloc[:20000,1:]
Y=Y.iloc[:20000,1]

In [0]:
X=pd.get_dummies(X,columns=['land_surface_condition','foundation_type','roof_type','ground_floor_type','other_floor_type','position','plan_configuration','legal_ownership_status'])

In [0]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X=sc.fit_transform(X)

In [0]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=0)

## Baseline Models

### Decision Tree Classifier

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
dc=DecisionTreeClassifier()

In [68]:
dc.fit(X_train,Y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [69]:
Y_pred=dc.predict(X_test)
print(classification_report(Y_test,Y_pred))
print("Accuracy => %.3f" %(dc.score(X_test,Y_test)*100))

              precision    recall  f1-score   support

           1       0.39      0.40      0.40       389
           2       0.67      0.66      0.66      2266
           3       0.56      0.58      0.57      1345

    accuracy                           0.61      4000
   macro avg       0.54      0.55      0.54      4000
weighted avg       0.61      0.61      0.61      4000

Accuracy => 60.650


### Logistic Regression

In [0]:
from sklearn.linear_model import LogisticRegressionCV
lg=LogisticRegressionCV()

In [71]:
lg.fit(X_train,Y_train)

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [72]:
Y_pred=lg.predict(X_test)
print(classification_report(Y_test,Y_pred))
print("Accuracy => %.3f" %(lg.score(X_test,Y_test)*100))

              precision    recall  f1-score   support

           1       0.57      0.31      0.40       389
           2       0.60      0.85      0.70      2266
           3       0.53      0.22      0.31      1345

    accuracy                           0.59      4000
   macro avg       0.56      0.46      0.47      4000
weighted avg       0.57      0.59      0.54      4000

Accuracy => 58.650


## Deep Learning Classifier

In [0]:
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense,Activation
from keras.utils import np_utils,to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

In [0]:
sess=tf.Session()
K.set_session(sess)
hidden_layers=8
neurons_num=128
feature_dim=X_train.shape[1]

In [0]:
def create_model(optimizer='Adamax',activation='relu',kernel_initializer='normal',neurons_num=128,hidden_layers=8):
  model=Sequential()
  model.add(Dense(feature_dim,input_dim=feature_dim,kernel_initializer=kernel_initializer,activation=activation))
  for _ in range(0,hidden_layers-1):
    model.add(Dense(neurons_num,kernel_initializer=kernel_initializer,activation=activation))
    model.add(Dense(4,kernel_initializer='normal',activation='softmax'))
    model.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=["accuracy"])
    return model

In [0]:
def print_best(grid_result):
  print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
  means = grid_result.cv_results_['mean_test_score']
  stds = grid_result.cv_results_['std_test_score']
  params = grid_result.cv_results_['params']
  for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

### Tuning Batch Size and Epochs

In [77]:
model=KerasClassifier(build_fn=create_model,verbose=0)
batch_size=[10,20,40,60,100]
epochs=[10,25,50,100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid=GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1,cv=3)
grid_result = grid.fit(X_train, Y_train)

epochs=grid_result.best_params_['epochs']
batch=grid_result.best_params_['batch_size']

print_best(grid_result)






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where






Best: 0.611750 using {'batch_size': 40, 'epochs': 10}
0.610000 (0.004930) with: {'batch_size': 10, 'epochs': 10}
0.610250 (0.002389) with: {'batch_size': 10, 'epochs': 25}
0.601437 (0.008599) with: {'batch_size': 10, 'epochs': 50}
0.583312 (0.010717) with: {'batch_size': 10, 'epochs': 100}
0.610437 (0.005426) with: {'batch_size': 20, 'epochs': 10}
0.610125 (0.001806) with: {'batch_size': 20, 'epochs': 25}
0.594187 (0.004779) with: {'batch_size': 20, 'epochs': 50}
0.581062 (0.011298) with: {'batch_size': 20, 'epochs': 100}
0.611750 (0.002131) with: {'batch_size': 40, 'epochs': 10}
0.600375 (0.002074) with: {'batch_size': 40, 'epochs': 25}
0.602750 (0.001265) with: {'batch_size': 40, 'epochs': 50}
0.578875 (0.001645) with: {'batch_size': 40, 'epochs': 100}
0.607874 (0.006257) with: {'batch_size': 60, 'epochs': 10}
0.602375 (0.005462) with: {'batch_size': 60, 'epochs': 25}
0.597500 (0.

### Tuning Optimizer

In [78]:
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

optim=grid_result.best_params_['optimizer']

print_best(grid_result)

Best: 0.609562 using {'optimizer': 'Adamax'}
0.583375 (0.000382) with: {'optimizer': 'SGD'}
0.602125 (0.004203) with: {'optimizer': 'RMSprop'}
0.600375 (0.006591) with: {'optimizer': 'Adagrad'}
0.575375 (0.004877) with: {'optimizer': 'Adadelta'}
0.604937 (0.010399) with: {'optimizer': 'Adam'}
0.609562 (0.003272) with: {'optimizer': 'Adamax'}
0.561755 (0.060145) with: {'optimizer': 'Nadam'}


### Tuning Activation Function

In [79]:
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch, verbose=0)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

activ=grid_result.best_params_['activation']

print_best(grid_result)

Best: 0.606937 using {'activation': 'relu'}
0.566563 (0.006023) with: {'activation': 'softmax'}
0.584687 (0.008150) with: {'activation': 'softplus'}
0.602687 (0.004413) with: {'activation': 'softsign'}
0.606937 (0.008144) with: {'activation': 'relu'}
0.593562 (0.004918) with: {'activation': 'tanh'}
0.588312 (0.008559) with: {'activation': 'sigmoid'}
0.588812 (0.004237) with: {'activation': 'hard_sigmoid'}
0.582437 (0.002494) with: {'activation': 'linear'}


### Tuning Kernel Initializer

In [80]:
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch, verbose=0)
kernel_initializer = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(kernel_initializer=kernel_initializer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

kern=grid_result.best_params_['kernel_initializer']

print_best(grid_result)


Best: 0.610062 using {'kernel_initializer': 'uniform'}
0.610062 (0.001371) with: {'kernel_initializer': 'uniform'}
0.608937 (0.008264) with: {'kernel_initializer': 'lecun_uniform'}
0.609062 (0.008806) with: {'kernel_initializer': 'normal'}
0.566563 (0.006023) with: {'kernel_initializer': 'zero'}
0.600937 (0.007468) with: {'kernel_initializer': 'glorot_normal'}
0.604000 (0.002136) with: {'kernel_initializer': 'glorot_uniform'}
0.593937 (0.009202) with: {'kernel_initializer': 'he_normal'}
0.601750 (0.001645) with: {'kernel_initializer': 'he_uniform'}


### Tuning Number of Neurons

In [81]:
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch, verbose=0)
neurons_num = [1, 2, 4, 8, 16, 32, 64, 128, 256]
param_grid = dict(neurons_num=neurons_num)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

num_ne=grid_result.best_params_['neurons_num']

print_best(grid_result)

Best: 0.609187 using {'neurons_num': 32}
0.570625 (0.006131) with: {'neurons_num': 1}
0.581375 (0.011557) with: {'neurons_num': 2}
0.603000 (0.000955) with: {'neurons_num': 4}
0.603437 (0.002741) with: {'neurons_num': 8}
0.601063 (0.008013) with: {'neurons_num': 16}
0.609187 (0.004185) with: {'neurons_num': 32}
0.607125 (0.005526) with: {'neurons_num': 64}
0.604500 (0.002568) with: {'neurons_num': 128}
0.602749 (0.009227) with: {'neurons_num': 256}


### Tuning Number of Hidden Layers

In [82]:
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch, verbose=0)
hidden_layers = [2,4,8,10,12,14,16,18,20]
param_grid = dict(hidden_layers=hidden_layers)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

hidl=grid_result.best_params_['hidden_layers']

print_best(grid_result)

Best: 0.613250 using {'hidden_layers': 12}
0.607625 (0.005158) with: {'hidden_layers': 2}
0.602875 (0.003194) with: {'hidden_layers': 4}
0.610250 (0.004976) with: {'hidden_layers': 8}
0.606874 (0.008083) with: {'hidden_layers': 10}
0.613250 (0.003533) with: {'hidden_layers': 12}
0.604499 (0.006847) with: {'hidden_layers': 14}
0.610187 (0.005748) with: {'hidden_layers': 16}
0.604062 (0.005822) with: {'hidden_layers': 18}
0.609375 (0.003483) with: {'hidden_layers': 20}


### Using the Best Model

In [0]:
model=create_model(optim,activ,kern,num_ne,hidl)

In [84]:
model.fit(X_train,Y_train,epochs=epochs,batch_size=batch,verbose=0,validation_split=0.15)

<keras.callbacks.History at 0x7f5def970da0>

In [85]:
scores=model.evaluate(X_test,Y_test,verbose=2)
print("\nTest %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


Test acc: 61.25%


In [0]:
X_validate=pd.read_csv("/content/drive/My Drive/Colab Data/Earthquake/X_test.csv")
X_id=X_validate.iloc[:,0]
X_actual=X_validate.iloc[:,1:]

In [0]:
X_actual=pd.get_dummies(X_actual,columns=['land_surface_condition','foundation_type','roof_type','ground_floor_type','other_floor_type','position','plan_configuration','legal_ownership_status'])

In [0]:
X_actual=sc.fit_transform(X_actual)

In [0]:
Y_pred=model.predict_classes(X_actual)

In [0]:
Y_final=pd.concat([X_id,pd.Series(Y_pred)],axis=1)

In [0]:
Y_final.to_csv('final.csv',header=True,index=False)