## Libraries

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings

# Never truncate DataFrame output: show every row and every column.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')

## Preprocessing, Model Selection and Metrics

In [None]:
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split,cross_val_score,RandomizedSearchCV,GridSearchCV
from sklearn.metrics import f1_score

## Classification-Libraries

In [None]:
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from xgboost import XGBClassifier

## Importing Data

In [None]:
# Directory holding the competition CSVs. Defined once so the notebook can be pointed
# at another copy of the data by editing a single line.
DATA_DIR = Path(r'C:\Users\AnkushTaneja\Downloads\ML_Projects\EarthQuake_Predictions\Data')

train_values = pd.read_csv(DATA_DIR / 'train_values.csv')
test = pd.read_csv(DATA_DIR / 'test_values.csv')
train_labels = pd.read_csv(DATA_DIR / 'train_labels.csv')

# Attach each building's label to its feature row. validate='one_to_one' makes pandas
# raise MergeError if building_id repeats on either side, instead of silently
# multiplying rows and shifting the positional train/test split used further down.
train_data = pd.merge(train_values, train_labels, how='inner', on=['building_id'],
                      validate='one_to_one')

# Stack labelled rows first, then the unlabelled test rows, so encoding and scaling
# see both. ignore_index is not used, so index labels repeat across the two parts;
# later cells split this frame positionally with iloc.
df = pd.concat([train_data, test], axis=0, sort=False)

## Data Housekeeping

In [None]:
# (rows, columns) of the stacked train + test frame.
df.shape

In [None]:
# Rows per damage grade. value_counts() skips NaN by default, so rows without a label
# are not counted (presumably the appended test rows -- confirm).
df['damage_grade'].value_counts()

## Dependent and Independent variables

In [None]:
# Keep the identifier and the target aside as Series; neither is a model feature.
building_id, damage_grade = df['building_id'], df['damage_grade']

# From here on `df` holds feature columns only.
df = df.drop(['building_id', 'damage_grade'], axis=1)

## Feature Encoding

In [None]:
# Categorical columns to convert into integer codes.
features = [
    'land_surface_condition',
    'foundation_type',
    'roof_type',
    'ground_floor_type',
    'other_floor_type',
    'position',
    'plan_configuration',
    'legal_ownership_status',
]

# One encoder object is reused; fit_transform re-fits it on every column, so after the
# loop `le` only remembers the classes of the last column in `features`.
le = LabelEncoder()
for feature in features:
    encoded_column = le.fit_transform(df[feature])
    df[feature] = encoded_column

In [None]:
# Remove 'height_percentage' from the feature set. Reassigning (rather than
# inplace=True) keeps the step a plain expression and avoids mutating a frame that
# earlier cells have already displayed.
# NOTE(review): the reason for dropping this column is not stated in the notebook -- confirm.
df = df.drop(columns=['height_percentage'])

## Correlation Plot

In [None]:
# NOTE: this cell is disabled -- every line below is commented out.
# If it is re-enabled:
#   * the mask uses the builtin `bool`; the `np.bool` alias originally written here
#     was removed in NumPy 1.24;
#   * `features = df.columns` rebinds the `features` list created in the
#     Feature Encoding cell.

# features = df.columns

# mask = np.zeros_like(df[features].corr(), dtype=bool)
# mask[np.triu_indices_from(mask)] = True

# f, ax = plt.subplots(figsize=(16, 12))
# plt.title('Pearson Correlation Matrix',fontsize=25)

# sns.heatmap(df[features].corr(),linewidths=0.25,vmax=0.7,square=True,cmap="BuGn", #"BuGn_r" to reverse
#             linecolor='w',annot=True,annot_kws={"size":8},mask=mask,cbar_kws={"shrink": .9});

## Feature Scaling

In [None]:
# Standardise every feature column and rebuild a DataFrame that carries the same
# column names and index as `df`.
# NOTE(review): the scaler is fitted on the stacked train + test rows, before any
# hold-out split is made.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)
df_scaled = pd.DataFrame(scaled_values, columns=df.columns, index=df.index)

## Joining labels back to main-data

In [None]:
# Put the target column back next to the scaled features. `damage_grade` was taken
# from the same frame, so its index matches df_scaled's row for row.
df_scaled = df_scaled.assign(damage_grade=damage_grade)

## Train and Test Split

In [None]:
# Labelled rows were stacked first when `df` was built, so the first len(train_data)
# rows are the training set and the remainder is the competition test set. Deriving
# the boundary from the data replaces the hard-coded row count 260601.
n_train = len(train_data)

# .copy() gives `train` its own data, so the dtype assignment below does not write
# into a slice of df_scaled (the source of pandas' SettingWithCopyWarning).
train = df_scaled.iloc[:n_train].copy()

# Test rows carry no label, so the target column is removed from them.
test = df_scaled.iloc[n_train:].drop(columns=['damage_grade'])

# Cast the training labels to integer class labels.
train['damage_grade'] = train['damage_grade'].astype(int)

## Splitting the labelled training data into train and hold-out sets

In [None]:
# Target is the damage grade; features are every other column of `train`.
Y = train['damage_grade']
X = train.drop(columns=['damage_grade'])

# Hold out 20% of the labelled rows for evaluation; random_state fixes the shuffle.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=0)

# Classification Techniques

## Logistic Regression

In [None]:
# Fit a logistic-regression baseline on the training split (fit returns the estimator).
lr = LogisticRegression(random_state=0).fit(xtrain, ytrain)
pred_lr = lr.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_lr, average='micro')

## SVM - Linear

In [None]:
# Support-vector classifier with a linear kernel, trained on the training split.
svm_linear = SVC(kernel='linear', random_state=0).fit(xtrain, ytrain)
pred_svm_linear = svm_linear.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_svm_linear, average='micro')

## SVM - RBF Kernel

In [None]:
# Support-vector classifier with no kernel argument, i.e. SVC's default kernel.
svm_rbf = SVC(random_state=0).fit(xtrain, ytrain)
pred_svm_rbf = svm_rbf.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_svm_rbf, average='micro')

## Decision Tree

In [None]:
# Single decision tree with a fixed seed, trained on the training split.
dt = DecisionTreeClassifier(random_state=0).fit(xtrain, ytrain)
pred_dt = dt.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_dt, average='micro')

## Random Forest

In [None]:
# Random forest with default hyper-parameters and a fixed seed.
rf = RandomForestClassifier(random_state=0).fit(xtrain, ytrain)
pred_rf = rf.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_rf, average='micro')

## XGBoost

In [None]:
# Recent XGBoost releases require class labels to be exactly 0..n_classes-1 and raise
# a ValueError otherwise. The damage grades are cast to int upstream but are not
# guaranteed to start at 0 (presumably 1..3 here -- confirm), so they are encoded for
# fitting and decoded after prediction. A dedicated encoder is used so the `le`
# object from the Feature Encoding cell is left untouched.
xgb_label_encoder = LabelEncoder()
ytrain_encoded = xgb_label_encoder.fit_transform(ytrain)

xgb = XGBClassifier(random_state=0)
xgb = xgb.fit(xtrain, ytrain_encoded)

# Map predicted class indices back to the original damage grades so they are
# directly comparable with ytest.
pred_xgb = xgb_label_encoder.inverse_transform(xgb.predict(xtest))

# Micro-averaged F1 on the hold-out split.
f1_score(ytest, pred_xgb, average='micro')

## LightGBM

In [None]:
# LightGBM with 300 trees and a 0.50 learning rate; this fitted model is the one used
# for the out-of-sample predictions later in the notebook.
lgb = LGBMClassifier(learning_rate=0.50, n_estimators=300, random_state=0).fit(xtrain, ytrain)
pred_lgb = lgb.predict(xtest)

# Micro-averaged F1 on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_lgb, average='micro')

## CatBoost

In [None]:
# CatBoost with 200 boosting iterations and a 0.50 learning rate, fixed seed.
cat_boost = CatBoostClassifier(learning_rate=0.50, n_estimators=200, random_state=0).fit(xtrain, ytrain)
pred_cat_boost = cat_boost.predict(xtest)

In [None]:
# Micro-averaged F1 of the CatBoost predictions on the hold-out split.
f1_score(y_true=ytest, y_pred=pred_cat_boost, average='micro')

## Predicting out-of-sample data

In [None]:
# Preview the unlabelled competition rows. `out_of_sample` is only bound in the next
# cell (as an alias of `test`), so referencing it here raised NameError on a fresh
# kernel; `test` is the same frame and is already defined at this point.
test.head()

In [None]:
# Empty frame that the following cells fill with the submission columns.
df_predicted = pd.DataFrame()

# Score the unlabelled competition rows with the fitted LightGBM model.
out_of_sample = test
predictions = lgb.predict(out_of_sample)

In [None]:
df_predicted['damage_grade'] = predictions

# The competition rows are the trailing rows of the stacked data, one per prediction.
# Take their IDs by position and assign the raw values, so the pairing does not
# depend on the Series index happening to line up with df_predicted's fresh 0..n-1
# index (a mismatch would silently produce NaN or misplaced building IDs).
first_test_row = len(building_id) - len(predictions)
df_predicted['building_id'] = building_id.iloc[first_test_row:].values

In [None]:
# Submission layout: identifier first, predicted grade second.
df_predicted = df_predicted.loc[:, ['building_id', 'damage_grade']]

## Exporting the Predictions

In [None]:
# Write the submission CSV without the DataFrame index column.
df_predicted.to_csv(
    path_or_buf=r'C:\Users\AnkushTaneja\Downloads\ML_Projects\EarthQuake_Predictions\Data\Ankush_Submission.csv',
    index=False,
)