In [None]:
# Read Data
import numpy as np                     # Linear Algebra (calculate the mean and standard deviation)
import pandas as pd                    # manipulate data, data processing, load csv file I/O (e.g. pd.read_csv)

# Visualization
import seaborn as sns                  # Visualization using seaborn
import matplotlib.pyplot as plt        # Visualization using matplotlib
%matplotlib inline
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# style
# Global look-and-feel for every figure created after this cell.
import plotly.io as pio
pio.templates.default = "plotly_dark"  # default template for plotly figures
plt.style.use("fivethirtyeight")       # Set Graphs Background style using matplotlib
sns.set_style("darkgrid")              # Set Graphs Background style using seaborn (applied after the matplotlib style)

# ML model building; Pre Processing & Evaluation
from sklearn.model_selection import train_test_split                     # split  data into training and testing sets
from sklearn.linear_model import LinearRegression, Lasso, Ridge          # Linear Regression, Lasso and Ridge
from sklearn.linear_model import LogisticRegression                      # Logistic Regression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor   # Decision tree Classification & Regression
from sklearn.ensemble import RandomForestClassifier                      # this will make a Random Forest Classifier
from sklearn import svm                                                  # this will make a SVM classificaiton
from sklearn.svm import SVC                                              # import SVC from SVM
import xgboost
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report      # this creates a confusion matrix
from sklearn.metrics import roc_curve,auc                                # ROC
from sklearn.preprocessing import StandardScaler                         # Standard Scalar
from sklearn.model_selection import GridSearchCV                         # this will do cross validation

import warnings                        # Ignore Warnings
# No category is given, so every warning raised after this point is hidden
# (including deprecation and convergence warnings from the libraries above).
warnings.filterwarnings("ignore")

In [None]:
# Checking the value count for different soil_types
# Covers every column from position 10 up to, but not including, the last column.
for column_name in cover.columns[10:cover.shape[1] - 1]:
    print(cover[column_name].value_counts())

In [None]:
# Running list of test-set accuracies; the model cells below append to it,
# one entry per classifier, in the order the models are evaluated.
clf_accuracy=[]

### 1. Logistic Regression

In [None]:
# Logistic-regression classifier with the solver's iteration limit set to 1000.
LogReg = LogisticRegression(max_iter=1000)
# Fit on the training split; the fitted estimator is the cell's displayed output.
LogReg.fit(X_train, y_train)

In [None]:
# Predict the test split, then record and show the resulting accuracy.
y_pred_LogReg = LogReg.predict(X_test)
logreg_accuracy = accuracy_score(y_test, y_pred_LogReg)
clf_accuracy.append(logreg_accuracy)
print(logreg_accuracy)

In [None]:
# Compare the estimator's score on the training split with its score on the test split.
logreg_train_score = LogReg.score(X_train, y_train)
logreg_test_score = LogReg.score(X_test, y_test)
print("Train Score {:.2f} & Test Score {:.2f}".format(logreg_train_score, logreg_test_score))

In [None]:
# Error metrics between the predicted and the true labels, with the labels treated as numbers.
print("Model\t\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
logreg_mse = mean_squared_error(y_test, y_pred_LogReg)
logreg_mae = mean_absolute_error(y_test, y_pred_LogReg)
logreg_r2 = r2_score(y_test, y_pred_LogReg)
print("""Logistic Regression \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
    np.sqrt(logreg_mse),   # RMSE is the square root of the MSE
    logreg_mse,
    logreg_mae,
    logreg_r2))

### 2. Decision Tree

In [None]:
DTR = DecisionTreeRegressor()
DTR.fit(X_train, y_train)

In [None]:
y_pred_DTR = DTR.predict(X_test)

In [None]:
clf_accuracy.append(accuracy_score(y_test, y_pred_DTR))
print(accuracy_score(y_test, y_pred_DTR))

In [None]:
print("Train Score {:.2f} & Test Score {:.2f}".format(DTR.score(X_train, y_train), DTR.score(X_test, y_test)))

In [None]:
print("Model\t\t\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
print("""Decision Tree Regressor \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
            np.sqrt(mean_squared_error(y_test, y_pred_DTR)),
            mean_squared_error(y_test, y_pred_DTR),
            mean_absolute_error(y_test, y_pred_DTR),
            r2_score(y_test, y_pred_DTR)))

plt.scatter(y_test, y_pred_DTR)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)

plt.xlabel("Predicted")
plt.ylabel("True")

plt.title("Decision Tree Regressor")

plt.show()

### 3. Random Forest

In [None]:
# Random forest with default hyper-parameters. The fixed random_state makes the
# bootstrap samples and feature subsets, and therefore every score reported below,
# reproducible across Restart & Run All.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train,y_train)

In [None]:
# Random-forest predictions for the test split; reused by the metric, classification-report
# and confusion-matrix cells further down.
pred_rf = rf.predict(X_test)

In [None]:
# Record and show the random forest's test accuracy.
rf_accuracy = accuracy_score(y_test, pred_rf)
clf_accuracy.append(rf_accuracy)
print(rf_accuracy)

In [None]:
# Compare the forest's score on the training split with its score on the test split.
rf_train_score = rf.score(X_train, y_train)
rf_test_score = rf.score(X_test, y_test)
print("Train Score {:.2f} & Test Score {:.2f}".format(rf_train_score, rf_test_score))

In [None]:
# Error metrics between the predicted and the true labels, with the labels treated as numbers.
print("Model\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
rf_mse = mean_squared_error(y_test, pred_rf)
rf_mae = mean_absolute_error(y_test, pred_rf)
rf_r2 = r2_score(y_test, pred_rf)
print("""Random Forest \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
    np.sqrt(rf_mse),   # RMSE is the square root of the MSE
    rf_mse,
    rf_mae,
    rf_r2))

### 4. KNN (K Nearest Neighbors)

In [None]:
# Sweep k = 1..10 for a distance-weighted KNN classifier and keep the best one.
# `KNN` and `pred_knn` end up holding the model/predictions of the k with the highest
# test accuracy, so they match the accuracy recorded in clf_accuracy (previously they
# were left at whatever the last iteration, k=10, produced).
# NOTE(review): k is selected on the test split, so the reported accuracy is optimistic;
# a validation split or GridSearchCV (already imported) would avoid that.
l = list(range(1, 11))
accuracy = []
best_accuracy = -1.0

for i in l:
    candidate = KNeighborsClassifier(n_neighbors=i, weights='distance')
    candidate.fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)
    candidate_accuracy = accuracy_score(y_test, candidate_pred)
    accuracy.append(candidate_accuracy)
    # Strict ">" keeps the smallest k on ties, consistent with max(accuracy).
    if candidate_accuracy > best_accuracy:
        best_accuracy = candidate_accuracy
        KNN, pred_knn = candidate, candidate_pred

plt.plot(l, accuracy)
plt.title('knn_accuracy plot')
plt.xlabel('neighbors')
plt.ylabel('accuracy')
# A bare plt.grid() toggles the grid, which switches it OFF under the darkgrid style.
plt.grid(True)

print(best_accuracy)

clf_accuracy.append(best_accuracy)

In [None]:
# Error metrics for the KNN predictions (pred_knn comes from the KNN cell above).
# This cell previously re-printed the Random Forest numbers from pred_rf.
print("Model\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
knn_mse = mean_squared_error(y_test, pred_knn)
print("""KNN \t\t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
            np.sqrt(knn_mse),
            knn_mse,
            mean_absolute_error(y_test, pred_knn),
            r2_score(y_test, pred_knn)))

### 5. XGBoost

In [None]:
# Gradient-boosted tree classifier with each tree limited to depth 7.
# XGBClassifier is already imported in the notebook's first cell, so no import is repeated here.
reg_xgb = XGBClassifier(max_depth=7)
reg_xgb.fit(X_train,y_train)

In [None]:
# predicting X_test
# XGBoost predictions for the test split, used by the accuracy cell below.
y_pred_xgb = reg_xgb.predict(X_test)

In [None]:
# Compare the booster's score on the training split with its score on the test split.
xgb_train_score = reg_xgb.score(X_train, y_train)
xgb_test_score = reg_xgb.score(X_test, y_test)
print("Train Score {:.2f} & Test Score {:.2f}".format(xgb_train_score, xgb_test_score))

In [None]:
# Record and show XGBoost's test accuracy.
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
clf_accuracy.append(xgb_accuracy)
print(xgb_accuracy)

In [None]:
# Error metrics for the XGBoost predictions.
# This cell previously computed them from pred_rf, so the row labelled XGBClassifier
# actually showed the Random Forest numbers.
print("Model\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
xgb_mse = mean_squared_error(y_test, y_pred_xgb)
print("""XGBClassifier \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
            np.sqrt(xgb_mse),
            xgb_mse,
            mean_absolute_error(y_test, y_pred_xgb),
            r2_score(y_test, y_pred_xgb)))

### 6. Naive Bayes Classifier

In [None]:
# Gaussian naive Bayes classifier with default settings.
nb = GaussianNB()
# Fit on the training split; the fitted estimator is the cell's displayed output.
nb.fit(X_train,y_train)

In [None]:
# Naive Bayes predictions for the test split, used by the accuracy and metric cells below.
pred_nb = nb.predict(X_test)

In [None]:
# Record and show naive Bayes' test accuracy.
nb_accuracy = accuracy_score(y_test, pred_nb)
clf_accuracy.append(nb_accuracy)
print(nb_accuracy)

In [None]:
# Compare the model's score on the training split with its score on the test split.
nb_train_score = nb.score(X_train, y_train)
nb_test_score = nb.score(X_test, y_test)
print("Train Score {:.2f} & Test Score {:.2f}".format(nb_train_score, nb_test_score))

In [None]:
# Error metrics between the predicted and the true labels, with the labels treated as numbers.
print("Model\t\t\t RMSE \t\t MSE \t\t MAE \t\t R2")
nb_mse = mean_squared_error(y_test, pred_nb)
nb_mae = mean_absolute_error(y_test, pred_nb)
nb_r2 = r2_score(y_test, pred_nb)
print("""Naive Bayes Classifier \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
    np.sqrt(nb_mse),   # RMSE is the square root of the MSE
    nb_mse,
    nb_mae,
    nb_r2))

### Classification Report

In [None]:
# Classification report
# Per-class precision / recall / F1 for the Random Forest predictions (pred_rf) only;
# the other models are not reported here.
print(classification_report(y_test, pred_rf))

### Confusion Matrix

In [None]:
# Confusion Matrix
# Built from the Random Forest predictions (pred_rf). With scikit-learn's argument order
# (y_true, y_pred), rows are true classes and columns are predicted classes.
cf_matrix = confusion_matrix(y_test, pred_rf)
print('Confusion Matrix \n',cf_matrix)

### Confusion Matrix Heatmap

In [None]:
# Annotated heatmap of cf_matrix, drawn on an explicitly created 7x6 inch axes.
fig, ax = plt.subplots(figsize=(7,6))
sns.heatmap(cf_matrix, cmap='coolwarm', annot=True, linewidth=1, fmt="d", ax=ax)
plt.show()

### Score Summary:

In [None]:
# Display names for the summary plot, listed in the same order in which the model
# sections above append their accuracies to clf_accuracy.
classifier_list=['Logistic Regression','Decision Tree','Random Forest','KNN','xgboost','nbayes']

In [None]:
# Use the accuracies measured in this run instead of numbers pasted from an earlier run,
# so the summary plot always matches the models fitted above.
clf_accuracy1 = list(clf_accuracy)
# Each model section appends exactly one accuracy; a mismatch means a cell was skipped
# or executed more than once (restart the kernel and run all cells to fix it).
assert len(clf_accuracy1) == len(classifier_list), (
    "expected {} accuracies (one per classifier) but found {}".format(
        len(classifier_list), len(clf_accuracy1)))

In [None]:
# Horizontal bar chart: one bar per classifier, bar length = test accuracy.
plt.figure(figsize=(7,6))
sns.barplot(x=clf_accuracy1, y=classifier_list)
# A bare plt.grid() toggles the grid, which switches it OFF under the darkgrid style;
# request it explicitly instead.
plt.grid(True)
plt.xlabel('accuracy')
plt.ylabel('classifier')
plt.title('classifier vs accuracy plot')
# Render the figure here so the cell does not echo the Text object returned by plt.title().
plt.show()

In [None]:
# Bar chart comparing the fitted models by RMSE between their test-set predictions and y_test.
models = [LogReg, DTR, rf, KNN, reg_xgb, nb]
names = ['Logistic Regression','Decision Tree','Random Forest','KNN','xgboost','nbayes']

# Each model re-predicts the test split; `names` must stay in the same order as `models`.
rmses = [np.sqrt(mean_squared_error(y_test, fitted.predict(X_test))) for fitted in models]

x = np.arange(len(names))
width = 0.3

fig, ax = plt.subplots(figsize=(10,7))
rects = ax.bar(x, rmses, width)
ax.set(ylabel='RMSE', xlabel='Models', title='RMSE with Different Algorithms')

# One tick per model, labelled with its display name.
ax.set_xticks(x)
ax.set_xticklabels(names, rotation=45)

fig.tight_layout()

### Submission

In [None]:
# Build a two-column frame (ID, predicted status) and write it to 'Predict Loan.csv'
# without the DataFrame index.
# NOTE(review): `test`, `y_pred_test` and the Loan_ID / Loan_Status columns are not defined
# or used in any other cell visible here; this cell looks carried over from a different
# (loan-prediction) notebook and will raise NameError unless they are defined elsewhere — confirm.
submission = pd.DataFrame({'Loan_ID': test['Loan_ID'], 'Loan_Status': y_pred_test})
submission.to_csv('Predict Loan.csv', index=False)