In [None]:
# Importing libraries

import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow import keras
from mlxtend.plotting import plot_decision_regions

## Section 01:- Reading Data and Checking the Meta information

In [None]:
# Read the raw CSV (note: the file name itself contains a space) and take a
# first look at its structure.
DATA_PATH = '../input/lrb-data/L_R data.csv'
df = pd.read_csv(DATA_PATH)

df.info()
# Kept as the last expression so the notebook renders the summary table.
df.describe()

## Section02- EDA

### Checking the correlation

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'Traget' column with integer codes; `le` stays fitted so the
# original labels remain available through the encoder.
le = LabelEncoder()
df['Traget'] = le.fit_transform(df['Traget'])

# Annotated heatmap of the pairwise correlations, encoded target included.
correlations = df.corr()
sns.heatmap(correlations, annot=True, cmap="YlGnBu")
plt.show()

### Checking the correlation with the Target variable using a bar chart

In [None]:
# Bar chart of each column's correlation with 'Traget', largest value first.
plt.figure(figsize=(25, 10))
target_corr = df.corr()['Traget']
target_corr.sort_values(ascending=False).plot.bar()
plt.show()

### From the above, we see that each feature has very little correlation with the Target column.

In [None]:
# Class distribution of 'Traget', with the count written above every bar.
plt.figure(figsize=(12, 8))
plt.grid(True)
ax = sns.countplot(data=df, x='Traget', palette='Spectral_r')
for bar in ax.patches:
    count = bar.get_height()
    # Small x/y offsets keep the label inside the bar's width and just above it.
    ax.annotate(f'{count:.0f}', (bar.get_x() + 0.3, count + 0.5))

### From the above, the Target classes look a bit imbalanced: class B --> 0 (encoded) has fewer samples.

### Verifying the values of each column (shown as hue) against the Target column

In [None]:
# Reshape to long form (one row per target/column/value triple) so every
# column can be drawn on a single strip plot, coloured by column name.
df01 = df.melt(id_vars='Traget', var_name='cols', value_name='vals')
g = sns.catplot(data=df01, x="Traget", y="vals", hue='cols', kind='strip')

## Section 03:- Dividing the data in to Train and Test

In [None]:
# Column 0 is used as the target and every remaining column as a feature.
# NOTE(review): presumably column 0 is the encoded 'Traget' column — confirm.
X = df.iloc[:, 1:]
y = df.iloc[:, 0]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the min/max used to transform the training rows
# (data leakage) and make the test scores optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=2, stratify=y
)

# Learn the feature ranges from the training rows only, then apply the same
# transformation to both partitions. The original index is kept so that the
# features stay aligned with y_train / y_test.
scaler = MinMaxScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train), columns=X.columns, index=X_train.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test), columns=X.columns, index=X_test.index
)

## Section 04:- Model Building

### <b>Model 01:- Support Vector Machine (SVM)</b>

In [None]:
# Baseline support vector classifier with scikit-learn's default settings.
model = SVC()
model.fit(X_train, y_train)

svc_train_accuracy = model.score(X_train, y_train)
print(f' Training Accuracy is:- {svc_train_accuracy}')

svc_test_accuracy = model.score(X_test, y_test)
# Bare expression: shown as the cell's output.
f'Test Accuracy is:- {svc_test_accuracy}'

### <b>Model 02:- KNN</b>

In [None]:
# Sweep n_neighbors over 1..19 and record train/test accuracy for each value.
k = range(1, 20)
trainingAccuracy = []
testAccuracy = []
for i in k:
    knn = KNeighborsClassifier(n_neighbors=i, n_jobs=15, p=1, weights='distance')
    knn.fit(X_train, y_train)
    trainingAccuracy.append(knn.score(X_train, y_train))
    testAccuracy.append(knn.score(X_test, y_test))

# NOTE(review): with weights='distance' every training point is its own
# zero-distance neighbour, so the training curve is expected to sit at ~1.0
# and carries little information — confirm.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 4))
axes.plot(k, trainingAccuracy)
axes.plot(k, testAccuracy)
axes.set_xlabel("value of K")
axes.set_ylabel("Accuracy of test and training")
axes.set_title("Select best value of k")
axes.legend(["Training Accuracy", "Test Accuracy"])

# Report which k produced the best test accuracy, not only the score itself.
# np.argmax returns the first maximum, i.e. the smallest k among ties.
best_k = k[int(np.argmax(testAccuracy))]
print("\n Best Test accuracy is:- ", max(testAccuracy))
print(" Best value of k is:- ", best_k)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted KNN using the Manhattan metric (p=1).
# NOTE(review): n_neighbors=36 lies outside the 1..19 range swept in the
# previous cell — confirm where this value comes from.
knn_model = KNeighborsClassifier(n_neighbors=36, weights='distance', p=1, n_jobs=15)
knn_model.fit(X_train, y_train)

knn_train_accuracy = knn_model.score(X_train, y_train)
print(f' Training Accuracy {knn_train_accuracy}')

knn_test_accuracy = knn_model.score(X_test, y_test)
# Bare expression: shown as the cell's output.
f' Testing Accuracy {knn_test_accuracy}'

#### <b>KNN With Hyper parameter Tuning</b>

In [None]:
# Candidate values for the exhaustive search: neighbour count, vote weighting
# and Minkowski power (1 = Manhattan, 2 = Euclidean).
k_range = list(range(1, 50))
weight_options = ["uniform", "distance"]
pe = [1, 2]

param_grid = {
    'n_neighbors': k_range,
    'weights': weight_options,
    'p': pe,
}
knn = KNeighborsClassifier()
knngrid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=15,
)
knngrid.fit(X_train, y_train)

print ("Best score on 10 folds split Data on Train split is :- ", knngrid.best_score_)
print ("\n Best Param:- ", knngrid.best_params_)
print ("\n Best KNN Metric:- ", knngrid.best_estimator_)

# Score the selected estimator on both partitions.
grid_train_accuracy = knngrid.score(X_train, y_train)
print(f' \n Training Accuracy {grid_train_accuracy}')

grid_test_accuracy = knngrid.score(X_test, y_test)
# Bare expression: shown as the cell's output.
f'Test Accuracy {grid_test_accuracy}'

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn

# Confusion matrix of the tuned KNN on the held-out test rows.
y_predicted = knngrid.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)

plt.figure(figsize=(10, 7))
# fmt='g' prints the cell counts without scientific notation.
sn.heatmap(cm, fmt='g', annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')

### <b>Model 03:- Logistic Regression</b>

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the lbfgs solver and a raised iteration cap.
log_model = LogisticRegression(max_iter=1000, solver='lbfgs')
log_model.fit(X_train, y_train)

logreg_train_accuracy = log_model.score(X_train, y_train)
print(f' Training Accuracy {logreg_train_accuracy}')

logreg_test_accuracy = log_model.score(X_test, y_test)
# Bare expression: shown as the cell's output.
f'Test Accuracy {logreg_test_accuracy}'

### <b>Model 04:- Neural Network </b>

In [None]:
# 5-fold stratified CV, shuffled with a fixed seed.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=4)

# Search every activation/solver combination supported by MLPClassifier.
param_grid = [
    {
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['lbfgs', 'sgd', 'adam'],
    }
]

# Seed the network as well: weight initialisation (and batch shuffling for
# sgd/adam) is random, so without a fixed random_state the selected
# hyper-parameters and the reported scores differ from run to run.
clf = GridSearchCV(
    MLPClassifier(random_state=4),
    param_grid,
    cv=folds,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
clf.fit(X_train, y_train)

print(f' Training Accuracy {clf.score(X_train,y_train)}')
# Bare expression: shown as the cell's output.
f'Test Accuracy {clf.score(X_test,y_test)}'

#### <b>Best params used for MLPClassifier</b>

In [None]:
cv_results = pd.DataFrame(clf.cv_results_)

# Print the optimum value of the hyperparameters.
print('Best hyperparameters: ', clf.best_params_)

# Candidates ranked better than 5th. A notebook only renders the LAST
# expression of a cell, so this must come after the print; as a bare
# statement in the middle of the cell its result was silently discarded.
cv_results[cv_results.rank_test_score < 5]

#### Neural network with Dense layers and a stop criterion that halts training once the accuracy threshold is reached on the training data

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  """Stop training as soon as the training accuracy reaches ``threshold``.

  Args:
    threshold: accuracy (0.0-1.0) at which training is cancelled. Defaults to
      1.0, the value that was previously hard-coded in the comparison.
  """

  def __init__(self, threshold=1.0):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    # `logs` may be None, and 'accuracy' may be absent from it; guard both so
    # the comparison never runs against None.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy >= self.threshold:
      # Build the message from the real threshold (it used to say 99% while
      # the check was against 1.0).
      print(f"\nReached {self.threshold:.0%} accuracy so cancelling training!")
      self.model.stop_training = True
callbacks = myCallback()

# Size the softmax layer from the data instead of a hard-coded 10.
# NOTE(review): assumes the labels are the integers 0..n_classes-1, which is
# what sparse_categorical_crossentropy expects — confirm.
n_classes = y_train.nunique()

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(800, activation=tf.nn.relu),
  tf.keras.layers.Dense(256, activation=tf.nn.relu),
  tf.keras.layers.Dense(256, activation=tf.nn.relu),
  tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50,batch_size=32,callbacks=[callbacks])

# evaluate() returns [loss, accuracy] for this compile() call; report only the
# accuracy under the "accuracy" label.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("accuracy on test data is", test_accuracy)

In [None]:
# evaluate() returns [loss, accuracy]; unpack so that only the accuracy is
# printed under the "Accuracy" label.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Accuracy on test data is ", test_accuracy)

# Use the neural network's own class probabilities. `y_predicted` holds the
# KNN grid-search labels, and np.argmax of a single scalar label is always 0.
nn_probabilities = model.predict(X_test)
print("Predicted value of ytest[4] is ", np.argmax(nn_probabilities[4]))

### Model 05-- Spot check without Tuning Hyper parameters


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression, SGDClassifier, Perceptron, RidgeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC,NuSVC
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import precision_score

# (short name, default-configured estimator) pairs to compare.
# GradientBoostingClassifier is scikit-learn's implementation, not XGBoost,
# so it is labelled 'GBC' rather than 'XGB'.
models =[("LR", LogisticRegression()),("SVC", SVC()),
         ('KNN',KNeighborsClassifier()),("DTC", DecisionTreeClassifier()),
         ("GNB", GaussianNB()),("SGDC", SGDClassifier()),("Perc", Perceptron()),
         ("NC",NearestCentroid()),("Ridge", RidgeClassifier()),
         ("BNB", BernoulliNB()),('RF',RandomForestClassifier()),('ADA',AdaBoostClassifier()),
         ('GBC',GradientBoostingClassifier()),('PAC',PassiveAggressiveClassifier())]
pred = []
names = []
modelsprecision = []

# The loop variable is deliberately NOT called `model`: that name is bound to
# the Keras network earlier in the notebook, and rebinding it here would break
# any later re-run of the cells that call model.evaluate / model.predict.
for name, estimator in models:
    estimator.fit(X_train, y_train)
    prediction = estimator.predict(X_test)
    # Macro average: unweighted mean of the per-class precision.
    score = precision_score(y_test, prediction, average='macro')
    pred.append(score)
    names.append(name)
    modelsprecision.append((name, score))

# Best macro precision first.
modelsprecision.sort(key=lambda item: item[1], reverse=True)

modelsprecision


## <b><u>Conclusion</u></b>
* ### The neural network with Dense layers gives the best result, with 96.8% accuracy on the test data.