#Import section

In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC

import matplotlib.pyplot as plt
%matplotlib inline

#RandomForest_Algorithm

In [2]:
def RandomForest_Algorithm(filename, test_size=0.30, n_estimators=10, random_state=None):
  """Train a random forest on a CSV dataset and print train/test metrics.

  The last column of the CSV is used as the class label; every other column
  is used as a feature. The data is split into train and test parts, a
  RandomForestClassifier is fitted on the training part, and the accuracy,
  classification report and confusion matrix are printed for both parts.

  Parameters
  ----------
  filename : str or path-like
      CSV file handed to ``pd.read_csv``.
  test_size : float, default 0.30
      Fraction of the rows held out for testing.
  n_estimators : int, default 10
      Number of trees in the forest.
  random_state : int or None, default None
      Seed forwarded to both the split and the forest. ``None`` keeps the
      unseeded behaviour, so results differ from run to run; pass an int to
      make the printed numbers reproducible.

  Returns
  -------
  None
      Results are only printed.
  """
  df = pd.read_csv(filename)

  # Last column is the label, everything before it is a feature.
  features = df.iloc[:, :-1].values
  labels = df.iloc[:, -1].values

  X_train, X_test, y_train, y_test = train_test_split(
      features, labels, test_size=test_size, random_state=random_state)

  clf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
  clf.fit(X_train, y_train)

  # Predict once per split and derive the accuracy from those predictions;
  # clf.score() would run the same prediction a second time.
  ypredict_train = clf.predict(X_train)
  ypredict_test = clf.predict(X_test)
  train_accuracy = accuracy_score(y_train, ypredict_train)
  test_accuracy = accuracy_score(y_test, ypredict_test)

  # zero_division=0 reports 0.0 for classes that are never predicted (the
  # same value as the default) without emitting UndefinedMetricWarning.
  print("\n\n#########################################################\n")
  print("Random Forest Algorithm with ",filename," :")
  print("\n\n---------------||||||||Training Data||||||||---------------\n\n")
  print("Training accuracy = ",train_accuracy*100,"%")
  print('\nTraining classification report\n',
        classification_report(y_train, ypredict_train, zero_division=0))
  print("\n Confusion matrix of training \n", confusion_matrix(y_train, ypredict_train))
  print("\n\n---------------||||||||Testing Data||||||||---------------\n\n")
  print("Testing accuracy  = ",test_accuracy*100,"%")
  print('\nTesting classification report\n',
        classification_report(y_test, ypredict_test, zero_division=0))
  print("\n Confusion matrix of testing \n", confusion_matrix(y_test, ypredict_test))

#KNN_Algorithm

In [3]:
def KNN_Algorithm(filename, test_size=0.20, n_neighbors=5, random_state=None):
  """Train a k-nearest-neighbours classifier on a CSV dataset and print metrics.

  The last column of the CSV is used as the class label; every other column
  is used as a feature. Features are standardised with a StandardScaler that
  is fitted on the training part only, then a KNeighborsClassifier is fitted
  and the accuracy, classification report and confusion matrix are printed
  for both the training and the test part.

  Parameters
  ----------
  filename : str or path-like
      CSV file handed to ``pd.read_csv``.
  test_size : float, default 0.20
      Fraction of the rows held out for testing.
  n_neighbors : int, default 5
      Number of neighbours used by the classifier.
  random_state : int or None, default None
      Seed for the train/test split. ``None`` keeps the unseeded behaviour;
      pass an int to make the printed numbers reproducible.

  Returns
  -------
  None
      Results are only printed.
  """
  # Read dataset to pandas dataframe
  dataset = pd.read_csv(filename)

  # Last column is the label, everything before it is a feature.
  X = dataset.iloc[:, :-1].values
  y = dataset.iloc[:, -1].values

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)

  # Fit the scaler on the training data only so no test statistics leak in.
  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_test = scaler.transform(X_test)

  classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
  classifier.fit(X_train, y_train)

  # Predict once per split and derive the accuracy from those predictions;
  # classifier.score() would repeat the neighbour search for every row.
  y_pred_test = classifier.predict(X_test)
  y_pred_train = classifier.predict(X_train)
  train_accuracy = accuracy_score(y_train, y_pred_train)
  test_accuracy = accuracy_score(y_test, y_pred_test)

  # zero_division=0 reports 0.0 for classes that are never predicted (the
  # same value as the default) without emitting UndefinedMetricWarning.
  print("\n\n#########################################################\n")
  print("KNN with ",filename," :")
  print("\n\n---------------||||||||Training Data||||||||---------------\n\n")
  print("Training accuracy = ",train_accuracy*100,"%")
  print('\nTraining classification report\n',
        classification_report(y_train, y_pred_train, zero_division=0))
  print("\n Confusion matrix of training \n", confusion_matrix(y_train, y_pred_train))
  print("\n\n---------------||||||||Testing Data||||||||---------------\n\n")
  print("Testing accuracy  = ",test_accuracy*100,"%")
  print('\nTesting classification report\n',
        classification_report(y_test, y_pred_test, zero_division=0))
  print("\n Confusion matrix of testing \n", confusion_matrix(y_test, y_pred_test))

#LDA_Algorithm

In [4]:
def LDA_Algorithm(filename, n_components=1, test_size=0.2):
  """Project a CSV dataset with LDA, classify it, and print train/test metrics.

  The last column of the CSV is used as the class label; every other column
  is used as a feature. Features are standardised, reduced with Linear
  Discriminant Analysis, and a shallow RandomForestClassifier (max_depth=2)
  is fitted on the projected data. Accuracy, classification report and
  confusion matrix are printed for both the training and the test part.
  The split and the forest both use random_state=0.

  Parameters
  ----------
  filename : str or path-like
      CSV file handed to ``pd.read_csv``.
  n_components : int, default 1
      Number of discriminant components kept by LDA. scikit-learn requires
      this to be at most min(n_classes - 1, n_features), so 1 is the only
      valid value for two-class data; datasets with more classes may pass a
      larger value to keep more of the class-separating directions.
  test_size : float, default 0.2
      Fraction of the rows held out for testing.

  Returns
  -------
  None
      Results are only printed.
  """
  # Read dataset to pandas dataframe
  dataset = pd.read_csv(filename)

  # Last column is the label, everything before it is a feature.
  X = dataset.iloc[:, :-1].values
  y = dataset.iloc[:, -1].values

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=0)

  # Scaler and LDA are both fitted on the training data only.
  sc = StandardScaler()
  X_train = sc.fit_transform(X_train)
  X_test = sc.transform(X_test)

  lda = LDA(n_components=n_components)
  X_train = lda.fit_transform(X_train, y_train)
  X_test = lda.transform(X_test)

  classifier = RandomForestClassifier(max_depth=2, random_state=0)
  classifier.fit(X_train, y_train)

  # Predict once per split and derive the accuracy from those predictions;
  # classifier.score() would run the same prediction a second time.
  y_pred_test = classifier.predict(X_test)
  y_pred_train = classifier.predict(X_train)
  train_accuracy = accuracy_score(y_train, y_pred_train)
  test_accuracy = accuracy_score(y_test, y_pred_test)

  # zero_division=0 reports 0.0 for classes that are never predicted (the
  # same value as the default) without emitting UndefinedMetricWarning.
  print("\n\n#########################################################\n")
  print("LDA with ",filename," :")
  print("\n\n---------------||||||||Training Data||||||||---------------\n\n")
  print("Training accuracy = ",train_accuracy*100,"%")
  print('\nTraining classification report\n',
        classification_report(y_train, y_pred_train, zero_division=0))
  print("\n Confusion matrix of training \n", confusion_matrix(y_train, y_pred_train))
  print("\n\n---------------||||||||Testing Data||||||||---------------\n\n")
  print("Testing accuracy  = ",test_accuracy*100,"%")
  print('\nTesting classification report\n',
        classification_report(y_test, y_pred_test, zero_division=0))
  print("\n Confusion matrix of testing \n", confusion_matrix(y_test, y_pred_test))

#SVM_Algorithm

In [5]:
def SVM_Algorithm(filename, test_size=0.20, kernel='linear', random_state=None):
  """Train a support vector classifier on a CSV dataset and print metrics.

  The last column of the CSV is used as the class label; every other column
  is used as a feature. Features are rescaled to the range [-1, 1] with a
  MinMaxScaler fitted on the training part only, an SVC is fitted, and the
  accuracy, classification report and confusion matrix are printed for both
  the training and the test part.

  Parameters
  ----------
  filename : str or path-like
      CSV file handed to ``pd.read_csv``.
  test_size : float, default 0.20
      Fraction of the rows held out for testing.
  kernel : str, default 'linear'
      Kernel passed to ``SVC``.
  random_state : int or None, default None
      Seed for the train/test split. ``None`` keeps the unseeded behaviour;
      pass an int to make the printed numbers reproducible.

  Returns
  -------
  None
      Results are only printed.
  """
  bankdata = pd.read_csv(filename)

  # Last column is the label, everything else is a feature.
  label_name = bankdata.columns[-1]
  X = bankdata.drop(label_name, axis=1)
  y = bankdata[label_name]

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)

  # Fit the scaler on the training data only so no test statistics leak in.
  scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
  X_train = scaling.transform(X_train)
  X_test = scaling.transform(X_test)

  svclassifier = SVC(kernel=kernel)
  svclassifier.fit(X_train, y_train)

  # Predict once per split and derive the accuracy from those predictions;
  # svclassifier.score() would run the same prediction a second time.
  y_pred_train = svclassifier.predict(X_train)
  y_pred_test = svclassifier.predict(X_test)
  train_accuracy = accuracy_score(y_train, y_pred_train)
  test_accuracy = accuracy_score(y_test, y_pred_test)

  # zero_division=0 reports 0.0 for classes that are never predicted (the
  # same value as the default) without emitting UndefinedMetricWarning.
  print("\n\n#########################################################\n")
  print("SVM with ",filename," :")
  print("\n\n---------------||||||||Training Data||||||||---------------\n\n")
  print("Training accuracy = ",train_accuracy*100,"%")
  print('\nTraining classification report\n',
        classification_report(y_train, y_pred_train, zero_division=0))
  print("\n Confusion matrix of training \n", confusion_matrix(y_train, y_pred_train))
  print("\n\n---------------||||||||Testing Data||||||||---------------\n\n")
  print("Testing accuracy  = ",test_accuracy*100,"%")
  print('\nTesting classification report\n',
        classification_report(y_test, y_pred_test, zero_division=0))
  print("\n Confusion matrix of testing \n", confusion_matrix(y_test, y_pred_test))

#call the algorithms

##RandomForest_Algorithm

In [6]:
# Evaluate the random forest on each dataset, one file at a time.
for csv_path in (
  'bill_authentication.csv',
  'PimaIndians.csv',
  'StudentsPerformance.csv',
  'covid19_tweets.csv',
  'hashtag_donaldtrump.csv',
):
  RandomForest_Algorithm(csv_path)



#########################################################

Random Forest Algorithm with  bill_authentication.csv  :


---------------||||||||Training Data||||||||---------------


Training accuracy =  99.89583333333333 %

Training classification report
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       533
           1       1.00      1.00      1.00       427

    accuracy                           1.00       960
   macro avg       1.00      1.00      1.00       960
weighted avg       1.00      1.00      1.00       960


 Confusion matrix of training 
 [[533   0]
 [  1 426]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  99.02912621359224 %

Testing classification report
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       229
           1       0.99      0.99      0.99       183

    accuracy                           0.99       412
   macr

  _warn_prf(average, modifier, msg_start, len(result))



Testing classification report
                precision    recall  f1-score   support

       Africa       0.98      0.99      0.98       334
   Antarctica       0.00      0.00      0.00         2
         Asia       0.99      0.99      0.99      1495
       Europe       0.99      0.99      0.99      4138
North America       1.00      1.00      1.00      8088
      Oceania       1.00      0.98      0.99       319
South America       0.99      1.00      0.99       624

     accuracy                           0.99     15000
    macro avg       0.85      0.85      0.85     15000
 weighted avg       0.99      0.99      0.99     15000


 Confusion matrix of testing 
 [[ 329    0    3    2    0    0    0]
 [   2    0    0    0    0    0    0]
 [   2    0 1483    4    6    0    0]
 [   4    0    1 4114   18    0    1]
 [   0    0    1   22 8062    0    3]
 [   0    0    3    0    2  313    1]
 [   0    0    0    2    1    0  621]]


##KNN_Algorithm

In [7]:
# Evaluate the KNN classifier on each dataset, one file at a time.
for csv_path in (
  'bill_authentication.csv',
  'PimaIndians.csv',
  'StudentsPerformance.csv',
  'covid19_tweets.csv',
  'hashtag_donaldtrump.csv',
):
  KNN_Algorithm(csv_path)



#########################################################

KNN with  bill_authentication.csv  :


---------------||||||||Training Data||||||||---------------


Training accuracy =  99.90884229717412 %

Training classification report
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       609
           1       1.00      1.00      1.00       488

    accuracy                           1.00      1097
   macro avg       1.00      1.00      1.00      1097
weighted avg       1.00      1.00      1.00      1097


 Confusion matrix of training 
 [[608   1]
 [  0 488]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  99.63636363636364 %

Testing classification report
               precision    recall  f1-score   support

           0       1.00      0.99      1.00       153
           1       0.99      1.00      1.00       122

    accuracy                           1.00       275
   macro avg       1.00    

  _warn_prf(average, modifier, msg_start, len(result))



Training classification report
                precision    recall  f1-score   support

       Africa       0.99      0.98      0.99       808
   Antarctica       0.00      0.00      0.00         1
         Asia       0.99      0.99      0.99      4047
       Europe       0.99      1.00      0.99     11192
North America       1.00      1.00      1.00     21414
      Oceania       1.00      0.99      1.00       948
South America       0.99      0.99      0.99      1588

     accuracy                           1.00     39998
    macro avg       0.85      0.85      0.85     39998
 weighted avg       1.00      1.00      1.00     39998


 Confusion matrix of training 
 [[  794     0     6     5     1     0     2]
 [    0     0     0     0     0     0     1]
 [    0     0  4009    31     5     0     2]
 [    2     0     8 11147    34     0     1]
 [    0     0     5    42 21362     0     5]
 [    1     0     3     2     1   940     1]
 [    1     0     1     5    11     0  1570]]


--------

##LDA_Algorithm

In [8]:
# Evaluate the LDA pipeline on each dataset, one file at a time.
for csv_path in (
  'bill_authentication.csv',
  'PimaIndians.csv',
  'StudentsPerformance.csv',
  'covid19_tweets.csv',
  'hashtag_donaldtrump.csv',
):
  LDA_Algorithm(csv_path)



#########################################################

LDA with  bill_authentication.csv  :


---------------||||||||Training Data||||||||---------------


Training accuracy =  99.08842297174111 %

Training classification report
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       605
           1       0.99      0.99      0.99       492

    accuracy                           0.99      1097
   macro avg       0.99      0.99      0.99      1097
weighted avg       0.99      0.99      0.99      1097


 Confusion matrix of training 
 [[601   4]
 [  6 486]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  99.27272727272727 %

Testing classification report
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       157
           1       0.99      0.99      0.99       118

    accuracy                           0.99       275
   macro avg       0.99    

  _warn_prf(average, modifier, msg_start, len(result))



Training classification report
                     precision    recall  f1-score   support

Android Mobile App       0.45      0.94      0.61     16081
       Apple Watch       0.00      0.00      0.00      2012
    IOS Mobile App       0.41      0.27      0.32      9961
                PC       0.00      0.00      0.00      5984
          Smart TV       0.00      0.00      0.00      1222
          websites       0.00      0.00      0.00      4738

          accuracy                           0.45     39998
         macro avg       0.14      0.20      0.16     39998
      weighted avg       0.28      0.45      0.33     39998


 Confusion matrix of training 
 [[15146     0   935     0     0     0]
 [  890     0  1122     0     0     0]
 [ 7281     0  2680     0     0     0]
 [ 5314     0   670     0     0     0]
 [  341     0   881     0     0     0]
 [ 4413     0   325     0     0     0]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  44.36 %

Tes

  _warn_prf(average, modifier, msg_start, len(result))



Training classification report
                precision    recall  f1-score   support

       Africa       0.00      0.00      0.00       837
   Antarctica       0.00      0.00      0.00         2
         Asia       0.74      0.99      0.85      4086
       Europe       0.86      0.99      0.92     11260
North America       0.98      0.99      0.98     21328
      Oceania       0.00      0.00      0.00       929
South America       0.00      0.00      0.00      1556

     accuracy                           0.91     39998
    macro avg       0.37      0.42      0.39     39998
 weighted avg       0.84      0.91      0.87     39998


 Confusion matrix of training 
 [[    0     0   397   440     0     0     0]
 [    0     0     0     2     0     0     0]
 [    0     0  4030    54     2     0     0]
 [    0     0    53 11192    15     0     0]
 [    0     0    14   206 21108     0     0]
 [    0     0   925     2     2     0     0]
 [    0     0     2  1108   446     0     0]]


--------

##SVM_Algorithm

In [9]:
# Evaluate the SVM classifier on each dataset, one file at a time.
for csv_path in (
  'bill_authentication.csv',
  'PimaIndians.csv',
  'StudentsPerformance.csv',
  'covid19_tweets.csv',
  'hashtag_donaldtrump.csv',
):
  SVM_Algorithm(csv_path)



#########################################################

SVM with  bill_authentication.csv  :


---------------||||||||Training Data||||||||---------------


Training accuracy =  98.35916134913401 %

Training classification report
               precision    recall  f1-score   support

           0       1.00      0.97      0.98       601
           1       0.97      1.00      0.98       496

    accuracy                           0.98      1097
   macro avg       0.98      0.98      0.98      1097
weighted avg       0.98      0.98      0.98      1097


 Confusion matrix of training 
 [[584  17]
 [  1 495]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  97.81818181818181 %

Testing classification report
               precision    recall  f1-score   support

           0       1.00      0.96      0.98       161
           1       0.95      1.00      0.97       114

    accuracy                           0.98       275
   macro avg       0.97    

  _warn_prf(average, modifier, msg_start, len(result))



Training classification report
                     precision    recall  f1-score   support

Android Mobile App       0.44      0.96      0.61     16078
       Apple Watch       0.21      0.01      0.02      1991
    IOS Mobile App       0.42      0.22      0.29      9913
                PC       0.00      0.00      0.00      6036
          Smart TV       1.00      0.00      0.00      1235
          websites       0.00      0.00      0.00      4745

          accuracy                           0.44     39998
         macro avg       0.35      0.20      0.15     39998
      weighted avg       0.32      0.44      0.32     39998


 Confusion matrix of training 
 [[15437    28   613     0     0     0]
 [ 1228    21   742     0     0     0]
 [ 7762     7  2144     0     0     0]
 [ 5510    40   486     0     0     0]
 [  418     5   811     0     1     0]
 [ 4444     1   300     0     0     0]]


---------------||||||||Testing Data||||||||---------------


Testing accuracy  =  43.25 %

Tes

  _warn_prf(average, modifier, msg_start, len(result))



Training classification report
                precision    recall  f1-score   support

       Africa       0.99      0.82      0.89       798
   Antarctica       0.00      0.00      0.00         1
         Asia       0.97      0.98      0.97      4062
       Europe       0.98      1.00      0.99     11259
North America       1.00      0.99      1.00     21350
      Oceania       1.00      0.99      1.00       950
South America       0.97      1.00      0.98      1578

     accuracy                           0.99     39998
    macro avg       0.84      0.83      0.83     39998
 weighted avg       0.99      0.99      0.99     39998


 Confusion matrix of training 
 [[  651     0   111    36     0     0     0]
 [    0     0     0     0     0     0     1]
 [    2     0  3991    66     2     0     1]
 [    3     0    13 11228    13     0     2]
 [    2     0    14    94 21196     0    44]
 [    0     0     4     2     1   942     1]
 [    0     0     1     5     1     0  1571]]


--------