In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
from sklearn.metrics  import accuracy_score,confusion_matrix,ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler

In [2]:
from sklearn.tree import ExtraTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import OneClassSVM
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import ClassifierChain
from sklearn.multioutput import MultiOutputClassifier
from sklearn.multiclass import OutputCodeClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import PassiveAggressiveClassifier    
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.calibration import CalibratedClassifierCV
from sklearn.naive_bayes import GaussianNB
from sklearn.semi_supervised import LabelPropagation
from sklearn.semi_supervised import LabelSpreading
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB  
from sklearn.neighbors import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import NuSVC
from sklearn.linear_model import Perceptron
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.mixture import BayesianGaussianMixture
from sklearn.mixture import GaussianMixture

In [3]:
df = pd.read_csv('accelerometer.csv')
df.shape

(153000, 5)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153000 entries, 0 to 152999
Data columns (total 5 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   wconfid  153000 non-null  int64  
 1   pctid    153000 non-null  int64  
 2   x        153000 non-null  float64
 3   y        153000 non-null  float64
 4   z        153000 non-null  float64
dtypes: float64(3), int64(2)
memory usage: 5.8 MB


In [5]:
df.head()

Unnamed: 0,wconfid,pctid,x,y,z
0,1,20,1.004,0.09,-0.125
1,1,20,1.004,-0.043,-0.125
2,1,20,0.969,0.09,-0.121
3,1,20,0.973,-0.012,-0.137
4,1,20,1.0,-0.016,-0.121


In [6]:
df.describe()

Unnamed: 0,wconfid,pctid,x,y,z
count,153000.0,153000.0,153000.0,153000.0,153000.0
mean,2.0,60.0,0.995622,0.005351,-0.117769
std,0.816499,24.494977,0.773958,0.742602,0.517008
min,1.0,20.0,-8.0,-8.0,-5.867
25%,1.0,40.0,0.945,-0.078,-0.172
50%,2.0,60.0,0.992,0.008,-0.125
75%,3.0,80.0,1.039,0.105,-0.066
max,3.0,100.0,7.996,7.996,6.086


In [7]:
df.isna().sum()

wconfid    0
pctid      0
x          0
y          0
z          0
dtype: int64

In [8]:
scaler = MinMaxScaler(feature_range=(-10,10))
df['pctid'] = scaler.fit_transform(df[['pctid']])


In [9]:
df.describe()

Unnamed: 0,wconfid,pctid,x,y,z
count,153000.0,153000.0,153000.0,153000.0,153000.0
mean,2.0,0.0,0.995622,0.005351,-0.117769
std,0.816499,6.123744,0.773958,0.742602,0.517008
min,1.0,-10.0,-8.0,-8.0,-5.867
25%,1.0,-5.0,0.945,-0.078,-0.172
50%,2.0,0.0,0.992,0.008,-0.125
75%,3.0,5.0,1.039,0.105,-0.066
max,3.0,10.0,7.996,7.996,6.086


In [10]:
x = df.drop(['wconfid'],axis=1)
y = df['wconfid'].values

In [11]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=30)

In [12]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((122400, 4), (30600, 4), (122400,), (30600,))

In [13]:
def calculate_accuracy(actual,predicted):
    acc = accuracy_score(actual,predicted)
    print(f'Accuracy Score: {round(acc*100,2)}%')
    # cnfmt = confusion_matrix(actual,predicted)
    # disp = ConfusionMatrixDisplay(confusion_matrix=cnfmt)
    # disp.plot()
    # plt.show()

In [14]:
def process_classification(object_class):
    try:
        obj = object_class()
        print(obj)
        obj.fit(x_train,y_train)
        obj_pred = obj.predict(x_test)
        calculate_accuracy(y_test,obj_pred)
    except Exception as e:
        print(e)

In [15]:
class_objects = [
    ExtraTreeClassifier,
    DecisionTreeClassifier,
    OneClassSVM,
    MLPClassifier,
    RadiusNeighborsClassifier,
    KNeighborsClassifier,
    ClassifierChain,
    MultiOutputClassifier,
    OutputCodeClassifier,
    OneVsOneClassifier,
    OneVsRestClassifier,
    SGDClassifier,
    RidgeClassifierCV,
    RidgeClassifier,
    PassiveAggressiveClassifier,
    GaussianProcessClassifier,
    VotingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
    BaggingClassifier,
    ExtraTreesClassifier,
    RandomForestClassifier,
    BernoulliNB,
    CalibratedClassifierCV,
    GaussianNB,
    LabelPropagation,
    LabelSpreading,
    LinearDiscriminantAnalysis,
    LinearSVC,
    LogisticRegression,
    LogisticRegressionCV,
    SGDClassifier,
    MultinomialNB,
    NearestCentroid,
    KNeighborsClassifier,
    NuSVC,
    Perceptron,
    QuadraticDiscriminantAnalysis,
    SVC,
    BayesianGaussianMixture,
    GaussianMixture,
]

In [16]:
for obj in class_objects:
    process_classification(obj)

ExtraTreeClassifier()
Accuracy Score: 65.79%
DecisionTreeClassifier()
Accuracy Score: 66.84%
OneClassSVM()
Accuracy Score: 13.12%
MLPClassifier()




Accuracy Score: 66.63%
RadiusNeighborsClassifier()
No neighbors found for test samples array([  506,   884,  1944,  2033,  2458,  2551,  2817,  3123,  3498,
        3517,  3651,  4123,  4554,  4758,  4837,  5123,  5305,  5573,
        7326,  9090,  9096,  9099,  9241,  9627, 10542, 10985, 11696,
       11849, 12470, 12496, 14021, 15360, 16057, 17147, 17308, 17730,
       18145, 18148, 18600, 18822, 19254, 19847, 20210, 20558, 20645,
       21400, 21546, 21897, 21966, 22221, 22686, 23337, 24525, 25113,
       25479, 25720, 25990, 26011, 26248, 26294, 27147, 27346, 27515,
       27675, 27746, 27753, 28082, 28359, 28363, 28769, 28803, 29539,
       29792, 29889, 30235, 30568]), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
KNeighborsClassifier()
Accuracy Score: 71.84%
_BaseChain.__init__() missing 1 required positional argument: 'base_estimator'
MultiOutputClassifier.__init__() missing 1 required positional argument: 'estimat



Accuracy Score: 31.22%
GaussianNB()
Accuracy Score: 50.41%
LabelPropagation()
Unable to allocate 112. GiB for an array with shape (122400, 122400) and data type float64
LabelSpreading()
Unable to allocate 112. GiB for an array with shape (122400, 122400) and data type float64
LinearDiscriminantAnalysis()
Accuracy Score: 31.36%
LinearSVC()




Accuracy Score: 32.09%
LogisticRegression()
Accuracy Score: 31.46%
LogisticRegressionCV()
Accuracy Score: 31.42%
SGDClassifier()
Accuracy Score: 31.81%
MultinomialNB()
Negative values in data passed to MultinomialNB (input X)
NearestCentroid()
Accuracy Score: 33.37%
KNeighborsClassifier()
Accuracy Score: 71.84%
NuSVC()
