In [1]:
import pandas as pd

from sklearn.linear_model import RidgeClassifier, LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.model_selection import cross_validate, train_test_split

# Single seed reused as `random_state` for the train/test split and for every
# estimator below that is constructed with one, so reruns are reproducible.
seed = 66

In [2]:
# Load the raw data file. With header=None pandas assigns integer column
# labels instead of reading the first row as column names.
df = pd.read_csv(
    filepath_or_buffer='ionosphere.data',
    header=None,
)

# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.0,0.0376,...,-0.51171,0.41078,-0.46168,0.21266,-0.3409,0.42267,-0.54487,0.18641,-0.453,g
1,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
2,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
3,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
4,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g


In [3]:
# Features are every column except the last; the last column is the class
# label ('g' / 'b' in the preview above). Slicing relative to the end avoids
# hard-coding the feature count (34), so the label can never leak into X and
# no feature is silently dropped if the file's column count differs.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [4]:
# Hold out 20% of the rows for the spot-check below; the fixed seed keeps the
# partition identical across reruns.
# NOTE(review): the split is not stratified on y — consider `stratify=y` if the
# classes are imbalanced (this would change the scores recorded below).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

## Spot-checking

In [5]:
# Keyword arguments shared by every estimator that is seeded in this cell.
seeded = {'random_state': seed}

# Linear models
rc = RidgeClassifier(**seeded)
lr = LogisticRegression(**seeded)
sgd = SGDClassifier(**seeded)

# Support vector machines
svc = SVC(**seeded)
lsvc = LinearSVC(**seeded)

# Estimators constructed with library defaults only (no seed passed)
knn = KNeighborsClassifier()
gnb = GaussianNB()

# Tree-based and ensemble models
dt = DecisionTreeClassifier(**seeded)
ada = AdaBoostClassifier(**seeded)
rf = RandomForestClassifier(**seeded)

In [6]:
# Short name -> estimator instance; insertion order fixes the print order.
algos = dict(
    rc=rc,
    lr=lr,
    sgd=sgd,
    svc=svc,
    lsvc=lsvc,
    knn=knn,
    gnb=gnb,
    dt=dt,
    ada=ada,
    rf=rf,
)

# Spot-check: fit each estimator on the training split and report its
# score on the held-out split, one line per estimator.
for algo_name, algo in algos.items():
    algo.fit(X_train, y_train)
    score = algo.score(X_test, y_test)
    print(f'{algo_name}: {score:.0%}')

rc: 85%
lr: 85%
sgd: 86%
svc: 93%
lsvc: 87%
knn: 80%
gnb: 87%
dt: 90%
ada: 94%
rf: 94%


## Stacking

In [7]:
# (name, estimator) pairs used as the base learners of both the stacking and
# the voting ensembles below.
base_names = ('svc', 'dt', 'ada', 'rf')
base_models = (svc, dt, ada, rf)
estimators = list(zip(base_names, base_models))

In [8]:
# Stacked ensemble: the base estimators feed a final estimator, here the
# logistic-regression instance `lr`.
clf = StackingClassifier(estimators, final_estimator=lr)

In [9]:
# 10-fold cross-validation of the current `clf` on the full X / y
# (this includes the rows that were held out as X_test earlier).
results = cross_validate(clf, X, y, cv=10)

# Summarise the per-fold test scores as mean and standard deviation.
fold_scores = results['test_score']
mean_score = fold_scores.mean()
sd_score = fold_scores.std()

print(f'Accuracy is {mean_score:.2%} +/- {sd_score:.0%}')

Accuracy is 93.44% +/- 5%


## Voting

In [10]:
# Voting ensemble over the same base estimators. No `voting` argument is
# passed, so the library default applies. This rebinds `clf`, replacing the
# stacking model defined earlier.
clf = VotingClassifier(estimators)

In [11]:
# 10-fold cross-validation of the current `clf` on the full X / y.
results = cross_validate(clf, X, y, cv=10)

# Mean and standard deviation of the per-fold test scores.
test_scores = results['test_score']
mean_score, sd_score = test_scores.mean(), test_scores.std()

print(f'Accuracy is {mean_score:.2%} +/- {sd_score:.0%}')

Accuracy is 93.72% +/- 4%
