In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.multiclass import OneVsRestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import  OrdinalEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
import array_to_latex as a2l

In [None]:
# ---------------------------------------------------------------------------
# Load the training table and build the preprocessing transformer.
#
# The same ColumnTransformer produces both the model inputs and the class
# labels: its output is the PCA-reduced numeric block followed by a single
# column holding the encoded "Mechanism_type".
# ---------------------------------------------------------------------------
pdata = pd.read_csv('train.out', header=0, sep=' ')

num_attribs = ["mrr", "mtt", "mff", "mrt", "mrf", "mtf"]
cat_attribs = ["Mechanism_type"]

# Number of principal components kept from the six numeric attributes.
pcomp = 5
p = PCA(n_components=pcomp)

# Label branch: integer-code the mechanism type, then collapse the codes into
# three equal-width ordinal bins (0, 1, 2).
mech_pipeline = Pipeline(steps=[
    ("ord", OrdinalEncoder()),
    ("bins", KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')),
])

# Feature branch: standardise, then project onto the principal components.
num_pipeline = Pipeline(steps=[
    ("scal", StandardScaler()),
    ("pca", p),
])

pipeline = ColumnTransformer(transformers=[
    ("num", num_pipeline, num_attribs),
    ("cat", mech_pipeline, cat_attribs),
])
prepped = pipeline.fit_transform(pdata)

# Columns [0, pcomp) are the PCA features; column `pcomp` is the binned label.
ina = prepped[:, :pcomp]
outa = prepped[:, pcomp]
clf = OneVsRestClassifier(svm.SVC(C=1.0, gamma=1.0))

# Second data set: transformed with the transformer fitted on the training
# table above (transform only, no re-fit).
maydata = pd.read_csv('may20.out', header=0, sep=' ')
mayprep = pipeline.transform(maydata)
fore = mayprep[:, :pcomp]
plabels = mayprep[:, pcomp]

In [None]:
# ---------------------------------------------------------------------------
# Grid search over the RBF-SVC hyper-parameters, once per scoring metric.
#
# For each metric in `scores` a fresh GridSearchCV is fitted on (ina, outa),
# the cross-validation table is printed, and the refitted best model is
# evaluated on the second data set (fore, plabels).
# ---------------------------------------------------------------------------

# Ten log-spaced values from 10**-1 to 10**1 for both gamma and C.
# np.logspace(-1, 1, 10) is the library form of 10 ** np.linspace(-1, 1, 10).
tuned_parameters = [{
    'estimator__kernel': ['rbf'],
    'estimator__gamma': np.logspace(-1, 1, 10),
    'estimator__C': np.logspace(-1, 1, 10),
}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # The "estimator__" prefix routes each parameter through the
    # OneVsRestClassifier wrapper to the inner SVC.
    clfg = GridSearchCV(
        clf, tuned_parameters, n_jobs=4, scoring='%s_macro' % score
    )
    clfg.fit(ina, outa)

    print("Best parameters set found on development set:")
    print()
    print(clfg.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clfg.cv_results_['mean_test_score']
    stds = clfg.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clfg.cv_results_['params']):
        # Reported spread is two standard deviations of the fold scores.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = plabels, clfg.predict(fore)
    print(classification_report(y_true, y_pred))
    print()


# After the loop `clfg` is the search fitted for the LAST entry of `scores`
# (recall); everything below this point uses that model.  Its predictions on
# `fore` were already computed in the final iteration, so reuse them instead
# of calling predict() a second time.
p = y_pred
c = confusion_matrix(plabels, p)

In [None]:
# ---------------------------------------------------------------------------
# Decision-region plots in the plane of the first two feature columns, plus
# the accuracy of the tuned model on the training inputs.
# ---------------------------------------------------------------------------
from matplotlib.colors import ListedColormap

# --- Figure 1: training points, coloured by class, over the decision regions
f = plt.figure()
ax = f.gca()

# plt.cm.get_cmap() was removed in Matplotlib 3.9; pyplot.get_cmap() is the
# supported equivalent.  `cm` is no longer passed to scatter() (a colormap is
# ignored, with a warning, when `c` is a single named colour) but the name is
# kept defined in case other cells refer to it.
cm = plt.get_cmap('jet', 3)

# One scatter call per class code: (encoded label, colour, legend text).
for code, colour, legend_text in ((0, 'blue', 'Normal'),
                                  (1, 'green', 'Reverse'),
                                  (2, 'yellow', 'Strike-Slip')):
    ind = (outa == code)
    sc = ax.scatter(ina[ind, 0], ina[ind, 1], c=colour, s=10,
                    zorder=3, label=legend_text)
ax.legend()
ax.axis('tight')
xlim = ax.get_xlim()
ylim = ax.get_ylim()
n_classes = len(np.unique(outa))

# Evaluate the classifier on a 200 x 200 grid spanning the visible area.  The
# model expects `pcomp` input columns, so the remaining pcomp - 2 columns are
# filled with zeros: the plot is a slice through feature space, not a
# projection of it.
xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                     np.linspace(*ylim, num=200))
grid = np.c_[xx.ravel(), yy.ravel(), np.zeros([xx.size, pcomp - 2])]
Z = clfg.predict(grid).reshape(xx.shape)

custom_cmap = ListedColormap(['blue', 'green', 'yellow'])
# Level edges at -0.5, 0.5, 1.5, ... put each integer class code in its own
# filled band.
levels = np.arange(n_classes + 1) - 0.5
contours = ax.contourf(xx, yy, Z, levels=levels, alpha=0.3,
                       cmap=custom_cmap, zorder=1)
ax.set(xlim=xlim, ylim=ylim)
# NOTE(review): columns 0 and 1 of `ina`/`fore` come out of the PCA step, so
# these axes look like principal components rather than raw tensor
# components -- confirm that the M_11 / M_22 labels are intended.
ax.set_xlabel(r'$M_{11}$')
ax.set_ylabel(r'$M_{22}$')

# --- Figure 2: second data set, coloured by predicted class, same regions
f = plt.figure()
ax = f.gca()
# vmin/vmax pin the colour scale to the training label range so that a class
# missing from the predictions does not shift the colours.
ax.scatter(fore[:, 0], fore[:, 1], c=p, s=10, cmap=custom_cmap,
           vmin=outa.min(), vmax=outa.max(), zorder=3)
ax.set_xlabel(r'$M_{11}$')
ax.set_ylabel(r'$M_{22}$')
# contourf() does not use a `clim` keyword (it only warns about it); the band
# colours are already fixed by `levels` and `custom_cmap`.
contours = ax.contourf(xx, yy, Z, levels=levels, alpha=0.5,
                       cmap=custom_cmap, zorder=1)

# Accuracy of the tuned model on the data it was fitted on (y_true first,
# y_pred second, per the scikit-learn convention).
a = clfg.predict(ina)
ac = accuracy_score(outa, a)