In [1]:
# Start from a clean interactive namespace so that no variables left over from
# an earlier session leak into this run (-f skips the confirmation prompt).
%reset -f

In [2]:
import warnings
warnings.filterwarnings('ignore')

from matplotlib import pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold
from sklearn.manifold import TSNE

In [3]:
# Load the raw measurements; column "M" is used as the target below.
df = pd.read_csv("./land_mines.csv")
# Optional min-max rescaling of the "S" and "M" codes, left disabled.
# NOTE(review): MinMaxScaler is not among the imports visible in this notebook,
# so re-enabling these two lines also needs that import.
# min_max_scaler = MinMaxScaler()
# df[["S", "M"]] = min_max_scaler.fit_transform(df[["S", "M"]])
X = df.drop(["M"], axis=1)
y = df.M
# t-SNE is stochastic: without a fixed seed the 2-D embedding (and therefore
# every boundary plot, accuracy and selected C computed from it) differs on
# each run. Seed 17 matches the seed used by the models further down.
tsne_features = TSNE(random_state=17).fit_transform(X)
# Store the two embedding coordinates next to the original columns.
df["x"] = tsne_features[:,0]
df["y"] = tsne_features[:,1]

In [4]:
from matplotlib.colors import ListedColormap

# Display names. Position i of each list goes with soil_vals[i] / mine_vals[i]
# (plot_boundary indexes the labels, colours and values with the same index).
soil_labels = [
    "Dry and Sandy",
    "Dry and Humus",
    "Dry and Limy",
    "Humid and Sandy",
    "Humid and Humus",
    "Humid and Limy",
]
mine_labels = [
    "Null",
    "Anti-tank",
    "Anti-personnel",
    "Booby Trapped Anti-personnel",
    "M14 Anti-personnel",
]
train_colors = [
    "white",
    "red",
    "green",
    "blue",
    "yellow",
    "purple",
]
train_colormap = ListedColormap(train_colors)

# Alternative codes, presumably for data whose "S"/"M" columns were rescaled
# to [0, 1] -- kept for reference:
#mine_vals = [0.0, 0.25, 0.5, 0.75, 1.0]
#soil_vals = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
mine_vals = [1, 2, 3, 4, 5]
soil_vals = [1, 2, 3, 4, 5, 6]

# One sub-frame per mine code and one per soil code (tolerant float match).
mine_classes = [df[np.isclose(df["M"], code)] for code in mine_vals]
soil_classes = [df[np.isclose(df["S"], code)] for code in soil_vals]


def x_y_values_transpose(dataframe):
    """Return the frame's "x" and "y" columns as a 2 x n array (row 0 = x, row 1 = y)."""
    return dataframe[["x", "y"]].values.transpose()


# Embedding coordinates grouped by mine code.
mine_classified_train_data = [
    x_y_values_transpose(mine_frame) for mine_frame in mine_classes
]

# Embedding coordinates grouped by soil code first, then by mine code.
soil_classified_train_data = [
    [
        x_y_values_transpose(soil_frame[soil_frame["M"] == code])
        for code in mine_vals
    ]
    for soil_frame in soil_classes
]

In [5]:
def plot_boundary(index, C=1e-2, degree=7, grid_step=.01, max_iter=100):
    """Fit a polynomial logistic regression on one soil class and plot its decision regions.

    The rows of ``soil_classes[index]`` are used: the embedding columns "x" and
    "y" are the features and "M" is the target. The features are expanded with
    ``PolynomialFeatures(degree)`` before a ``LogisticRegression`` is fitted.
    The predicted class over a regular grid is drawn as a filled contour and
    the training points are overlaid, one scatter series per mine class.
    The accuracy printed at the end is measured on the same rows the model was
    fitted on, not on held-out data.

    Parameters
    ----------
    index : int
        Position in ``soil_classes`` / ``soil_labels`` of the soil type to plot.
    C : float
        Inverse regularisation strength passed to ``LogisticRegression``.
    degree : int
        Degree of the polynomial feature expansion.
    grid_step : float
        Spacing of the prediction grid, in embedding units.
    max_iter : int
        Iteration limit passed to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(y, X_poly)``: the target values and the polynomial feature matrix
        that the model was fitted on.
    """
    data = soil_classes[index][["x", "y", "M"]]
    X = data[["x", "y"]].values
    y = data["M"].values

    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)
    logit = LogisticRegression(C=C, max_iter=max_iter, random_state=17)
    logit.fit(X_poly, y)

    plt.figure(figsize=(4,4))
    # Pad the plotted area by 0.1 on every side of the data.
    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    # NOTE(review): the grid holds roughly
    # ((x_max - x_min) / grid_step) * ((y_max - y_min) / grid_step) points and
    # each one is expanded to polynomial features in a single call -- confirm
    # the memory use is acceptable for the coordinate range of the embedding.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, grid_step),
                         np.arange(y_min, y_max, grid_step))
    Z = logit.predict(poly.transform(np.c_[xx.ravel(), yy.ravel()]))
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4, cmap="Greys")

    # Walk the class definitions together instead of a hard-coded range(5), so
    # the overlay follows mine_vals / mine_labels if they are ever changed.
    # zip stops at the shortest list, so surplus colours are simply unused.
    for mine_val, color, label in zip(mine_vals, train_colors, mine_labels):
        in_class = y == mine_val
        plt.scatter(X[in_class, 0],
                    X[in_class, 1],
                    c=color, edgecolors='black', label=label)

    plt.xlabel("x")
    plt.ylabel("y")
    plt.title("mine detection t-sne\n'%s' soil\nlogit with c=%s"%(soil_labels[index],C))
    # Put the legend outside the axes, to the right of the plot.
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    print("Accuracy:", round(logit.score(X_poly, y), 3))
    return y, X_poly

def C_graph(y, X_poly, c_range=(0,10)):
    """Cross-validate the regularisation strength C and plot mean accuracy against it.

    A ``LogisticRegressionCV`` is fitted with 250 log-spaced candidate values
    of C and a shuffled, seeded 6-fold ``StratifiedKFold``. The mean
    cross-validated score per candidate is plotted on a logarithmic x-axis
    covering exactly the searched grid.

    Parameters
    ----------
    y : array-like
        Target labels.
    X_poly : array-like
        Feature matrix (as returned by ``plot_boundary``).
    c_range : tuple
        ``(low, high)`` base-10 exponents of the search grid. Candidates run
        from ``10**(low - 1)`` to ``10**high``; the grid deliberately starts
        one decade below ``low``.

    Returns
    -------
    float
        ``logit_searcher.C_[0]``, the first entry of the selected C values.

    NOTE(review): the searcher runs with scikit-learn's default iteration
    limit, unlike the ``max_iter`` that ``plot_boundary`` accepts -- confirm
    it converges for large C.
    """
    skf = StratifiedKFold(n_splits=6, shuffle=True, random_state=17)
    c_values = np.logspace(c_range[0]-1, c_range[1], 250)
    logit_searcher = LogisticRegressionCV(Cs=c_values, cv=skf, verbose=1, n_jobs=-1)
    logit_searcher.fit(X_poly, y)
    best_c = logit_searcher.C_[0]
    print("logit_searcher.C_ = ", best_c)

    # scores_ is keyed by class label. Keep using label 1 when it exists (the
    # previous behaviour) but fall back to the first entry instead of raising
    # KeyError when the labels in y do not include 1.
    scores_by_class = logit_searcher.scores_
    if 1 in scores_by_class:
        fold_scores = scores_by_class[1]
    else:
        fold_scores = next(iter(scores_by_class.values()))

    plt.figure(figsize=(4,4))
    plt.plot(c_values, np.mean(fold_scores, axis=0))
    # c_range holds exponents, not raw C values: using it directly as linear
    # axis limits showed only C in [0, 10] of a grid that spans many decades.
    # Use a log axis limited to the candidates that were actually evaluated.
    plt.xscale('log')
    plt.xlabel('C')
    plt.ylabel('Mean CV-accuracy')
    plt.xlim(c_values[0], c_values[-1])
    return best_c

In [6]:
# Starting inverse regularisation strength for the boundary plot below
# (large C means weak regularisation).
C = 10 ** 4

In [7]:
# Fit and plot soil class index 4 ("Humid and Humus" in soil_labels).
# Note: this rebinds the notebook-level name `y` to that subset's targets.
y, X_poly = plot_boundary(
    4,
    C=C,
    degree=7,
    grid_step=0.01,
    max_iter=10000,
)

In [8]:
# Search C by cross-validation on the features fitted above.
# Note: this rebinds `C`, so re-running the boundary cell afterwards uses the
# selected value rather than the initial one.
C = C_graph(
    y,
    X_poly,
    c_range=(0, 10),
)