In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix

In [2]:
# Load the raw board data, then collapse every strictly positive entry to 1.
# Entries that are not > 0 are left exactly as read.
raw = pd.read_csv("../dats/battleship.csv")
d = raw.mask(raw > 0, 1)

In [3]:
# Row letters x column numbers, laid out as two 8x8 grids ("ij" indexing:
# the first axis follows the letters, the second axis follows the numbers).
xx, yy = np.meshgrid(
    np.array(["A", "B", "C", "D", "E", "F", "G", "H"]),
    np.array([1, 2, 3, 4, 5, 6, 7, 8]),
    indexing="ij")
# One two-character label per cell, flattened row-major: A1, A2, ..., H8.
labs = np.array(
    [f"{row}{col}" for row, col in zip(xx.ravel(), yy.ravel())],
    dtype="U2")

In [4]:
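# # NOT RUN (kept for reference): grid search over n_neighbors for a KNN
# # classifier on cell D2. KNeighborsClassifier is not imported in this
# # notebook; add `from sklearn.neighbors import KNeighborsClassifier` first.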
# y = d.loc[:, "D2"].values
# X = d.drop("D2", axis = 1).values
# par_grid = [
#     {"n_neighbors": [3, 5, 7, 10, 15]}
# ]
# m = KNeighborsClassifier()
# gs_cv = GridSearchCV(m, par_grid, 
#                      cv=5, 
#                      return_train_score=True,
#                      n_jobs=-4)
# gs_cv.fit(X, y)

In [5]:
# Fit one soft-voting ensemble per board cell. For cell `lab` the target is
# that cell's column of `d` and the features are all the other cells.
mods = {}
for lab in labs:
    # Select features explicitly in `labs` order rather than relying on the
    # CSV column order, so the feature layout is the same as a flattened
    # board indexed by `labs` with this cell removed.
    feature_labs = list(labs[labs != lab])
    y = d.loc[:, lab].values
    X = d.loc[:, feature_labs].values

    # Fixed seed: without it the forest (and every probability derived from
    # it) changes on each re-run of the notebook.
    rfc = RandomForestClassifier(max_features="sqrt",
                                 max_depth=None,
                                 min_samples_split=2,
                                 n_estimators=100,
                                 n_jobs=3,
                                 random_state=0)
    lgr = LogisticRegression(n_jobs=3)
    gnb = GaussianNB()

    # Soft voting averages the predicted class probabilities of the three
    # base models.
    clf = VotingClassifier(
        estimators=[("rfc", rfc), ("lgr", lgr), ("gnb", gnb)],
        voting="soft",
        n_jobs=-4
    )
    clf.fit(X, y)
    # Keep the fitted ensemble, keyed by the cell it predicts.
    mods[lab] = clf
    

In [None]:
# import pickle
# with open("mods.pickle", "wb") as f:
#    pickle.dump(mods, f, protocol=5)

In [23]:
# Board to score, flattened row-major in the same order as `labs`
# (rows A-H, columns 1-8). Cells equal to 0 are the ones that get scored.
board = np.array(
    #1,2,3,4,5,6,7,8
    [0,0,0,0,0,0,0,0, #A
     0,1,1,1,1,0,0,0, #B
     0,0,0,0,0,0,0,0, #C
     0,1,0,0,1,0,1,0, #D
     0,1,0,0,1,0,1,0, #E
     0,1,0,0,1,0,1,0, #F
     0,0,0,0,0,0,1,0, #G
     0,1,1,0,0,0,0,0] #H
)
assert board.shape == labs.shape, "board must have exactly one entry per label"

to_predict = labs[board == 0]
pred_prob = {}
for lab in to_predict:
    # Feature vector for this cell's model: every other cell of the board.
    features = board[labs != lab].reshape(1, -1)
    model = mods[lab]
    proba = model.predict_proba(features)[0]
    # Find the probability column of class 1 via `classes_` instead of
    # assuming it sits at index 1: a model trained on a single class has
    # only one column.
    positive_col = np.flatnonzero(model.classes_ == 1)
    pred_prob[lab] = proba[positive_col[0]] if positive_col.size else 0.0

# Top 20 scored cells, highest predicted probability of class 1 first.
pd.Series(pred_prob).sort_values(ascending=False).head(20)

C2    0.844224
H4    0.838029
G2    0.822891
B6    0.800958
H1    0.729948
C7    0.701830
C5    0.688613
G5    0.622534
H7    0.581276
B1    0.546075
B7    0.492469
H5    0.468128
F3    0.459711
F4    0.435185
D4    0.434195
D6    0.430736
D3    0.425165
F6    0.399840
E4    0.378688
E3    0.363890
dtype: float64