In [32]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
import numpy as  np	 
import pandas as pd
import pickle 


In [33]:
train_data_path = 'new_wifi.json'

cellid_scaler = MinMaxScaler()

def load(train_file_name):
    if train_file_name == None:
        print('File does not exist')
        exit()

    data_frame = pd.read_json(train_file_name)
    
    x_data = data_frame[['AP01','AP02','AP03','AP04','AP05']]
    y_data = data_frame['cellid_']    

    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=0)
    return x_train,x_test,y_train,y_test, x_data, y_data


def normalizeX(arr):
    res = np.copy(arr).astype(np.float)
    for i in range(np.shape(res)[0]):
        for j in range(np.shape(res)[1]):
            if res[i][j] == 100:
                res[i][j] = 0
            else:
                res[i][j] = -0.01 * res[i][j]
    return res

def normalizeY(arr):
    global cellid_scaler
    cellid_scaler.fit(arr)
    return cellid_scaler.transform(arr)


def getMiniBatch(arr, batch_size=3):
    index=0
    while True:
        if index+batch_size >= len(arr):
            res = arr[index:]
            res = np.concatenate((res, arr[:index+batch_size-len(arr)]))
        else:
            res =arr[index:index + batch_size]
        index = (index+batch_size)%len(arr)
        yield res

class AbstractModel(object):
    parameter_save_path = 'param.pkl'
    cellid_regression_model_save_path = None

    cellid_regression_model = None
    cellid_mean = None
    cellid_std = None

    normalize_x = None
    normalize_y = None

    def __init__(self):
        pass
    
    def preprocess(self, x, y):
        self.normalize_x = normalizeX(x)
        self.normalize_y = normalizeY(y)
    
    def save(self):
        print("<<Saving>>")
        joblib.dump(self.cellid_regression_model, self.cellid_regression_model_save_path)
    
    def load(self):
        self.cellid_regression_model = joblib.load(self.cellid_regression_model_save_path)
    
    def fit(self, x, y):
        self.preprocess(x, y)
        print("<< training >>")
        self.cellid_regression_model.fit(self.normalize_x, self.normalize_y)
    
        del self.normalize_x
        del self.normalize_y
        self.save()

    def predict(self, x):

        self.load()

        x = normalizeX(x)
        predict_cellid = self.cellid_regression_model.predict(x)
        return predict_cellid
    
        def score(self, x, y):
        self.preprocess(x, y)
        print("<<scoring>>")
        rscore = self.cellid_regression_model.score(self.normalize_x, self.normalize_y)
        return rscore
    
class RandomForest(AbstractModel):
    cellid_regression_model_save_path = './rf_cellid.pkl'

    def __init__(self):
        self.cellid_regression_model = RandomForestRegressor()

In [34]:
if __name__ == '__main__':
    train_x, test_x, train_y, test_y, x_data, y_data = load(train_data_path)

    rf_model = RandomForest() 
    #rf_model.fit(train_x, train_y)
    #rf_score = rf_model.score(test_x, test_y)
    Scores = cross_val_score(rf_model, x_data, y_data, cv=10)
    print("Mean score:",np.mean(Scores))
    #print("RF score:",rf_score)

TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator <__main__.RandomForest object at 0x7fcd78cb8ac8> does not.

In [None]:
df =pd.read_json('new_wifi.json')

In [None]:
df.dtypes