In [1]:
import time
import numpy as np
import pandas as pd

In [2]:
def prepareDataFrame(data_file):
    #Takes about one minute to load
    data=pd.read_csv(data_conv,header=None,sep=" ")
    data.columns=["Date","Hour","Sensor","Value","Voltage"]
    data=data.sort_values(['Date','Hour']).reset_index(drop=True)
    
    data['datetime']=pd.to_datetime(data.Date+' '+data.Hour)
    data['relative_datetime']=data['datetime']-data['datetime'][0]
    data['seconds']=data['relative_datetime'].dt.total_seconds()
    
    sensorId_type=data.Sensor.str.split("-",expand=True)
    sensorId_type.columns=['SensorId','Type']
    data['SensorId']=sensorId_type['SensorId'].astype(int)
    data['Type']=sensorId_type['Type'].astype(int)
    
    #Drop features not needed for the simulation
    data=data.drop(['datetime','relative_datetime','Sensor','Date','Hour','Voltage'],axis=1)
    return data

In [3]:
data_conv = "../data/data.conv.txt"
data = prepareDataFrame(data_conv)

# DO NOT RUN ABOVE

In [4]:
temp_1=data[(data.SensorId==1) & (data.Type==0) & (data.seconds<=8*86400)]
temp_1=temp_1.reset_index(drop=True)

In [5]:
temp_24=data[(data.SensorId==24) & (data.Type==0) & (data.seconds<=8*86400)]
temp_24=temp_24.reset_index(drop=True)

In [6]:
# 5 closest neighbors of sensor 1 are sensors 2, 3, 33, 34, 35
neighbors_1 = [data[(data.SensorId==2) & (data.Type==0) & (data.seconds<=8*86400)],\
               data[(data.SensorId==3) & (data.Type==0) & (data.seconds<=8*86400)],\
               data[(data.SensorId==33) & (data.Type==0) & (data.seconds<=8*86400)],\
               data[(data.SensorId==34) & (data.Type==0) & (data.seconds<=8*86400)],\
               data[(data.SensorId==35) & (data.Type==0) & (data.seconds<=8*86400)]]

In [7]:
# 5 closest neighbors of sensor 24 are sensors 22, 23, 25, 26, 27
neighbors_24 = [data[(data.SensorId==22) & (data.Type==0) & (data.seconds<=8*86400)],\
                data[(data.SensorId==23) & (data.Type==0) & (data.seconds<=8*86400)],\
                data[(data.SensorId==25) & (data.Type==0) & (data.seconds<=8*86400)],\
                data[(data.SensorId==26) & (data.Type==0) & (data.seconds<=8*86400)],\
                data[(data.SensorId==27) & (data.Type==0) & (data.seconds<=8*86400)]]

In [8]:
def convertTimeToSlots(dataframe):
    """
    Add a column 'slot' to the dataframe and divides the relative time in slots
    Also replace the 'seconds' value by the amount of seconds at the center of the slot
    :param dataframe: dataframe on which the operation are performed
    :return: modified dataframe
    """
    interval_slot = 30
    #divide data in slots of 30sec, add each slot value to each entry
    dataframe["slot"] = (dataframe["seconds"]//interval_slot).astype(int)
    #transform the seconds so that for each slot, its corresponding 'seconds' value is at the center of this slot (usefull for plots)
    dataframe["seconds"] = interval_slot*(dataframe["slot"] + dataframe["slot"]+1) / 2
    
    #Take care of the potential multiple value appearing within the same slot -> average them
    dataframe = dataframe.groupby(["slot"]).agg("mean")
    dataframe.reset_index(level=0, inplace=True)
    return dataframe

def fillMissingRows(dataframe):
    """
    Fill missing rows of the dataframe to ensure that there is a value at each time step (slot)
    so a prediction and a correction can be performed
    :param dataframe: dataframe on which the operation are performed
    :return: completed dataframe
    """
    interval_slot = 30
    nb_slots = 23040
    sensor_type = dataframe["SensorId"].values[0]
    missing_values = {"slot": [], "seconds": [], "SensorId": [], "Type": []}
    for i in range (nb_slots): #total nb of slots for 8 days
        if i not in dataframe["slot"].values:
            seconds = interval_slot*(i + i+1) / 2
            missing_values["slot"].append(i)
            missing_values["seconds"].append(seconds)
            missing_values["SensorId"].append(sensor_type)
            missing_values["Type"].append(0)
    #Build DataFrame with missing values
    temp_missing = pd.DataFrame(missing_values)
    #Merge the two Dataframe and sort them by values of the 'slot' column
    #At this point, the temperature values are still missing -> NaN
    complete_temp = dataframe.append(temp_missing).sort_values('slot')
    #Replace NaN by values extracted from a linear method based on the neighbors
    complete_temp["Value"] = complete_temp["Value"].interpolate(limit_direction="both")    
    return complete_temp

def preprocessDataFrames(output_df, input_dfs):
    """
    Preprocess the different dataframes to add their time slots and their missing values
    :param output_df: dataframe containing information of the desired sensor
    :param input_dfs: list of dataframes containing information of the neighbors sensors
    :return: complete output_df and merged list of complete neighbors df
    """
    output_df = fillMissingRows(convertTimeToSlots(output_df))
    output_df.reset_index(level=0, inplace=True)
    for i in range (len(input_dfs)):
        input_dfs[i] = fillMissingRows(convertTimeToSlots(input_dfs[i]))
    merged_inputs_dfs = pd.concat(input_dfs) 
    merged_inputs_dfs["slot"] = merged_inputs_dfs["slot"].astype(int)
    merged_inputs_dfs.reset_index(level=0, inplace=True)
    return output_df, merged_inputs_dfs



In [9]:
complete_temp_1, merged_neighbors_1 = preprocessDataFrames(temp_1, neighbors_1)
complete_temp_24, merged_neighbors_24 = preprocessDataFrames(temp_24, neighbors_24)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/

# SGD FROM HERE

In [22]:
#For plots
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode()

In [10]:
print(complete_temp_1.head(5))
print(merged_neighbors_1.head(5))
print(complete_temp_24.head(5))
print(merged_neighbors_24.head(5))

   index  SensorId  Type      Value  seconds  slot
0      0         1     0  19.243600     15.0     0
1      1         1     0  19.243600     45.0     1
2      0         1     0  19.243600     75.0     2
3      2         1     0  19.237067    105.0     3
4      3         1     0  19.230533    135.0     4
   index  SensorId  Type    Value  seconds  slot
0      0         2     0  19.6160     15.0     0
1      0         2     0  19.6160     45.0     1
2      1         2     0  19.6160     75.0     2
3      1         2     0  19.6013    105.0     3
4      2         2     0  19.5866    135.0     4
   index  SensorId  Type    Value  seconds  slot
0      0        24     0  18.1362     15.0     0
1      0        24     0  18.1362     45.0     1
2      1        24     0  18.1166     75.0     2
3      2        24     0  18.1264    105.0     3
4      3        24     0  18.1362    135.0     4
   index  SensorId  Type    Value  seconds  slot
0      0        22     0  18.3518     15.0     0
1      1

# APPROACH FOR ALGO

In [17]:
SLOTS_PER_DAY = 2880
SIM_DAYS = 8
LEARNING_RATE = 0.01
ITERATIONS = 1500

Y_per_day = []    

# extract data for each day
for i in range(0, SIM_DAYS):
        
    # cut into slots / day
    day = complete_temp_1.drop(complete_temp_1[complete_temp_1.slot >= (i + 1) * SLOTS_PER_DAY].index)
    day = day.drop(day[day.slot < i * SLOTS_PER_DAY].index)
        
    # extract X, Y
    Y = day['Value']
        
    # append to lists
    Y_per_day.append(Y)

day_0 = Y_per_day[0]
day_1 = Y_per_day[1]
day_2 = Y_per_day[2]
day_3 = Y_per_day[3]
day_4 = Y_per_day[4]
day_5 = Y_per_day[5]
day_6 = Y_per_day[6]
day_7 = Y_per_day[7]

In [113]:
import pandas as pd

class SGD:

    def __init__(self, day_0, id = None, rate = LEARNING_RATE, iterations = ITERATIONS):
        self.id = id
        self.learning_rate = rate
        self.nb_iterations = iterations

        self.latest_Y = day_0.to_frame()
        
        self.MSE = []
        self.truths = []
        self.predictions = []
        
        self.initializeModel()
        
    def getID(self):
        return self.id
    
    def getLearningRate(self):
        return self.learning_rate

    def getMaxIterations(self):
        return self.nb_iterations

    def initializeModel(self):
        # define bias
        self.__bias = pd.Series(1, index=range(len(self.latest_Y)))
        
        # add bias to given data
        self.latest_Y["Bias"] = self.__bias
        
        # intialize weights
        self.__nb_features = len(self.latest_Y.columns)
        self.__weights = [1 for i in range(self.__nb_features)]
        self.__weights = np.array(self.__weights)
        self.__weights = self.__weights.reshape(self.__nb_features, 1)
        
        #self.__weights = np.dot(np.dot(np.linalg.inv(np.dot(self.latest_Y.T,self.latest_Y)),self.latest_Y.T),learning_Y)


        
        #print(np.dot(self.latest_Y, self.__weights))
    
    def updateModel(self, new_Y):
        
        originally_predicted = self.getPrediction()
        
        # convert new observations to expected format
        new_Y = new_Y.to_frame()
        new_Y = new_Y.reset_index()
        new_Y["Bias"] = self.__bias
        new_Y = new_Y.drop(["index"], axis=1)

        # extract learning info        
        learning_Y = np.array(new_Y["Value"])
        
        # add sighting to truths
        self.truths.append(learning_Y)
        
        learning_Y = learning_Y.reshape(len(learning_Y),self.__nb_features - 1) 
        
        # apply learning
        regularization = np.dot(self.__weights.T,self.__weights)*0.001
        hypothesis = np.dot(self.latest_Y, self.__weights)
        loss = (learning_Y - hypothesis + regularization) * (-1)
        partial_derivate = (np.dot(self.latest_Y.T,loss)*2)/SLOTS_PER_DAY
        self.__weights = self.__weights - (self.getLearningRate() * partial_derivate)
        
        # normalize weights
        self.__weights = np.dot(np.dot(np.linalg.inv(np.dot(self.latest_Y.T,self.latest_Y)),self.latest_Y.T),learning_Y)
        
        # update new latest Y
        self.latest_Y = new_Y
        
        # compute MSE
        self.MSE.append(np.mean((learning_Y - originally_predicted)**2))
        
        # add prediction for next day to predictions
        self.predictions.append(self.getPrediction().T[0])
    
     
    def getPrediction(self):
        return np.dot(self.latest_Y, self.__weights)
    
    def getMSE(self):
        return self.MSE
    
    def predict(self, real = ["Unknown" for i in range(5)]):
        prediction = self.getPrediction()
        for i in range(5):
            print("supposed: {}| predicted: {}".format(real[i], prediction[i][0]))
        print("-- --")
        
    def plot(self):
        #for truth in self.truths:
        #    print(truth)
        seconds = [i*30 for i in range(SLOTS_PER_DAY * SIM_DAYS) ]#[list(range(SLOTS_PER_DAY * SIM_DAYS * 30))]
        all_truths = self.truths[0]
        all_predictions = self.predictions[0]
        for i in range(1, len(self.truths)):
            all_truths = np.concatenate((all_truths, self.truths[i]), axis=0)
            all_predictions = np.concatenate((all_predictions, self.predictions[i]), axis=0)
        MSE = self.getMSE()[-1]
        
        trace_truth = go.Scatter(
            y = all_truths,
            x = seconds,
            name="Truth"
        )
        
        trace_predictions = go.Scatter(
            y = all_predictions,
            x = seconds,
            name="Predictions"
        )
        
        layout= go.Layout(
            title= 'Truth and predictions for sensor ' + str(self.getID()) + ', on day 8<br>Stochastic Gradient Descent<br>'+\
                    'Mean square error: '+ str(MSE),
            xaxis= dict(
                title= 'Time (seconds)',
            ),
            yaxis=dict(
                title= 'Temperature',
            ),
            showlegend= True
        )
        
        fig= go.Figure(data=[trace_truth,trace_predictions], layout=layout)
        iplot(fig)

In [114]:
    descent = SGD(day_0, id=1)
    descent.predict(np.array(day_1)[:5])
    
    descent.updateModel(day_1)
    descent.predict(np.array(day_2)[:5])
    descent.updateModel(day_2)
    descent.predict(np.array(day_3)[:5])
    descent.updateModel(day_3)
    descent.predict(np.array(day_4)[:5])
    descent.updateModel(day_4)
    descent.predict(np.array(day_5)[:5])
    descent.updateModel(day_5)
    descent.predict(np.array(day_6)[:5])
    descent.updateModel(day_6)
    descent.predict(np.array(day_7)[:5])
    descent.updateModel(day_7)
    descent.predict()
    #descent.updateModel(day_3)
    #descent.predict()
    #print(day_0)
    print(descent.getMSE())

supposed: 18.91432| predicted: 20.2436
supposed: 18.75948| predicted: 20.2436
supposed: 18.60464| predicted: 20.2436
supposed: 18.4498| predicted: 20.237066666666667
supposed: 18.4449| predicted: 20.230533333333334
-- --
supposed: 19.326900000000002| predicted: 19.986862741803662
supposed: 19.33425| predicted: 19.795090638195724
supposed: 19.3416| predicted: 19.603318534587785
supposed: 19.3465| predicted: 19.41154643097984
supposed: 19.3514| predicted: 19.4054776935239
-- --
supposed: 20.057000000000002| predicted: 19.894132570116803
supposed: 20.0668| predicted: 19.89975684109732
supposed: 20.060266666666667| predicted: 19.905381112077844
supposed: 20.053733333333334| predicted: 19.909130626064858
supposed: 20.0472| predicted: 19.912880140051875
-- --
supposed: 20.5666| predicted: 20.945983610637377
supposed: 20.5666| predicted: 20.955108881522385
supposed: 20.56415| predicted: 20.94902536759905
supposed: 20.561700000000002| predicted: 20.942941853675705
supposed: 20.55925| predicted

In [115]:
descent.plot()

In [47]:
def getSensorsLoc(locations_file):
    """
    returns an array where each element is [sensor, x_coord, y_coord] based on a location file
    """
    sensors_loc = []
    with open(locations_file, "r") as f:
        lines = f.readlines()
        for line in lines:
            info = line.split(" ")
            sensor = int(info[0])
            x = float(info[1])
            y = float(info[2])
            sensors_loc.append([sensor, x, y])
    sensors_loc = np.array(sensors_loc)
    return sensors_loc

def getNClosestNeighbors(sensorId, sensors_loc, n):
    """
    returns a list of n closest neighbors ordered from closest to furthest to the given sensorId
    """

    index_sensor_id = np.where(sensors_loc[:,0] == sensorId)[0][0]
    x_sensor = sensors_loc[index_sensor_id, 1]
    y_sensor = sensors_loc[index_sensor_id, 2]

    neighbors = []
    distances = []
    for i in range(len(sensors_loc)):
        if i!= index_sensor_id:
            id_neighbor = int(sensors_loc[i,0])
            x_neighbor = sensors_loc[i,1]
            y_neighbor = sensors_loc[i,2]
            x = x_sensor - x_neighbor
            y = y_sensor - y_neighbor
            distance = math.sqrt(math.pow(x,2) + math.pow(y,2))
            neighbors.append(id_neighbor)
            distances.append(distance)
    ar_neighbors = np.array(neighbors)
    ar_distances = np.array(distances)
    inds = ar_distances.argsort()
    sorted_neighbors = ar_neighbors[inds]
    sorted_distances = ar_distances[inds]

    return sorted_neighbors[:n]

In [96]:
import math

DATA_LOCATION = "../data"
FILE = "data.conv.txt"
LOC = "mote_locs.txt"
data_file = "{}/{}".format(DATA_LOCATION, FILE)
data_loc = "{}/{}".format(DATA_LOCATION, LOC)

sensors_loc = getSensorsLoc(data_loc)
n = 2
closest_neighbors_1 = getNClosestNeighbors(1, sensors_loc,n)
closest_neighbors_24 = getNClosestNeighbors(24, sensors_loc,n)

In [97]:
neighbor_numbers =  closest_neighbors_1
#print(type(neighbor_numbers))
#print(type(closest_neighbors_1))
solo_df = []
#for neighbor in neighbor_numbers:
#    print(merged_neighbors_1.loc[merged_neighbors_1['SensorId'] == neighbor].head(5))

Y_per_day = [] 

# extract data for each day
for i in range(0, SIM_DAYS):
        
    # cut into slots / day
    day = merged_neighbors_1.drop(merged_neighbors_1[merged_neighbors_1.slot >= (i + 1) * SLOTS_PER_DAY].index)
    day = day.drop(day[day.slot < i * SLOTS_PER_DAY].index)
    per_sensor = []
    for neighbor in neighbor_numbers:
        sensor_day = day.loc[merged_neighbors_1['SensorId'] == neighbor]
        sensor_day = sensor_day["Value"]
        sensor_day = sensor_day.rename("Value_{}".format(neighbor))
        sensor_day = sensor_day.reset_index()
        sensor_day = sensor_day.drop(["index"], axis=1)
        if not sensor_day.empty:
            per_sensor.append(sensor_day)
        
    # append to lists
    Y_per_day.append(per_sensor)

day_0_group = Y_per_day[0]
day_1_group = Y_per_day[1]
day_2_group = Y_per_day[2]
day_3_group = Y_per_day[3]
day_4_group = Y_per_day[4]
day_5_group = Y_per_day[5]
day_6_group = Y_per_day[6]
day_7_group = Y_per_day[7]

In [116]:
class SGD:

    def __init__(self, multiple_day_0, id = None, rate = LEARNING_RATE):
        self.id = id
        self.learning_rate = rate        
        self.latest_X = multiple_day_0[0]
        
        for i in range(1, len(multiple_day_0)):
            merge_frame = multiple_day_0[i]
            self.latest_X = self.latest_X.join(merge_frame)
        
        self.MSE = []
        self.truths = []
        self.predictions = []
        
        self.initializeModel()
        
    def getLearningRate(self):
        return self.learning_rate
    
    def getID(self):
        return self.id

    def initializeModel(self):
        # define bias
        self.__bias = pd.Series(1, index=range(len(self.latest_X)))
        
        # add bias to given data
        self.latest_X["Bias"] = self.__bias
        #print(self.latest_Y.head(5))
        
        # intialize weights
        self.__nb_features = len(self.latest_X.columns)
        self.__weights = [1 / (self.__nb_features - 1) for i in range(self.__nb_features)]        # heuristics -> reduce ??
        self.__weights = np.array(self.__weights)
        self.__weights = self.__weights.reshape(self.__nb_features, 1)
        
        # add prediction for next day to predictions
        self.predictions.append(self.getPrediction().T[0])
    
    def updateModel(self, new_multiple_neighbors, new_Y):        
        # obtain prediction made current day
        originally_predicted = self.getPrediction()
        
        # convert new observations to expected format
        new_Y = new_Y.to_frame()
        new_Y = new_Y.reset_index()
        #new_Y["Bias"] = self.__bias
        new_Y = new_Y.drop(["index"], axis=1)

        # extract learning info        
        learning_Y = np.array(new_Y["Value"])
        
        # add sighting to truths
        self.truths.append(learning_Y)
        
        # reshape for use
        learning_Y = learning_Y.reshape(len(learning_Y),1) 
        
        self.latest_X = new_multiple_neighbors[0]
        for i in range(1, len(new_multiple_neighbors)):
            merge_frame = new_multiple_neighbors[i]
            self.latest_X = self.latest_X.join(merge_frame)
        self.latest_X["Bias"] = self.__bias
        
        # apply learning
        regularization = np.dot(self.__weights.T,self.__weights)*0.001
        hypothesis = np.dot(self.latest_X, self.__weights)
        loss = (learning_Y - hypothesis + regularization) * (-1)
        partial_derivate = (np.dot(self.latest_X.T,loss)*2)/SLOTS_PER_DAY
        self.__weights = self.__weights - (self.getLearningRate() * partial_derivate)
        
        # normalize weights
        self.__weights = np.dot(np.dot(np.linalg.inv(np.dot(self.latest_X.T,self.latest_X)),self.latest_X.T),learning_Y)
        
        # update new latest Y
        #self.latest_Y = new_Y
        
        # compute MSE
        self.MSE.append(np.mean((learning_Y - originally_predicted)**2))
        
        # add prediction for next day to predictions
        self.predictions.append(self.getPrediction().T[0])
        
    def getPrediction(self):
        return np.dot(self.latest_X, self.__weights)
    
    def getMSE(self):
        return self.MSE
    
    def predict(self, real = ["Unknown" for i in range(5)]):
        prediction = self.getPrediction()
        for i in range(5):
            print("supposed: {}| predicted: {}".format(real[i], prediction[i][0]))
        print("-- --")
    
    def plot(self):
        seconds = [i*30 for i in range(SLOTS_PER_DAY * SIM_DAYS) ]#[list(range(SLOTS_PER_DAY * SIM_DAYS * 30))]
        all_truths = self.truths[0]
        all_predictions = self.predictions[0]
        for i in range(1, len(self.truths)):
            all_truths = np.concatenate((all_truths, self.truths[i]), axis=0)
            all_predictions = np.concatenate((all_predictions, self.predictions[i]), axis=0)
        MSE = self.getMSE()[-1]
        
        trace_truth = go.Scatter(
            y = all_truths,
            x = seconds,
            name="Truth"
        )
        
        trace_predictions = go.Scatter(
            y = all_predictions,
            x = seconds,
            name="Predictions"
        )
        
        layout= go.Layout(
            title= 'Truth and predictions for sensor ' + str(self.getID()) + ', on day 8<br>Stochastic Gradient Descent<br>'+\
                    'Mean square error: '+ str(MSE),
            xaxis= dict(
                title= 'Time (seconds)',
            ),
            yaxis=dict(
                title= 'Temperature',
            ),
            showlegend= True
        )
        
        fig= go.Figure(data=[trace_truth,trace_predictions], layout=layout)
        iplot(fig)

In [120]:
classifier = SGD(day_0_group, id=1)
classifier.predict(np.array(day_1)[:5])

# end day 1 = update
classifier.updateModel(day_1_group, day_1)
classifier.predict(np.array(day_2)[:5])

# end day 2 = update
classifier.updateModel(day_2_group, day_2)
classifier.predict(np.array(day_3)[:5])

# end day 3 = update
classifier.updateModel(day_3_group, day_3)
classifier.predict(np.array(day_4)[:5])

# end day 4 = update
classifier.updateModel(day_4_group, day_4)
classifier.predict(np.array(day_5)[:5])

# end day 5 = update
classifier.updateModel(day_5_group, day_5)
classifier.predict(np.array(day_6)[:5])

# end day 6 = update
classifier.updateModel(day_6_group, day_6)
classifier.predict(np.array(day_7)[:5])

# end day 6 = update
classifier.updateModel(day_7_group, day_7)
classifier.predict()

supposed: 18.91432| predicted: 19.8808
supposed: 18.75948| predicted: 19.8808
supposed: 18.60464| predicted: 19.8808
supposed: 18.4498| predicted: 19.873450000000002
supposed: 18.4449| predicted: 19.866100000000003
-- --
supposed: 19.326900000000002| predicted: 18.475996799300056
supposed: 19.33425| predicted: 18.470402048703363
supposed: 19.3416| predicted: 18.470402048703363
supposed: 19.3465| predicted: 18.470402048703363
supposed: 19.3514| predicted: 18.470402048703363
-- --
supposed: 20.057000000000002| predicted: 19.420767175030203
supposed: 20.0668| predicted: 19.42636737360816
supposed: 20.060266666666667| predicted: 19.420765275478953
supposed: 20.053733333333334| predicted: 19.415163177349747
supposed: 20.0472| predicted: 19.414045797005905
-- --
supposed: 20.5666| predicted: 20.102262416041697
supposed: 20.5666| predicted: 20.085765239170442
supposed: 20.56415| predicted: 20.08480120715714
supposed: 20.561700000000002| predicted: 20.08480120715714
supposed: 20.55925| predict

In [121]:
print(classifier.getMSE())

[3.145441937968539, 1.924235733736958, 1.150468426296113, 1.1826513973165933, 1.2416376044982724, 4.4278413849977625, 1.5085699493873426]


In [122]:
classifier.plot()