# Active Learning for dToU tariff policy
## Minimizing the cost of labelling
This notebook contains a simple active learning model for predicting peak load shaving. The *Simulator* class contains the data point generation code, which generates a new data point subject to given conditions (currently, day-of-week and season). The *activeLearner* class is an active learning loop that simulates real-world experimentation. Detailed information about each class is provided in the text above it.

In [1]:
import numpy as np
import pandas as pd
import random
import time
from multiprocessing import Pool

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from math import sqrt
import xgboost as xgb

# For data visualization
import matplotlib.pyplot as plt
# from pandas.tools.plotting import autocorrelation_plot
from bokeh.io import output_notebook, show
from bokeh.models import Title
from bokeh.plotting import figure, output_file, show

import seaborn as sns
%matplotlib inline

from datetime import datetime, timedelta, date
from tqdm import tqdm            #for .py version
# from tqdm import tqdm_notebook as tqdm     # for .ipynb version

pd.options.mode.chained_assignment = None  # default='warn'

## parameters dictionary

The following cell contains the single dictionary of main parameters used in this notebook. The `params` variable is passed to every class, which stores it on the instance (`self.params`) and reads its settings from there.

In [2]:
# The dict 'params' gathers every tunable setting of the simulation in one
# place so that it can be altered easily. It is passed to each class, which
# keeps a reference to it as `self.params`.
params = {
    # --- Regression (consumption) model -----------------------------------
    'train_start_dt': '2013-01',
    'train_stop_dt': '2013-12',
    'y_variable': 'trial_d',
    'X_variables': ['trial_n', 'low', 'normal', 'high', 'WIND_DIRECTION',
                    'WIND_SPEED', 'VISIBILITY', 'MSL_PRESSURE',
                    'AIR_TEMPERATURE', 'DEWPOINT', 'WETB_TEMP',
                    'STN_PRES', 'WMO_HR_SUN_DUR', 'hour', 'day'],

    # --- XGBoost regression parameters (consumption model) ----------------
    'n_estimators': 2000,
    'early_stopping_rounds': 50,  # stop after 50 consecutive rounds without a decrease of the error
    'verbose': False,             # change to True to see the training log
    'nthread': 4,

    # --- Simulator defaults -----------------------------------------------
    'season': 3,
    'day_of_week': 3,
    'special_event': 0,
    'tariff_policy': [],

    # --- Occupant behaviour dynamics --------------------------------------
    'active_users': 0.1,   # fraction of users willing to engage in the experiments (alternative: .5)
    'avail_users': 0.1,    # fraction of active users available for a specific experiment (alternative: .5)
    'user_latency': 0,     # real-life participation delay of users
    'frac_users_exp': 1,   # fraction of users selected for a particular trial

    # --- Active learning --------------------------------------------------
    'total_iterations': 5,
    'total_experiments': 1000,  # total number of experiments allowed per trial (alternative: 100)
    'init_samples': 10,         # number of initial random samples (alternative: 50)
    'test_size': .3,            # test fraction when splitting data into train/test
    # Feature columns of the active learner. Each column is listed exactly
    # once: 'hod' used to appear twice, which fed the same feature to the
    # model two times.
    'X_var_activeL': ['dow',
                      'season',
                      'hod',
                      'AIR_TEMPERATURE',
                      'DEWPOINT',
                      'MSL_PRESSURE',
                      'STN_PRES',
                      'VISIBILITY',
                      'WETB_TEMP',
                      'WIND_DIRECTION',
                      'WIND_SPEED',
                      'WMO_HR_SUN_DUR',
                      'month'],

    'y_var_activeL': 'expected',
}

# Consumption model

The following class implements the aggregate energy consumption model of the LCL users. The model tends to generalize its predictions, so it is not used to generate new data for random timestamps. Instead, it may be used in the future to add the weather effect to the user response.

In [3]:
class ConsumptionModel(object):
    """XGBoost regressor for the aggregate consumption data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a DatetimeIndex (``.index.hour`` etc. are read) and at
        least a ``tariff`` column plus the columns named in
        ``params['X_variables']`` / ``params['y_variable']``.
    params : dict
        Shared settings dictionary; the ``train_*``, ``X_variables``,
        ``y_variable`` and XGBoost entries are read here.
    """

    def __init__(self, df, params):
        self.df = df
        self.params = params

    def prep_data(self):
        """Drop incomplete rows, one-hot encode ``tariff`` and add calendar columns."""
        self.df = self.df.dropna().copy()
        one_hot = pd.get_dummies(self.df['tariff'])
        # Only the columns are relabelled. The index must be left untouched:
        # passing index=str here turned the timestamps into strings, so the
        # join below no longer lined up with the DatetimeIndex of self.df.
        one_hot_renamed = one_hot.rename(
            columns={0.0399: 'low', 0.1176: 'normal', 0.672: 'high'})
        self.df = self.df.join(one_hot_renamed).drop('tariff', axis=1)

        self.df["hour"] = self.df.index.hour
        self.df["day"] = self.df.index.day
        self.df["month"] = self.df.index.month

    def train(self):
        """Fit the XGBoost model and return the feature-importance plot.

        The training slice runs from ``train_start_dt`` to ``train_stop_dt``
        (both inclusive, label based).
        """
        start = self.params["train_start_dt"]
        stop = self.params["train_stop_dt"]
        features = self.params["X_variables"]
        target = self.params["y_variable"]

        self.X_train = self.df.loc[start:stop, features]
        self.y_train = self.df.loc[start:stop, target]
        # NOTE(review): the evaluation slice starts at `train_stop_dt`, which
        # is also the last label of the training slice, so the two overlap and
        # early stopping is evaluated partly on training rows - confirm intended.
        self.X_test = self.df.loc[stop:, features]
        self.y_test = self.df.loc[stop:, target]

        self.xg_reg = xgb.XGBRegressor(n_estimators=self.params['n_estimators'],
                                       nthread=self.params["nthread"])
        self.xg_reg.fit(self.X_train, self.y_train,
                        eval_set=[(self.X_train, self.y_train), (self.X_test, self.y_test)],
                        early_stopping_rounds=self.params["early_stopping_rounds"],
                        verbose=self.params["verbose"])

        # Feature importance chart
        return xgb.plot_importance(self.xg_reg, height=0.9)

    def test(self, X_test, tariff):
        """Predict for ``X_test``; the result is stored in ``self.preds`` and returned.

        ``tariff`` is accepted for interface compatibility but is not used.
        """
        self.preds = self.xg_reg.predict(X_test)
        return self.preds

    def entropy(self):
        """Placeholder: intended to return the entropy of each data point (not implemented)."""
        pass



## Simulator
This class creates a new data point on request. The new datapoint can be constrained to some calendar parameters like day-of-week and season-of-year. The following cell randomly selects the date index which follows the input constraints and generates a 'new' datapoint by aggregating the data of multiple LCL energy users.

Also the tariff policy and user response for that particular day is decided by modelling stochastic behaviour of users. Therefore, each user's response is calculated individually by considering user latency into account.

In [4]:
class Simulator:
    """Generate one day of aggregated user consumption and its tariff response.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-user consumption; one column per user, indexed by a DatetimeIndex.
    df_weather : pandas.DataFrame
        Weather data; must contain the timestamps of every sampled day.
    params : dict
        Shared settings; ``active_users``, ``avail_users``, ``season`` and
        ``day_of_week`` are read here.

    After ``select_day()`` / ``random_day()`` the chosen day is available as
    ``self.sample`` (rows of that day, columns = ``self.avail_users``) and its
    date as the string ``self.timestamp``.
    """

    def __init__(self, df, df_weather, params):
        self.params = params
        self.df = df
        self.df_weather = df_weather
        # number of active users derived from the configured fraction
        n_active = int(len(df.columns) * self.params["active_users"])
        self.active_users = random.sample(list(df.columns), n_active)
        self.noisy_tariff = {}
        self.spring = [3, 4, 5]
        self.summer = [6, 7, 8]
        self.autumn = [9, 10, 11]
        self.winter = [1, 2, 12]

    @staticmethod
    def _day_key(stamp):
        """Return the ISO date string (YYYY-MM-DD) used to slice out one whole day."""
        return stamp.strftime("%Y-%m-%d")

    @staticmethod
    def _pick(index):
        """Pick one entry of a pandas index uniformly at random."""
        # Positional pick instead of random.choice(index): some Python
        # versions evaluate the truth value of the sequence, which pandas
        # indexes do not support.
        return index[random.randrange(len(index))]

    def select_day(self):
        """Sample a day matching ``params['season']`` and ``params['day_of_week']``.

        ``season``: -1 = any month, 0 = spring, 1 = summer, 2 = autumn,
        3 = winter. ``day_of_week``: -1 = any day, otherwise 0..6 as in
        ``DatetimeIndex.dayofweek``.

        Raises
        ------
        ValueError
            For an unknown season code, or when the data holds no row for the
            chosen month / weekday combination.
        """
        # Get user ids of participating users
        self.fuzzy_participation()

        # Settings come from this simulator's own params (the previous code
        # read an undefined global `selectsample`).
        season = self.params["season"]
        months_by_season = {0: self.spring, 1: self.summer,
                            2: self.autumn, 3: self.winter}
        if season == -1:
            # upper bound is exclusive, so 13 is needed to include December
            month = random.randrange(1, 13)
        elif season in months_by_season:
            month = random.choice(months_by_season[season])
        else:
            raise ValueError("Unknown season code: " + repr(season))

        dow = self.params["day_of_week"]
        if dow == -1:
            dow = random.randrange(0, 7)

        # Pick a random timestamp among the rows satisfying both conditions
        matches = (self.df.index.month == month) & (self.df.index.dayofweek == dow)
        shortlist = self.df.index[matches]
        if len(shortlist) == 0:
            raise ValueError("No data for month " + str(month) + " and day of week " + str(dow))

        # Stored on the instance (as random_day does) because callers read
        # `timestamp` to look up the weather of the sampled day.
        self.timestamp = self._day_key(self._pick(shortlist))
        self.sample = self.df.loc[self.timestamp, self.avail_users]

    def random_day(self):
        """Sample a uniformly random day from the data."""
        # Get user ids of participating users
        self.fuzzy_participation()

        shortlist = self.df.sample(axis=0).index
        self.timestamp = self._day_key(self._pick(shortlist))
        self.sample = self.df.loc[self.timestamp, self.avail_users]

    def fuzzy_participation(self):
        """Draw the subset of active users available for the next experiment."""
        n_avail = int(len(self.active_users) * self.params["avail_users"])
        self.avail_users = random.sample(self.active_users, n_avail)

    def auto_noise_addition(self, levels, constraints):
        """Draw a per-user start and end slot around the nominal tariff window.

        ``constraints`` must hold the half-hour slots ``h1_start`` and
        ``h1_end``. ``levels`` is currently unused.
        """
        h1_start = constraints["h1_start"]
        h1_end = constraints["h1_end"]
        # The window length comes from the constraints themselves, so the
        # method does not depend on any global object.
        half = int((h1_end - h1_start) / 2)
        n_users = len(self.avail_users)

        self.noisy_tariff["h1_start"] = [
            random.choice(range(h1_start - 2, h1_start + half)) for _ in range(n_users)]
        self.noisy_tariff["h1_end"] = [
            random.choice(range(h1_end - half, h1_end + 2)) for _ in range(n_users)]

    def tariff_policy(self, levels, constraints):
        """Build ``self.df_tariff``: a 48-slot tariff signal per available user.

        Slots are 1 everywhere and 2 between each user's noisy start and end
        slot (both inclusive). Requires a previously sampled day of 48 rows.
        """
        self.auto_noise_addition(levels, constraints)

        self.d = np.ones((48, len(self.avail_users)))
        self.df_tariff = pd.DataFrame(data=self.d, columns=self.avail_users)
        windows = zip(self.avail_users,
                      self.noisy_tariff["h1_start"],
                      self.noisy_tariff["h1_end"])
        for user, first_slot, last_slot in windows:
            # label-based slice on the 0..47 range index: both ends inclusive
            self.df_tariff.loc[first_slot:last_slot, user] = 2

        self.df_tariff.index = self.sample.index

    def run(self):
        """Compute the mean response of the available users for the sampled day.

        ``self.sample`` is gap-filled, joined with the weather of the same
        timestamps and extended with ``hour``/``day``/``month`` columns; these
        changes persist on ``self.sample``.

        Returns
        -------
        pandas.Series
            Named ``'response'``; mean over users of the per-user response.
        """
        self.sample = self.sample.interpolate(method='linear', axis=0).ffill().bfill()
        self.sample = self.sample.join(self.df_weather.loc[self.sample.index, :])
        self.sample["hour"] = self.sample.index.hour
        self.sample["day"] = self.sample.index.day
        self.sample["month"] = self.sample.index.month

        responses = {}
        for user in self.avail_users:
            # Relabel the tariff levels only; the index has to stay a
            # DatetimeIndex so the concat below aligns row by row.
            one_hot = pd.get_dummies(self.df_tariff[user]).rename(
                columns={1.0: 'normal', 2.0: 'high', 3.0: 'low'})
            self.sample = pd.concat([self.sample, one_hot], axis=1)
            self.sample["low"] = 0
            self.sample["trial_n"] = self.sample[user]

            # The consumption model is bypassed for now; test() applies a
            # fixed reduction instead.
            self.test()
            responses[user] = self.preds
            self.sample = self.sample.drop(['low', 'normal', 'high', 'trial_n'], axis=1)

        df_response = pd.DataFrame(responses, index=self.sample.index)
        df_response['response'] = df_response.mean(axis=1)
        return df_response['response']

    def test(self):
        """Set ``self.preds`` to ``trial_n`` reduced by 10% wherever ``high`` is 1."""
        # Work on a copy so the column inside self.sample is not altered.
        self.preds = self.sample['trial_n'].copy()
        is_high = self.sample['high'] == 1
        self.preds.loc[is_high] = self.preds.loc[is_high] * 0.9
        
        

## Active Learner
The following cell simulates the real-world scenario to mimic the practical trials. The only difference is that the dates are randomly selected rather than sequentially moving in time. 

The following algorithm gets the feature set for the next data point and, based on its knowledge of the historical feature subspace, decides whether a labelling experiment is needed. That is, if the situation is rare in the historical data, the learner gives it more importance because it carries more information (information theory says that the less probable the occurrence of a symbol, the more information it contains).

In [5]:
class activeLearner(object):
    
    def __init__(self, df_n, df_weath, params):
        self.df_n = df_n
        self.df_weather = df_weath
        self.params = params
        self.y_pred = pd.DataFrame()
        self.y_test = pd.DataFrame()
        self.counter = 0
        self.iter = 1
        
        
        
            
    def sample_stream(self):
        
        #Randomly select next data point
        sim.random_day()
        level, constraints = trials_.get_random_tariff()
        sim.tariff_policy(level, constraints)
            
        response = sim.run()          
        expected = sim.sample[sim.avail_users].mean(axis = 1).values
         
        dayofweek, month, hourofday, season = self.get_features()
            
        data = {'expected':expected, 
                'response':response.values, 
                'dow':dayofweek, 
                'season':season,
                'hod': hourofday,
                'month': month}
        
        df_ = pd.DataFrame(data, index=response.index)
        df = pd.concat([df_,self.df_weather.loc[sim.timestamp,:]], axis=1)

#         print("sample_stream ", df.columns)
        return df
    
    
    
    
    def get_random_samples(self):
        temp_df = pd.DataFrame(columns = ['expected', 
                                          'response', 
                                          'dow', 
                                          'season'])
        
        
#         select first random day of 48 data points
        sim.random_day()
        
#         Add contextual data in future for the particular day to self.df
        
    
#         Generate new tariff signals for one day
        level, constraints = self.get_random_tariff()
        
#         Get schocastic behaviour of users
        sim.tariff_policy(level, constraints)
    
    
        response = sim.run()
                
        expected = sim.sample[sim.avail_users].mean(axis = 1).values
        
        dayofweek, month, hourofday, season = self.get_features()
            
        data = {'expected':expected, 
                'response':response.values, 
                'dow':dayofweek, 
                'season':season,
                'hod': hourofday,
                'month': month}

        df_ = pd.DataFrame(data, index=response.index)
        self.df = pd.concat([df_,self.df_weather.loc[sim.timestamp,:]], axis=1)

            
#        Create n number of datapoints from simulator (n=self.params["init_samples"])
#        Create a list of 1 to n to include a progress bar
        
        list_ = [i for i in range(self.params["init_samples"])]

        for i in tqdm(list_):
            
            sim.random_day()
            
#             Decide the tariff signal and stochastic behaviour of users around that tariff signal
            level, constraints = self.get_random_tariff()
            sim.tariff_policy(level, constraints)
            
            response = sim.run()
            
            expected = sim.sample[sim.avail_users].mean(axis = 1).values
            
            dayofweek, month, hourofday, season = self.get_features()
            
            data = {'expected':expected, 
                    'response':response.values, 
                    'dow':dayofweek, 
                    'season':season,
                    'hod': hourofday,
                    'month': month}
            
            
        
            df_ = pd.DataFrame(data, index=response.index)
            temp_df = pd.concat([df_,self.df_weather.loc[sim.timestamp,:]], axis=1)
            self.df = pd.concat([self.df, temp_df], axis=0, sort=True)
            
            
            
    def split_data(self, df):
        
        X_train = df[self.params['X_var_activeL']]
        y_train = df[self.params['y_var_activeL']]

        return X_train, y_train
        
        
        
    def get_features(self):
        try:
#             get day of week encoding
            dayofweek = sim.sample.index.dayofweek
#             get month of year encoding
            month = sim.sample.index.month
#             get hour of day value from timestamp
            hourofday = sim.sample.index.hour
#             we are more interested in season based behaviour than monthly behaviour
                
            season = [0 if x in [3,4,5] else x for x in month]
            season = [1 if x in [6,7,8] else x for x in season]
            season = [2 if x in [9,10,11] else x for x in season]
            season = [3 if x in [1,2,12] else x for x in season]
            
            
            
                       
        except Exception as e: print(e)    

        return dayofweek, month, hourofday, season
                     
        
    def train_model(self, X_train, y_train):
        self.regres = RandomForestRegressor(max_depth=10, 
                                                random_state=0, 
                                                n_estimators=1000)
        self.regres.fit(X_train, y_train)
        
        
    def predict(self, X_test, y_test):
        exp_id = self.get_experiment_id()
        self.y_pred.loc[:, exp_id] = self.regres.predict(X_test)
        self.y_test.loc[:, exp_id] = y_test.values
        mse = self.get_error_measure(y_test, self.y_pred)
        
    
    def get_error_measure(self, y_test, y_pred):
        mse = ((y_test - y_pred)**2).mean(axis=0)
        return mse
    
                     
    def get_experiment_id(self):
        exp_available = self.params["total_experiments"] - self.params["init_samples"]
        
        if self.counter > exp_available:
            self.counter=0
            self.iter = self.iter + 1
            
        exp_count = str(self.counter)
        iter_count = str(self.iter)
        exp_id = "iter" + iter_count + "_exp" + exp_count
        self.counter = self.counter + 1
        return exp_id
       
    def get_random_tariff(self):
        self.year = random.randrange(2012,2013)
        self.month = random.randrange(1,12)
        self.day = random.randrange(1,28)
        self.hour = random.randrange(17,18)
        self.minute = random.choice([0,30])
        self.duration = random.randrange(6, 8)
        index = datetime(self.year, self.month, self.day, self.hour, self.minute, 0)
        h1_start = int(index.hour * 2) + int(index.minute / 30) 
        h1_end = h1_start + self.duration
        constraints = {"h1_start": h1_start, "h1_end": h1_end}
        level = 0       #dummy
        return level, constraints
        
                     
                     
                     
                     
    def run(self):
        
        self.df = pd.DataFrame(columns = ['expected', 
                                          'response', 
                                          'dow', 
                                          'season',
                                          'month',
                                          'hod'])
        
        self.get_random_samples()
                
        X_train, y_train= self.split_data(self.df)
        self.train_model(X_train, y_train)
        mse = []
        
        
        
        list_ = [i for i in range(self.params["total_experiments"] - self.params["init_samples"])]
        for exp in tqdm(list_):
            
            next_sample = self.sample_stream()
            X_test, y_test= self.split_data(next_sample)
            self.predict(X_test, y_test)
            self.df = pd.concat([self.df, next_sample], axis=0, sort=True)
            X_train, y_train= self.split_data(self.df)
            self.train_model(X_train, y_train)
            
                     
        return mse


In [6]:
def import_data(base_dir='~/Documents/work/Active-Learning-TUD-Thesis'):
    """Read the consumption and weather CSV files.

    Parameters
    ----------
    base_dir : str, optional
        Project directory containing ``mod_datasets/`` and ``UKDA-7857-csv/``.
        Defaults to the location used so far.

    Returns
    -------
    tuple
        ``(df_final, df_n, df_weath)``: aggregate consumption joined
        column-wise with the 2013 weather, the per-user consumption, and a
        copy of the full interpolated half-hourly weather.

    Any read or parse error is raised to the caller. Previously it was only
    printed, after which the ``return`` failed with an unrelated
    ``UnboundLocalError``.
    """
    read_opts = dict(sep=',', header=0, index_col=0, parse_dates=['GMT'], low_memory=False)

    print("Reading aggregate consumption data...")
    df = pd.read_csv(base_dir + '/mod_datasets/aggregate_consumption.csv', **read_opts)
    # NOTE(review): drop_duplicates compares row values, not timestamps -
    # confirm that this is the intended de-duplication.
    df = df.drop_duplicates()
    print("Done")

    print("Reading weather data...")
    df_midas = pd.read_csv(base_dir + '/mod_datasets/midas_weather.csv', **read_opts)
    # '30min' is the same half-hour frequency as the deprecated alias '30T'
    df_midas_rs = df_midas.resample('30min').mean()
    df_interpolated = df_midas_rs.interpolate(method='linear')
    df_weather = df_interpolated.loc['2013-01':'2013-12', :]
    df_final = pd.concat([df, df_weather], axis=1)
    print("Done")

    print("Reading LCL consumption data...")
    df_n = pd.read_csv(
        base_dir + '/UKDA-7857-csv/csv/data_collection/data_tables/consumption_n.csv',
        **read_opts)
    df_n = df_n.drop_duplicates()
    df_weath = df_interpolated.copy()
    print("Done")

    return df_final, df_n, df_weath

In [7]:
def _init():
    """Load the data and build the three main objects.

    Returns
    -------
    tuple
        ``(cons_model, sim, trials_)``: the consumption model on the
        aggregate data, and the simulator and active learner, both on the
        data from 2012-05 onwards.

    Construction errors are raised to the caller. Previously they were only
    printed, after which the ``return`` failed with an unrelated
    ``UnboundLocalError``.
    """
    df_final, df_n, df_weath = import_data()

    # Simulator and learner work on the same period
    consumption = df_n.loc['2012-05':, :]
    weather = df_weath.loc['2012-05':, :]

    cons_model = ConsumptionModel(df_final, params)
    sim = Simulator(consumption, weather, params)
    trials_ = activeLearner(consumption, weather, params)

    return cons_model, sim, trials_

In [8]:
def plot_bokeh(y, x, title, xlabel, ylabel):
    """Show ``y`` against ``x`` as a blue line in an 800x400 bokeh figure.

    Output goes to the notebook and to ``../temp/<title>.html``.
    """
    output_notebook()
    # the plot is also saved as an html file named after the title
    output_file("../temp/" + title + ".html")

    chart = figure(
        plot_width=800,
        plot_height=400,
        title=title,
        x_axis_label=xlabel,
        y_axis_label=ylabel,
    )
    chart.line(x, y, line_width=1, color='blue')
    show(chart)

In [None]:
if __name__ == '__main__':
    import os

    # import data and declare classes
    cons_model, sim, trials_ = _init()

    # Start the simulator and the active learning loop. The result has to be
    # captured: `mse` is used right below and was never assigned before.
    mse = trials_.run()
    list_ = list(range(len(mse)))
    d = {'0': mse}

    for i in range(params["total_iterations"]):
        print("Iteration", i+1)
        trials_.run()

    today = date.today()

    # Make sure the output directory exists before writing the results of a
    # long simulation.
    os.makedirs("../results", exist_ok=True)

    # data generated by the last run
    file_name = "../results/generated_data" + str(today) + ".csv"
    trials_.df.to_csv(file_name, sep='\t')

    # predictions and actual values, one column per experiment
    file_name = "../results/predictions" + str(today) + ".csv"
    trials_.y_pred.to_csv(file_name, sep='\t')

    file_name = "../results/actuals" + str(today) + ".csv"
    trials_.y_test.to_csv(file_name, sep='\t')

    # feature set used by the active learner
    file_name = "../results/feature_set" + str(today) + ".txt"
    with open(file_name, "w") as output:
        output.write(str(params["X_var_activeL"]))
    


# Scratch book
.

.

.

.

.

.

.

.

.

.

.




















In [None]:
   # start the simulator and active learning by membership query synthesis
   # NOTE(review): scratch cell that does not match the code above:
   #   - it unpacks two values from trials_.run(), while activeLearner.run
   #     in this notebook returns a single list;
   #   - it calls plot_bokeh with four arguments, while plot_bokeh above
   #     takes (y, x, title, xlabel, ylabel).
   # Presumably written for an earlier version that compared active
   # learning against random sampling - confirm before reuse.
    mse_ActiveL, mse_Rand = trials_.run()
    list_ = [i for i in range(len(mse_ActiveL))]
    # one column per repetition; column '0' holds the first run
    d1 = {'0':mse_ActiveL}
    mse_AL_total = pd.DataFrame(data=d1)
    d2 = {'0':mse_Rand}
    mse_Rand_total = pd.DataFrame(data=d2)
    
    # 50 further repetitions, stored in columns '1' .. '50'
    for i in range(50):
        mse_ActiveL, mse_Rand = trials_.run()
        mse_AL_total.loc[:,str(i+1)] = mse_ActiveL
        mse_Rand_total.loc[:, str(i+1)] = mse_Rand
        
    # plot the row-wise mean over all repetitions of both variants
    plot_bokeh(mse_AL_total.mean(axis=1), mse_Rand_total.mean(axis=1), params, list_)

In [6]:
class SelectSample(object):
    """Choose the next experiment day, either error-driven or at random.

    Relies on the module-level objects ``sim`` (simulator) and ``trials_``
    (active learner).

    Parameters
    ----------
    params : dict
        Settings dictionary; ``from_oracle`` writes ``day_of_week`` and
        ``season`` into it.
    """

    def __init__(self, params):
        self.params = params

    def _simulate(self, pick_day):
        """Run one experiment on the day chosen by ``pick_day``.

        Returns a one-row frame with the maximum expected and maximum response
        value of the day and the day's dow and season.
        """
        pick_day()
        level, constraints = trials_.get_random_tariff()
        sim.tariff_policy(level, constraints)

        response = sim.run()
        response_max = response.max()

        expected = sim.sample[sim.avail_users].mean(axis=1).values
        expected_max = expected.max()

        # get_features returns four per-row sequences. The sample is sliced
        # out by a single date, so the first entry stands for the whole day.
        dayofweek, _month, _hourofday, season = trials_.get_features()

        df = pd.DataFrame(columns=['expected', 'response', 'dow', 'season'])
        df.loc[0] = [expected_max, response_max, dayofweek[0], season[0]]
        return df

    def from_oracle(self, preds, X_test, y_test):
        """Generate a new day for the dow/season of the worst-predicted test point.

        Parameters
        ----------
        preds : array-like
            Predictions, positionally aligned with ``y_test``.
        X_test : pandas.DataFrame
            Must contain the columns ``dow`` and ``season``.
        y_test : pandas.DataFrame
            Must contain the column ``response``.

        Returns
        -------
        tuple
            ``(df, mse)``: the one-row frame of the new experiment and the
            mean squared error of ``preds``.
        """
        # Select the point with maximum error
        df_y_test = y_test.reset_index()
        df_preds = pd.DataFrame(data={'preds': preds})
        df_X_test = X_test.reset_index()

        error_ = (df_y_test['response'] - df_preds['preds'])**2
        mse = error_.mean(axis=0)

        worst = error_.idxmax()
        self.params["day_of_week"] = df_X_test.loc[worst, 'dow']
        self.params["season"] = df_X_test.loc[worst, 'season']

        # Generate new data point for above dow and season
        return self._simulate(sim.select_day), mse

    def random(self):
        """Generate a new data point for a random day.

        Returns
        -------
        tuple
            ``(df, None)``. No prediction is involved here, so there is no
            error to report; the second element keeps the return shape of
            ``from_oracle``.
        """
        return self._simulate(sim.random_day), None

In [None]:
# Load two tab-separated result files of 2019-04-20 (by their file names:
# actual values in df1, predictions in df2; `df` is a second name for df2).
df1 = pd.read_csv('~/Documents/work/Active-Learning-TUD-Thesis/results/actuals2019-04-20.csv', sep='\t', low_memory=False)
df2 = df = pd.read_csv('~/Documents/work/Active-Learning-TUD-Thesis/results/predictions2019-04-20.csv', sep='\t', low_memory=False)
# Element-wise difference of the two frames; as the last expression of the
# cell it is what the notebook displays.
df1-df2

In [19]:
def plot_bokeh2(y1, y2, x, title, xlabel, ylabel):
    """Show two series against ``x`` in an 800x400 bokeh figure.

    ``y1`` is drawn in blue and ``y2`` in red. Output goes to the notebook
    and to ``../temp/<title>.html``.
    """
    output_notebook()
    # the plot is also saved as an html file named after the title
    output_file("../temp/" + title + ".html")

    chart = figure(
        plot_width=800,
        plot_height=400,
        title=title,
        x_axis_label=xlabel,
        y_axis_label=ylabel,
    )
    for series, colour in ((y1, 'blue'), (y2, 'red')):
        chart.line(x, series, line_width=1, color=colour)
    show(chart)

In [None]:
# df1.mean(axis = 1)
# Plot one column of df1 (blue) and one column of df2 (red) over the 48
# half-hour slots of a day.
title = "Actual and predictions"
xlabel = "half hour slots"
ylabel = "energy consumption"
list_ = [i for i in range(48)]

# NOTE(review): the two columns belong to different experiment ids
# ("iter1_exp800" vs "iter1_exp804") - confirm that this is intended.
plot_bokeh2(df1["iter1_exp800"].values, df2["iter1_exp804"].values, list_, title, xlabel, ylabel)

In [None]:
# generate.experiment()
# level, constraints = generate.tariff()
# sim.tariff_policy(level, constraints)
# response = sim.run()
# response

# Put the learner's stored actual values and predictions into two frames.
# NOTE(review): the activeLearner class in this notebook never assigns a
# `preds` attribute (it fills `y_pred`), so `trials_.preds` presumably refers
# to an earlier version - confirm before reuse.
a = trials_.y_test.reset_index()
d = {'preds': trials_.preds}
b = pd.DataFrame(data = d)

In [None]:
# Load two tab-separated result files of 2019-04-19 (by their file names:
# actual values in df1, predictions in df2; `df` is a second name for df2).
df1 = pd.read_csv('~/Documents/work/Active-Learning-TUD-Thesis/results/actuals2019-04-19.csv', sep='\t', low_memory=False)
df2 = df = pd.read_csv('~/Documents/work/Active-Learning-TUD-Thesis/results/predictions2019-04-19.csv', sep='\t', low_memory=False)
