In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import holidays
import itertools
from sklearn.linear_model import LinearRegression, Ridge
# from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, max_error, mean_absolute_error
import time

# 1. Import Data

In [3]:
def import_ridership_weather_data():
  # Import ridership data
  data_dir = '/content/drive/My Drive/MIE498 Thesis/0_Data'
  merged_bike_data_2019 = pd.read_csv("{}/ridership_2019_with_bike_stations_info_20200930.csv".format(data_dir), header=0)
  print(merged_bike_data_2019.shape)
  merged_bike_data_2019['End Day of Year'] = merged_bike_data_2019.apply(lambda row: datetime.strptime(row['End Time'], "%Y-%m-%d %H:%M:%S").timetuple().tm_yday, axis=1)
  print(merged_bike_data_2019.shape)

  merged_bike_data_2019['Start Time'] = merged_bike_data_2019.apply(lambda row: datetime.strptime(row['Start Time'], "%Y-%m-%d %H:%M:%S"), axis=1)
  merged_bike_data_2019['Start Day'] = merged_bike_data_2019.apply(lambda row: row['Start Time'].day, axis=1)
  merged_bike_data_2019['End Time'] = merged_bike_data_2019.apply(lambda row: datetime.strptime(row['End Time'], "%Y-%m-%d %H:%M:%S"), axis=1)
  merged_bike_data_2019['End Day'] = merged_bike_data_2019.apply(lambda row: row['End Time'].day, axis=1)

  # Import weather data
  df_weather = pd.read_csv('/content/drive/My Drive/MIE498 Thesis/Share-Bike-Station-Clustering-and-Usage-Prediction/toronto_weather_2019.csv', index_col=None)
  df_weather['Month'] = df_weather['Month'].astype("float64")
  df_weather['Day'] = df_weather['Day'].astype("float64")
  df_weather['Hour'] = df_weather['Hour'].astype("float64")
  df_weather['Day of Week'] = df_weather['Day of Week'].astype("float64")

  return merged_bike_data_2019, df_weather

In [4]:
def merge_clustering_data(nClusteringIterations, ridership_data, weather_data, k2):
  merged_data = merged_bike_data_2019.copy()
  # Import cluster-station-assignment data
  df_data_w_clusters = pd.read_csv('/content/drive/My Drive/MIE498 Thesis/Share-Bike-Station-Clustering-and-Usage-Prediction/clustering_results/station_data_w_clusters_{}iterations_k2={}.csv'.format(nClusteringIterations, k2), index_col=0)
  merged_data = merged_data.merge(df_data_w_clusters[['station_id', 'cluster']], how='left', left_on='Start Station Id', right_on='station_id').drop('station_id', axis=1)
  merged_data.rename({'cluster': 'Start Cluster'}, axis=1, inplace=True)
  merged_data = merged_data.merge(df_data_w_clusters[['station_id', 'cluster']], how='left', left_on='End Station Id', right_on='station_id').drop('station_id', axis=1)
  merged_data.rename({'cluster': 'End Cluster'}, axis=1, inplace=True)

  merged_data = merged_data.merge(df_weather[['Month', 'Day', 'Hour', 'Temperature (Celsius)']], how='left', left_on=['Start Month', 'Start Day', 'Start Hour'], right_on=['Month', 'Day', 'Hour']).drop(['Month', 'Day', 'Hour'], axis=1)
  merged_data.rename({'Temperature (Celsius)': 'Start Temp'}, axis=1, inplace=True)
  merged_data = merged_data.merge(df_weather[['Month', 'Day', 'Hour', 'Temperature (Celsius)']], how='left', left_on=['End Month', 'End Day', 'End Hour'], right_on=['Month', 'Day', 'Hour']).drop(['Month', 'Day', 'Hour'], axis=1)
  merged_data.rename({'Temperature (Celsius)': 'End Temp'}, axis=1, inplace=True)

  return merged_data

In [5]:
def check_weekend(dayofweek):
    if dayofweek > 4:
      return 'weekend'
    else:
      return 'weekday'

In [6]:
def filter_checkout_checkin_data(merged_data):
# check-out and check-in data split
  checkout_data = merged_data[['Trip Id', 'Start Station Id', 'Start Time', 'Start Station Name', 'Start Year', 'Start Month', 'Start Hour',
        'Start Day of Week', 'Start Holiday', 'Start Day of Year', 'Start Week of Year', 'Start Lat', 'Start Lon', 'Start Cluster', 'Start Temp']]
  checkout_data['weekday/weekend'] = checkout_data.apply(lambda row: check_weekend(row['Start Day of Week']), axis=1)

  checkin_data = merged_data[['Trip Id', 'End Station Id', 'End Time', 'End Station Name', 'End Year', 'End Month',
        'End Hour', 'End Day of Week', 'End Holiday', 'End Lat', 'End Lon', 'End Day of Year', 'End Cluster', 'End Temp']]
  checkin_data['weekday/weekend'] = checkin_data.apply(lambda row: check_weekend(row['End Day of Week']), axis=1)

  return checkout_data, checkin_data

# 2. Class Definition

In [7]:
class BikeDemand:
  def __init__(self, nTransitions, tempflag):
    self.nTransitionMatrix = nTransitions
    self.tempFlag = tempflag
    self.nClusters = None

  # Training: Months 1-9, Testing: Months 10-12
  def train_test_split(self, data, start_end_flag):
    training_data = data.loc[(data['{} Month'.format(start_end_flag)] >= 1) & (data['{} Month'.format(start_end_flag)] <= 9)]
    testing_data = data.loc[(data['{} Month'.format(start_end_flag)] >= 10) & (data['{} Month'.format(start_end_flag)] <= 12)]
    print(data.shape, training_data.shape, testing_data.shape)
    return training_data, testing_data
  
  def fill_in_missing_combinations(self, data, start_end_flag):
    # Check missing combinations
    
    hours = np.arange(24)
    # dayofyear = data['{} Day of Year'.format(start_end_flag)].unique()
    weekday_weekend = ['weekday', 'weekend']
    clusters = np.arange(self.nClusters)

    combinations = itertools.product(hours, weekday_weekend, clusters)
    features_list = ['{} Hour'.format(start_end_flag), 'weekday/weekend', '{} Cluster'.format(start_end_flag)]
    df_combinations = data[features_list].to_numpy().astype('str')
    df_combinations = df_combinations.tolist()
    comb_list = []
    for comb in combinations:
      comb = [str(comb[0]), comb[1], str(comb[2])]
      if comb not in df_combinations:
        # print(comb)
        comb_list.append(comb)
    print('number of missing combinations:', len(comb_list))

    # Fill in missing combinations
    for comb in comb_list:
      hour = int(comb[0])
      weekday_weekend = comb[1]
      cluster = int(comb[2])
      df_select = data[(data['{} Hour'.format(start_end_flag)] == hour) & (data['weekday/weekend'] == weekday_weekend)]
      
      clusters_list = data['{} Cluster'.format(start_end_flag)].unique()
      for c in clusters:
        if c not in clusters_list:
          if self.tempFlag == True:
            for temperature in df_select['{} Temp'.format(start_end_flag)].unique():
              for doy in df_select[df_select['{} Temp'.format(start_end_flag)] == temperature]['{} Day of Year'.format(start_end_flag)].unique():
                new_row = {'{} Hour'.format(start_end_flag) : hour, 'weekday/weekend' : weekday_weekend, 
                            '{} Cluster'.format(start_end_flag) : cluster, '{} Day of Year'.format(start_end_flag) : doy,
                            '{} Temp'.format(start_end_flag) : temperature, 'Trip Id': 0.000001}
                data = data.append(new_row, ignore_index = True)
          else:
            for doy in df_select['{} Day of Year'.format(start_end_flag)].unique():
                new_row = {'{} Hour'.format(start_end_flag) : hour, 'weekday/weekend' : weekday_weekend, 
                            '{} Cluster'.format(start_end_flag) : cluster, '{} Day of Year'.format(start_end_flag) : doy,
                            'Trip Id': 0.000001}
                data = data.append(new_row, ignore_index = True)
    return data
  
  def preprocess_data(self, train_data, test_data, start_end_flag):
    features_list = ['Trip Id', '{} Hour'.format(start_end_flag), 'weekday/weekend', '{} Cluster'.format(start_end_flag), '{} Day of Year'.format(start_end_flag)]
    grouping_features_list = ['{} Hour'.format(start_end_flag), 'weekday/weekend', '{} Cluster'.format(start_end_flag), '{} Day of Year'.format(start_end_flag)]
    reset_levels_list = [0,1,2,3]
    if self.tempFlag == True:
      features_list += ['{} Temp'.format(start_end_flag)]
      grouping_features_list += ['{} Temp'.format(start_end_flag)]
      reset_levels_list += [4]
    train_data = train_data[features_list]
    train_data = train_data.groupby(by=grouping_features_list).count()
    # print(train_data.head())
    train_data = train_data.reset_index(level=reset_levels_list)
    train_data['{} Hour'.format(start_end_flag)] = train_data['{} Hour'.format(start_end_flag)].astype("int64")

    test_data = test_data[features_list]
    test_data = test_data.groupby(by=grouping_features_list).count()
    test_data = test_data.reset_index(level=reset_levels_list)
    test_data['{} Hour'.format(start_end_flag)] = test_data['{} Hour'.format(start_end_flag)].astype("int64")

    train_data = self.fill_in_missing_combinations(train_data, start_end_flag)
    test_data = self.fill_in_missing_combinations(test_data, start_end_flag)

    if start_end_flag == 'Start':
      name = 'Number of Checkouts'
    else:
      name = 'Number of Checkins'

    train_data = train_data.sort_values(by=grouping_features_list)
    train_data.rename({'Trip Id': name}, axis=1, inplace=True)
    train_data.reset_index(drop=True, inplace=True)

    test_data = test_data.sort_values(by=grouping_features_list)
    test_data.rename({'Trip Id': name}, axis=1, inplace=True)
    test_data.reset_index(drop=True, inplace=True)

    print('Train Data Shape: ', train_data.shape)
    print('Test Data Shape: ', test_data.shape)

    return train_data, test_data

  def predict_checkout(self, checkout_train, checkout_test):
    features_list = ['Start Hour', 'weekday/weekend', 'Start Cluster']
    if self.tempFlag == True:
      features_list += ['Start Temp']
    X_train = checkout_train[features_list]
    X_train = pd.get_dummies(data=X_train, columns=['Start Hour', 'weekday/weekend', 'Start Cluster'], drop_first=True).to_numpy()
    y_train = checkout_train[['Number of Checkouts']].to_numpy()

    X_test = checkout_test[features_list]
    X_test = pd.get_dummies(data=X_test, columns=['Start Hour', 'weekday/weekend', 'Start Cluster'], drop_first=True).to_numpy()
    y_test = checkout_test[['Number of Checkouts']].to_numpy()

    y_train_log, y_test_log = np.log(y_train), np.log(y_test)
    checkout_linreg = LinearRegression().fit(X_train, y_train_log)

    y_train_pred = np.exp(checkout_linreg.predict(X_train))
    y_test_pred = np.exp(checkout_linreg.predict(X_test))

    return X_train, y_train, y_train_pred, X_test, y_test, y_test_pred
  
  def evaluate(self, y_true, y_pred):
    mse = round(mean_squared_error(y_true, y_pred, squared=True),3) # MSE
    rmse = round(mean_squared_error(y_true, y_pred, squared=False),3) # RMSE
    mre = round(max_error(y_true, y_pred),3) # maximum residual error
    mae = round(mean_absolute_error(y_true, y_pred),3) # MAE
    # r2 = round(r2_score(y_true, y_pred),3)
    # return mse, rmse, mre, mae, r2
    return mse, rmse, mre, mae

  def generate_transition_matrix(self, df_train_select):
    from_cluster_list = list(np.arange(0, self.nClusters))
    df_transition_matrix = pd.DataFrame(data=np.zeros((self.nClusters, self.nClusters)), columns = from_cluster_list, index=from_cluster_list)
    df_transition_matrix = df_transition_matrix.astype("int")

    for from_cluster in from_cluster_list:
      data_train_temp = df_train_select[df_train_select['Start Cluster'] == from_cluster]
      df_counts = pd.DataFrame(data_train_temp['End Cluster'].value_counts())
      
      # print(df_counts.head())
      to_cluster_list = list(df_counts.index)
      
      for to_cluster in to_cluster_list:
        cnt = df_counts.loc[to_cluster]['End Cluster']
        df_transition_matrix.loc[from_cluster, to_cluster] = cnt
      
      df_transition_matrix = df_transition_matrix.div(df_transition_matrix.sum(axis=1), axis=0)

    df_transition_matrix.replace(np.nan, 0, inplace=True)
    return df_transition_matrix
  
  def generate_transition_matrix_dict(self, data_train):
    n = self.nTransitionMatrix
    transition_matrix_dict = {}
    if n == 1:
      return self.generate_transition_matrix(data_train)
    
    elif n == 24:
      for hr in data_train['Start Hour'].unique():
        df_train_select = data_train[data_train['Start Hour'] == hr]
        transition_matrix = self.generate_transition_matrix(df_train_select)
        transition_matrix_dict[hr] = transition_matrix
      return transition_matrix_dict
    
    elif n == 48:
      for hr in data_train['Start Hour'].unique():
        for wd in data_train['weekday/weekend'].unique():
          df_train_select = data_train[(data_train['Start Hour'] == hr) & (data_train['weekday/weekend'] == wd)]
          transition_matrix = self.generate_transition_matrix(df_train_select)
          transition_matrix_dict[(hr, wd)] = transition_matrix
      return transition_matrix_dict
  
  def predict_checkin_by_cluster(self, df_predictions, df_transition_matrix):
    checkin_prediction_list = []
    columns_list = ['End Cluster {}'.format(c) for c in np.arange(self.nClusters)]
    df_checkin = pd.DataFrame(columns = columns_list)

    if self.nTransitionMatrix == 1:
      for idx, row in df_predictions.iterrows():
        if idx % 5000 == 0:
          print(idx)
        checkout_prediction = row['Check-out Predictions']
        cluster_n = row['Start Cluster']
        transition_list = np.array(df_transition_matrix.iloc[cluster_n])
        # check-out predictions x transition matrix = check-in predictions
        checkin_prediction = np.round(transition_list * checkout_prediction, 6)
        checkin_prediction = pd.Series(checkin_prediction, index = df_checkin.columns)
        df_checkin = df_checkin.append(checkin_prediction, ignore_index=True)
    elif self.nTransitionMatrix == 24:
      for idx, row in df_predictions.iterrows():
        if idx % 5000 == 0:
          print(idx)
        checkout_prediction = row['Check-out Predictions']
        cluster_n = row['Start Cluster']
        hour = row['Start Hour']
        transition_list = np.array(df_transition_matrix[hour].iloc[cluster_n])
        # check-out predictions x transition matrix = check-in predictions
        checkin_prediction = np.round(transition_list * checkout_prediction, 6)
        checkin_prediction = pd.Series(checkin_prediction, index = df_checkin.columns)
        df_checkin = df_checkin.append(checkin_prediction, ignore_index=True)
    elif self.nTransitionMatrix == 48:
      for idx, row in df_predictions.iterrows():
        if idx % 5000 == 0:
          print(idx)
        checkout_prediction = row['Check-out Predictions']
        cluster_n = row['Start Cluster']
        hour, weekday = row['Start Hour'], row['weekday/weekend']
        transition_list = np.array(df_transition_matrix[(hour, weekday)].iloc[cluster_n])
        # check-out predictions x transition matrix = check-in predictions
        checkin_prediction = np.round(transition_list * checkout_prediction, 6)
        checkin_prediction = pd.Series(checkin_prediction, index = df_checkin.columns)
        df_checkin = df_checkin.append(checkin_prediction, ignore_index=True)
    
    return pd.concat([df_predictions, df_checkin], axis=1)
  
  def compute_true_checkin(self, checkin_test):
    features_list = ['End Hour', 'weekday/weekend', 'End Day of Year']
    data_columns = ['End Hour','weekday/weekend', 'End Cluster', 'Number of Checkins']
    if self.tempFlag == True:
      features_list += ['End Temp']
      data_columns += ['End Temp']
    df = checkin_test[features_list].drop_duplicates()
    clusters = np.arange(self.nClusters)
    df_checkin = pd.DataFrame(columns = data_columns)
    for idx, row in df.iterrows():
      if self.tempFlag == True:
        hr, wd, edoy, temp = row[0], row[1], row[2], row[3]
        df_temp = checkin_test[(checkin_test['End Hour'] == hr) & (checkin_test['weekday/weekend'] == wd) & 
                             (checkin_test['End Day of Year'] == edoy) & (checkin_test['End Temp'] == temp)]
      else: 
        hr, wd, edoy = row[0], row[1], row[2]
        df_temp = checkin_test[(checkin_test['End Hour'] == hr) & (checkin_test['weekday/weekend'] == wd) & 
                             (checkin_test['End Day of Year'] == edoy)]
      
      clusters_list = df_temp['End Cluster'].unique()
      for c in clusters:
        if c not in clusters_list:
          if self.tempFlag == True:
            new_row = {'End Hour' : hr, 'weekday/weekend' : wd, 'End Cluster' : c, 'End Day of Year' : edoy, 'End Temp' : temp, 'Number of Checkins': 0.000001}
          else:
            new_row = {'End Hour' : hr, 'weekday/weekend' : wd, 'End Cluster' : c, 'End Day of Year' : edoy, 'Number of Checkins': 0.000001}
          df_temp = df_temp.append(new_row, ignore_index = True)
      if self.tempFlag == True:
        df_temp.sort_values(by=['End Hour', 'weekday/weekend', 'End Cluster', 'End Day of Year', 'End Temp'], inplace=True)
      else:
        df_temp.sort_values(by=['End Hour', 'weekday/weekend', 'End Cluster', 'End Day of Year'], inplace=True)
      df_temp.reset_index(drop=True, inplace=True)
      if len(df_temp) != self.nClusters:
        print(df_temp)
      df_checkin = df_checkin.append(df_temp, ignore_index = True)
        
    return df_checkin
  
  def run_prediction_pipeline(self, merged_data, checkout_data, checkin_data):
    self.nClusters = len(merged_data['Start Cluster'].unique())
    print(self.nClusters)

    print('Checkout predictions')
    checkout_train, checkout_test = self.train_test_split(checkout_data, 'Start')
    checkout_train, checkout_test = self.preprocess_data(checkout_train, checkout_test, 'Start')
    X_train, y_train, y_train_pred, X_test, y_test, y_test_pred = self.predict_checkout(checkout_train, checkout_test)

    print('Checkout training and testing errors')
    mse_train, rmse_train, mre_train, mae_train = self.evaluate(y_train, y_train_pred)
    mse_test, rmse_test, mre_test, mae_test = self.evaluate(y_test, y_test_pred)
    print('training errors: ', mse_train, rmse_train, mre_train, mae_train)
    print('testing errors: ', mse_test, rmse_test, mre_test, mae_test)

    print('Transition Matrix Computation')
    data_train, data_test = self.train_test_split(merged_data, 'Start')
    data_train['weekday/weekend'] = data_train.apply(lambda row: check_weekend(row['Start Day of Week']), axis=1)
    df_transition_matrix = self.generate_transition_matrix_dict(data_train)

    print('Summarize checkout predictions')
    if self.tempFlag == True:
      df_predictions = checkout_test[['Start Hour', 'weekday/weekend', 'Start Cluster', 'Start Temp']]
    else:
      df_predictions = checkout_test[['Start Hour', 'weekday/weekend', 'Start Cluster']]
    df_predictions['Check-out Predictions'], df_predictions['Check-out True Values'] = y_test_pred, y_test
    df_predictions = self.predict_checkin_by_cluster(df_predictions, df_transition_matrix)

    print('Checkin predictions')
    checkin_train, checkin_test = self.train_test_split(checkin_data, 'End')
    checkin_train, checkin_test = self.preprocess_data(checkin_train, checkin_test, 'End')
    df_checkin = self.compute_true_checkin(checkin_test)
    checkin_pred = []
    
    if self.tempFlag == True:
      df_checkin_row = df_checkin[['End Hour', 'weekday/weekend', 'End Temp', 'End Day of Year']].drop_duplicates()
    else:
      df_checkin_row = df_checkin[['End Hour', 'weekday/weekend', 'End Day of Year']].drop_duplicates()
      df_predictions2_cp = df_predictions.copy()
      df_predictions2_cp.drop(columns=['Check-out True Values'], inplace=True)
      df_predictions2_cp = df_predictions2_cp.drop_duplicates()
    
    for idx, row in df_checkin_row.iterrows():
      if self.tempFlag == True:
        hr, wd, temp = row[0], row[1], row[2]
        temp_pred_list = df_predictions[(df_predictions['Start Hour'] == hr) & (df_predictions['weekday/weekend'] == wd) & (df_predictions['Start Temp'] == temp)].iloc[:, 6:].sum(axis=0)
      else:
        hr, wd = row[0], row[1]
        temp_pred_list = df_predictions2_cp[(df_predictions['Start Hour'] == hr) & (df_predictions2_cp['weekday/weekend'] == wd)].iloc[:, 4:].sum(axis=0)
      
      if len(temp_pred_list) != self.nClusters:
        print(len(temp_pred_list))
      checkin_pred += temp_pred_list.to_list()
    df_checkin['Predicted Number of Checkins'] = checkin_pred

    print('Evaluate the predictions')
    checkout_true = df_predictions['Check-out True Values'].to_numpy()
    checkout_pred = df_predictions['Check-out Predictions'].to_numpy()
    checkin_true = df_checkin[['Number of Checkins']].to_numpy()
    checkin_pred = df_checkin[['Predicted Number of Checkins']].to_numpy()

    print('Checkout evaluation:')
    checkout_mse, checkout_rmse, checkout_mre, checkout_mae = self.evaluate(checkout_true, checkout_pred)
    print('mse, rmse, mre, mae: ', checkout_mse, checkout_rmse, checkout_mre, checkout_mae)
    print('Checkin evaluation:')
    checkin_mse, checkin_rmse, checkin_mre, checkin_mae = self.evaluate(checkin_true, checkin_pred)
    print('mse, rmse, mre, mae: ', checkin_mse, checkin_rmse, checkin_mre, checkin_mae)
    # return checkout_mse, checkout_rmse, checkout_mre, checkout_mae, checkin_mse, checkin_rmse, checkin_mre, checkin_mae
    return checkout_test, checkin_test, df_transition_matrix, df_predictions, df_checkin, checkout_mse, checkout_rmse, checkout_mre, checkout_mae, checkin_mse, checkin_rmse, checkin_mre, checkin_mae

# 3. Run Pipeline

In [8]:
merged_bike_data_2019, df_weather = import_ridership_weather_data()

(2438720, 26)
(2438720, 27)


In [9]:
def run_scenarios(df_checkout_errs, df_checkin_errs, n_clustering_iterations_list, n_transition_matrix_list, temperatureFlag_list, k2):
  savepath_folder = '/content/drive/My Drive/MIE498 Thesis/Share-Bike-Station-Clustering-and-Usage-Prediction/prediction_results/'
  for n_clustering_iterations in n_clustering_iterations_list:
    merged_data = merge_clustering_data(nClusteringIterations=n_clustering_iterations, ridership_data=merged_bike_data_2019, weather_data=df_weather, k2=k2)
    checkout_data, checkin_data = filter_checkout_checkin_data(merged_data)
    for temperatureFlag in temperatureFlag_list:
      for n_transition_matrix in n_transition_matrix_list:
        print(n_clustering_iterations, temperatureFlag, n_transition_matrix)
        start_time = time.time()
        bikedemand = BikeDemand(nTransitions=n_transition_matrix, tempflag=temperatureFlag)
        checkout_test, checkin_test, df_transition_matrix, df_predictions, df_checkin, checkout_mse, checkout_rmse, checkout_mre, checkout_mae, checkin_mse, checkin_rmse, checkin_mre, checkin_mae = bikedemand.run_prediction_pipeline(merged_data, checkout_data, checkin_data)
        savepath_checkout = savepath_folder + 'CheckoutPred_nClusteringIter={}_nTMs={}_Temp={}_K2={}.csv'.format(n_clustering_iterations, n_transition_matrix, temperatureFlag, k2)
        savepath_checkin = savepath_folder + 'CheckinPred_nClusteringIter={}_nTMs={}_Temp={}_K2={}.csv'.format(n_clustering_iterations, n_transition_matrix, temperatureFlag, k2)
        # df_predictions.to_csv(savepath_checkout)
        # df_checkin.to_csv(savepath_checkin)
        print("Total Time Elapsed: ", time.time() - start_time)

        # Save results
        df_temp = {'n Clustering Iterations' : n_clustering_iterations, 'n Transition Matrix' : bikedemand.nTransitionMatrix, 'Temperature': bikedemand.tempFlag,'mse': checkout_mse, 'rmse': checkout_rmse, 'mre': checkout_mre, 'mae': checkout_mae}
        df_checkout_errs = df_checkout_errs.append(df_temp, ignore_index=True)
        df_temp = {'n Clustering Iterations' : n_clustering_iterations, 'n Transition Matrix' : bikedemand.nTransitionMatrix, 'Temperature': bikedemand.tempFlag,'mse': checkin_mse, 'rmse': checkin_rmse, 'mre': checkin_mre, 'mae': checkin_mae}
        df_checkin_errs = df_checkin_errs.append(df_temp, ignore_index=True)
  savepath_checkout_errors = savepath_folder + 'CheckoutErrors_K2={}.csv'.format(k2)
  savepath_checkin_errors = savepath_folder + 'CheckinErrors_K2={}.csv'.format(k2)
  df_checkout_errs.to_csv(savepath_checkout_errors)
  df_checkin_errs.to_csv(savepath_checkin_errors)
  return df_checkout_errs, df_checkin_errs

## k2 = 8

In [84]:
k2 = 8
n_clustering_iterations_list = [1, 5]
n_transition_matrix_list = [1, 24, 48]
temperatureFlag_list = [True, False]
df_checkout_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkin_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkout_errs, df_checkin_errs = run_scenarios(df_checkout_errs, df_checkin_errs, n_clustering_iterations_list, n_transition_matrix_list, temperatureFlag_list, k2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


1 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 6)
Test Data Shape:  (52809, 6)
Checkout training and testing errors
training errors:  148.101 12.17 251.137 5.852
testing errors:  95.145 9.754 154.416 4.758
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157061, 6)
Test Data Shape:  (52197, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.145 9.754 154.416 4.758
Checkin evaluation:
mse, rmse, mre, mae:  63.115 7.944 159.067 3.359
Total Time Elapsed:  426.4751946926117
1 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 6)
Test Data Shape:  (52809, 6)
Checkout training and testing errors
training errors:  148.101 12.17 251.137 5.852
testing errors:  95.145 9.754 154.416 4.758
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157061, 6)
Test Data Shape:  (52197, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.145 9.754 154.416 4.758
Checkin evaluation:
mse, rmse, mre, mae:  54.113 7.356 143.145 3.182
Total Time Elapsed:  437.5776686668396
1 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 6)
Test Data Shape:  (52809, 6)
Checkout training and testing errors
training errors:  148.101 12.17 251.137 5.852
testing errors:  95.145 9.754 154.416 4.758
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157061, 6)
Test Data Shape:  (52197, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.145 9.754 154.416 4.758
Checkin evaluation:
mse, rmse, mre, mae:  52.536 7.248 142.612 3.146
Total Time Elapsed:  446.249294757843
1 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 5)
Test Data Shape:  (52809, 5)
Checkout training and testing errors
training errors:  242.428 15.57 278.634 8.028
testing errors:  97.134 9.856 159.194 5.284
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157059, 5)
Test Data Shape:  (52197, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.134 9.856 159.194 5.284
Checkin evaluation:
mse, rmse, mre, mae:  70.502 8.397 174.435 4.025
Total Time Elapsed:  439.9011433124542
1 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 5)
Test Data Shape:  (52809, 5)
Checkout training and testing errors
training errors:  242.428 15.57 278.634 8.028
testing errors:  97.134 9.856 159.194 5.284
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157059, 5)
Test Data Shape:  (52197, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.134 9.856 159.194 5.284
Checkin evaluation:
mse, rmse, mre, mae:  62.685 7.917 153.819 3.855
Total Time Elapsed:  445.587929725647
1 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 60
Train Data Shape:  (158529, 5)
Test Data Shape:  (52809, 5)
Checkout training and testing errors
training errors:  242.428 15.57 278.634 8.028
testing errors:  97.134 9.856 159.194 5.284
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 55
Train Data Shape:  (157059, 5)
Test Data Shape:  (52197, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.134 9.856 159.194 5.284
Checkin evaluation:
mse, rmse, mre, mae:  60.455 7.775 152.217 3.817
Total Time Elapsed:  460.6041896343231


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


5 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 6)
Test Data Shape:  (52957, 6)
Checkout training and testing errors
training errors:  161.483 12.708 226.451 5.92
testing errors:  103.092 10.153 164.487 4.785
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158246, 6)
Test Data Shape:  (52639, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  103.092 10.153 164.487 4.785
Checkin evaluation:
mse, rmse, mre, mae:  68.821 8.296 185.482 3.405
Total Time Elapsed:  431.7338206768036
5 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 6)
Test Data Shape:  (52957, 6)
Checkout training and testing errors
training errors:  161.483 12.708 226.451 5.92
testing errors:  103.092 10.153 164.487 4.785
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158246, 6)
Test Data Shape:  (52639, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  103.092 10.153 164.487 4.785
Checkin evaluation:
mse, rmse, mre, mae:  56.899 7.543 149.575 3.196
Total Time Elapsed:  434.57317304611206
5 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 6)
Test Data Shape:  (52957, 6)
Checkout training and testing errors
training errors:  161.483 12.708 226.451 5.92
testing errors:  103.092 10.153 164.487 4.785
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158246, 6)
Test Data Shape:  (52639, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  103.092 10.153 164.487 4.785
Checkin evaluation:
mse, rmse, mre, mae:  54.8 7.403 147.423 3.159
Total Time Elapsed:  465.4459753036499
5 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 5)
Test Data Shape:  (52957, 5)
Checkout training and testing errors
training errors:  258.22 16.069 257.234 8.043
testing errors:  105.024 10.248 169.438 5.31
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158244, 5)
Test Data Shape:  (52639, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  105.024 10.248 169.438 5.31
Checkin evaluation:
mse, rmse, mre, mae:  76.595 8.752 193.14 4.055
Total Time Elapsed:  442.0597426891327
5 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 5)
Test Data Shape:  (52957, 5)
Checkout training and testing errors
training errors:  258.22 16.069 257.234 8.043
testing errors:  105.024 10.248 169.438 5.31
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158244, 5)
Test Data Shape:  (52639, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  105.024 10.248 169.438 5.31
Checkin evaluation:
mse, rmse, mre, mae:  66.439 8.151 160.438 3.858
Total Time Elapsed:  448.1070513725281
5 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 10
number of missing combinations: 53
Train Data Shape:  (159074, 5)
Test Data Shape:  (52957, 5)
Checkout training and testing errors
training errors:  258.22 16.069 257.234 8.043
testing errors:  105.024 10.248 169.438 5.31
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 6
number of missing combinations: 50
Train Data Shape:  (158244, 5)
Test Data Shape:  (52639, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  105.024 10.248 169.438 5.31
Checkin evaluation:
mse, rmse, mre, mae:  63.564 7.973 158.436 3.819
Total Time Elapsed:  461.68870210647583


In [85]:
df_checkout_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,95.145,9.754,154.416,4.758
1,1,24,True,95.145,9.754,154.416,4.758
2,1,48,True,95.145,9.754,154.416,4.758
3,1,1,False,97.134,9.856,159.194,5.284
4,1,24,False,97.134,9.856,159.194,5.284
5,1,48,False,97.134,9.856,159.194,5.284
6,5,1,True,103.092,10.153,164.487,4.785
7,5,24,True,103.092,10.153,164.487,4.785
8,5,48,True,103.092,10.153,164.487,4.785
9,5,1,False,105.024,10.248,169.438,5.31


In [86]:
df_checkin_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,63.115,7.944,159.067,3.359
1,1,24,True,54.113,7.356,143.145,3.182
2,1,48,True,52.536,7.248,142.612,3.146
3,1,1,False,70.502,8.397,174.435,4.025
4,1,24,False,62.685,7.917,153.819,3.855
5,1,48,False,60.455,7.775,152.217,3.817
6,5,1,True,68.821,8.296,185.482,3.405
7,5,24,True,56.899,7.543,149.575,3.196
8,5,48,True,54.8,7.403,147.423,3.159
9,5,1,False,76.595,8.752,193.14,4.055


In [87]:
k2 = 8
n_clustering_iterations_list = [10, 15, 20]
n_transition_matrix_list = [1, 24, 48]
temperatureFlag_list = [True, False]
df_checkout_errs, df_checkin_errs = run_scenarios(df_checkout_errs, df_checkin_errs, n_clustering_iterations_list, n_transition_matrix_list, temperatureFlag_list, k2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


10 True 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 6)
Test Data Shape:  (50563, 6)
Checkout training and testing errors
training errors:  172.293 13.126 272.843 6.122
testing errors:  116.434 10.79 212.083 4.998
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151463, 6)
Test Data Shape:  (49855, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.434 10.79 212.083 4.998
Checkin evaluation:
mse, rmse, mre, mae:  76.784 8.763 218.844 3.467
Total Time Elapsed:  404.84883522987366
10 True 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 6)
Test Data Shape:  (50563, 6)
Checkout training and testing errors
training errors:  172.293 13.126 272.843 6.122
testing errors:  116.434 10.79 212.083 4.998
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151463, 6)
Test Data Shape:  (49855, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.434 10.79 212.083 4.998
Checkin evaluation:
mse, rmse, mre, mae:  64.003 8.0 155.526 3.26
Total Time Elapsed:  417.74930906295776
10 True 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 6)
Test Data Shape:  (50563, 6)
Checkout training and testing errors
training errors:  172.293 13.126 272.843 6.122
testing errors:  116.434 10.79 212.083 4.998
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151463, 6)
Test Data Shape:  (49855, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.434 10.79 212.083 4.998
Checkin evaluation:
mse, rmse, mre, mae:  61.548 7.845 152.718 3.225
Total Time Elapsed:  439.0485451221466
10 False 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 5)
Test Data Shape:  (50563, 5)
Checkout training and testing errors
training errors:  282.37 16.804 305.787 8.364
testing errors:  118.008 10.863 221.787 5.531
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151461, 5)
Test Data Shape:  (49855, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.008 10.863 221.787 5.531
Checkin evaluation:
mse, rmse, mre, mae:  85.35 9.239 246.406 4.129
Total Time Elapsed:  431.13097286224365
10 False 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 5)
Test Data Shape:  (50563, 5)
Checkout training and testing errors
training errors:  282.37 16.804 305.787 8.364
testing errors:  118.008 10.863 221.787 5.531
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151461, 5)
Test Data Shape:  (49855, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.008 10.863 221.787 5.531
Checkin evaluation:
mse, rmse, mre, mae:  74.27 8.618 207.432 3.937
Total Time Elapsed:  433.9732322692871
10 False 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 29
number of missing combinations: 81
Train Data Shape:  (152874, 5)
Test Data Shape:  (50563, 5)
Checkout training and testing errors
training errors:  282.37 16.804 305.787 8.364
testing errors:  118.008 10.863 221.787 5.531
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 19
number of missing combinations: 88
Train Data Shape:  (151461, 5)
Test Data Shape:  (49855, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.008 10.863 221.787 5.531
Checkin evaluation:
mse, rmse, mre, mae:  70.733 8.41 204.743 3.897
Total Time Elapsed:  485.54336953163147


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


15 True 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 6)
Test Data Shape:  (51515, 6)
Checkout training and testing errors
training errors:  189.229 13.756 332.595 6.086
testing errors:  116.662 10.801 207.699 4.909
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153936, 6)
Test Data Shape:  (51193, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.662 10.801 207.699 4.909
Checkin evaluation:
mse, rmse, mre, mae:  79.887 8.938 247.06 3.479
Total Time Elapsed:  420.6315824985504
15 True 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 6)
Test Data Shape:  (51515, 6)
Checkout training and testing errors
training errors:  189.229 13.756 332.595 6.086
testing errors:  116.662 10.801 207.699 4.909
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153936, 6)
Test Data Shape:  (51193, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.662 10.801 207.699 4.909
Checkin evaluation:
mse, rmse, mre, mae:  65.96 8.122 186.825 3.283
Total Time Elapsed:  425.91599440574646
15 True 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 6)
Test Data Shape:  (51515, 6)
Checkout training and testing errors
training errors:  189.229 13.756 332.595 6.086
testing errors:  116.662 10.801 207.699 4.909
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153936, 6)
Test Data Shape:  (51193, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  116.662 10.801 207.699 4.909
Checkin evaluation:
mse, rmse, mre, mae:  63.399 7.962 183.357 3.246
Total Time Elapsed:  436.83304166793823
15 False 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 5)
Test Data Shape:  (51515, 5)
Checkout training and testing errors
training errors:  302.581 17.395 373.957 8.264
testing errors:  118.851 10.902 209.011 5.428
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153934, 5)
Test Data Shape:  (51193, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.851 10.902 209.011 5.428
Checkin evaluation:
mse, rmse, mre, mae:  89.066 9.437 268.184 4.138
Total Time Elapsed:  427.80311822891235
15 False 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 5)
Test Data Shape:  (51515, 5)
Checkout training and testing errors
training errors:  302.581 17.395 373.957 8.264
testing errors:  118.851 10.902 209.011 5.428
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153934, 5)
Test Data Shape:  (51193, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.851 10.902 209.011 5.428
Checkin evaluation:
mse, rmse, mre, mae:  77.01 8.776 220.639 3.949
Total Time Elapsed:  439.610800743103
15 False 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (154974, 5)
Test Data Shape:  (51515, 5)
Checkout training and testing errors
training errors:  302.581 17.395 373.957 8.264
testing errors:  118.851 10.902 209.011 5.428
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 52
Train Data Shape:  (153934, 5)
Test Data Shape:  (51193, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  118.851 10.902 209.011 5.428
Checkin evaluation:
mse, rmse, mre, mae:  73.448 8.57 217.641 3.91
Total Time Elapsed:  444.2680866718292


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


20 True 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 6)
Test Data Shape:  (54684, 6)
Checkout training and testing errors
training errors:  133.458 11.552 181.861 5.716
testing errors:  87.172 9.337 144.8 4.614
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162701, 6)
Test Data Shape:  (54111, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  87.172 9.337 144.8 4.614
Checkin evaluation:
mse, rmse, mre, mae:  64.527 8.033 167.594 3.503
Total Time Elapsed:  424.85011553764343
20 True 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 6)
Test Data Shape:  (54684, 6)
Checkout training and testing errors
training errors:  133.458 11.552 181.861 5.716
testing errors:  87.172 9.337 144.8 4.614
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162701, 6)
Test Data Shape:  (54111, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  87.172 9.337 144.8 4.614
Checkin evaluation:
mse, rmse, mre, mae:  52.998 7.28 134.344 3.271
Total Time Elapsed:  445.5611789226532
20 True 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 6)
Test Data Shape:  (54684, 6)
Checkout training and testing errors
training errors:  133.458 11.552 181.861 5.716
testing errors:  87.172 9.337 144.8 4.614
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162701, 6)
Test Data Shape:  (54111, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  87.172 9.337 144.8 4.614
Checkin evaluation:
mse, rmse, mre, mae:  51.182 7.154 133.798 3.232
Total Time Elapsed:  453.28407192230225
20 False 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 5)
Test Data Shape:  (54684, 5)
Checkout training and testing errors
training errors:  217.396 14.744 210.111 7.77
testing errors:  89.02 9.435 149.49 5.14
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162699, 5)
Test Data Shape:  (54111, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.02 9.435 149.49 5.14
Checkin evaluation:
mse, rmse, mre, mae:  71.521 8.457 180.492 4.183
Total Time Elapsed:  461.849609375
20 False 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 5)
Test Data Shape:  (54684, 5)
Checkout training and testing errors
training errors:  217.396 14.744 210.111 7.77
testing errors:  89.02 9.435 149.49 5.14
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162699, 5)
Test Data Shape:  (54111, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.02 9.435 149.49 5.14
Checkin evaluation:
mse, rmse, mre, mae:  61.582 7.847 153.023 3.963
Total Time Elapsed:  435.99752020835876
20 False 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 4
number of missing combinations: 37
Train Data Shape:  (163889, 5)
Test Data Shape:  (54684, 5)
Checkout training and testing errors
training errors:  217.396 14.744 210.111 7.77
testing errors:  89.02 9.435 149.49 5.14
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 1
number of missing combinations: 32
Train Data Shape:  (162699, 5)
Test Data Shape:  (54111, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.02 9.435 149.49 5.14
Checkin evaluation:
mse, rmse, mre, mae:  59.068 7.686 151.21 3.919
Total Time Elapsed:  455.73870515823364


In [88]:
df_checkout_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,95.145,9.754,154.416,4.758
1,1,24,True,95.145,9.754,154.416,4.758
2,1,48,True,95.145,9.754,154.416,4.758
3,1,1,False,97.134,9.856,159.194,5.284
4,1,24,False,97.134,9.856,159.194,5.284
5,1,48,False,97.134,9.856,159.194,5.284
6,5,1,True,103.092,10.153,164.487,4.785
7,5,24,True,103.092,10.153,164.487,4.785
8,5,48,True,103.092,10.153,164.487,4.785
9,5,1,False,105.024,10.248,169.438,5.31


In [89]:
df_checkin_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,63.115,7.944,159.067,3.359
1,1,24,True,54.113,7.356,143.145,3.182
2,1,48,True,52.536,7.248,142.612,3.146
3,1,1,False,70.502,8.397,174.435,4.025
4,1,24,False,62.685,7.917,153.819,3.855
5,1,48,False,60.455,7.775,152.217,3.817
6,5,1,True,68.821,8.296,185.482,3.405
7,5,24,True,56.899,7.543,149.575,3.196
8,5,48,True,54.8,7.403,147.423,3.159
9,5,1,False,76.595,8.752,193.14,4.055


around 3.5 hours for each k2 (~20 minutes for transition matrix = [1,24,48])

## k2 = 12

In [91]:
k2 = 12
n_clustering_iterations_list = [1, 5, 10, 15, 20]
n_transition_matrix_list = [1, 24, 48]
temperatureFlag_list = [True, False]
df_checkout_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkin_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkout_errs, df_checkin_errs = run_scenarios(df_checkout_errs, df_checkin_errs, n_clustering_iterations_list, n_transition_matrix_list, temperatureFlag_list, k2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


1 True 1
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 6)
Test Data Shape:  (53385, 6)
Checkout training and testing errors
training errors:  140.273 11.844 239.847 5.772
testing errors:  96.045 9.8 156.976 4.705
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159586, 6)
Test Data Shape:  (52820, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.045 9.8 156.976 4.705
Checkin evaluation:
mse, rmse, mre, mae:  62.237 7.889 170.967 3.3
Total Time Elapsed:  439.0542469024658
1 True 24
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 6)
Test Data Shape:  (53385, 6)
Checkout training and testing errors
training errors:  140.273 11.844 239.847 5.772
testing errors:  96.045 9.8 156.976 4.705
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159586, 6)
Test Data Shape:  (52820, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.045 9.8 156.976 4.705
Checkin evaluation:
mse, rmse, mre, mae:  52.674 7.258 140.246 3.098
Total Time Elapsed:  451.0067970752716
1 True 48
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 6)
Test Data Shape:  (53385, 6)
Checkout training and testing errors
training errors:  140.273 11.844 239.847 5.772
testing errors:  96.045 9.8 156.976 4.705
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159586, 6)
Test Data Shape:  (52820, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.045 9.8 156.976 4.705
Checkin evaluation:
mse, rmse, mre, mae:  51.049 7.145 138.419 3.064
Total Time Elapsed:  451.0876348018646
1 False 1
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 5)
Test Data Shape:  (53385, 5)
Checkout training and testing errors
training errors:  233.789 15.29 264.576 7.901
testing errors:  97.596 9.879 160.576 5.221
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159584, 5)
Test Data Shape:  (52820, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.596 9.879 160.576 5.221
Checkin evaluation:
mse, rmse, mre, mae:  69.28 8.323 188.674 3.968
Total Time Elapsed:  451.4167551994324
1 False 24
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 5)
Test Data Shape:  (53385, 5)
Checkout training and testing errors
training errors:  233.789 15.29 264.576 7.901
testing errors:  97.596 9.879 160.576 5.221
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159584, 5)
Test Data Shape:  (52820, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.596 9.879 160.576 5.221
Checkin evaluation:
mse, rmse, mre, mae:  61.019 7.811 165.968 3.781
Total Time Elapsed:  454.42970609664917
1 False 48
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 12
number of missing combinations: 60
Train Data Shape:  (161035, 5)
Test Data Shape:  (53385, 5)
Checkout training and testing errors
training errors:  233.789 15.29 264.576 7.901
testing errors:  97.596 9.879 160.576 5.221
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 60
Train Data Shape:  (159584, 5)
Test Data Shape:  (52820, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.596 9.879 160.576 5.221
Checkin evaluation:
mse, rmse, mre, mae:  58.633 7.657 164.281 3.742
Total Time Elapsed:  477.2665526866913


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


5 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 6)
Test Data Shape:  (54115, 6)
Checkout training and testing errors
training errors:  137.571 11.729 201.361 5.73
testing errors:  89.8 9.476 142.956 4.668
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161047, 6)
Test Data Shape:  (53754, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.8 9.476 142.956 4.668
Checkin evaluation:
mse, rmse, mre, mae:  60.286 7.764 179.866 3.392
Total Time Elapsed:  428.20111560821533
5 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 6)
Test Data Shape:  (54115, 6)
Checkout training and testing errors
training errors:  137.571 11.729 201.361 5.73
testing errors:  89.8 9.476 142.956 4.668
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161047, 6)
Test Data Shape:  (53754, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.8 9.476 142.956 4.668
Checkin evaluation:
mse, rmse, mre, mae:  50.892 7.134 135.092 3.187
Total Time Elapsed:  445.0756239891052
5 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 6)
Test Data Shape:  (54115, 6)
Checkout training and testing errors
training errors:  137.571 11.729 201.361 5.73
testing errors:  89.8 9.476 142.956 4.668
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161047, 6)
Test Data Shape:  (53754, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  89.8 9.476 142.956 4.668
Checkin evaluation:
mse, rmse, mre, mae:  49.451 7.032 133.164 3.153
Total Time Elapsed:  461.98356771469116
5 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 5)
Test Data Shape:  (54115, 5)
Checkout training and testing errors
training errors:  225.077 15.003 230.296 7.837
testing errors:  91.746 9.578 145.384 5.183
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161045, 5)
Test Data Shape:  (53754, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.746 9.578 145.384 5.183
Checkin evaluation:
mse, rmse, mre, mae:  67.224 8.199 193.838 4.069
Total Time Elapsed:  451.4542336463928
5 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 5)
Test Data Shape:  (54115, 5)
Checkout training and testing errors
training errors:  225.077 15.003 230.296 7.837
testing errors:  91.746 9.578 145.384 5.183
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161045, 5)
Test Data Shape:  (53754, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.746 9.578 145.384 5.183
Checkin evaluation:
mse, rmse, mre, mae:  59.188 7.693 165.313 3.875
Total Time Elapsed:  453.26015186309814
5 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 24
number of missing combinations: 68
Train Data Shape:  (162034, 5)
Test Data Shape:  (54115, 5)
Checkout training and testing errors
training errors:  225.077 15.003 230.296 7.837
testing errors:  91.746 9.578 145.384 5.183
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 17
number of missing combinations: 68
Train Data Shape:  (161045, 5)
Test Data Shape:  (53754, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.746 9.578 145.384 5.183
Checkin evaluation:
mse, rmse, mre, mae:  57.118 7.558 163.61 3.839
Total Time Elapsed:  467.84950065612793


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


10 True 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 6)
Test Data Shape:  (52369, 6)
Checkout training and testing errors
training errors:  180.63 13.44 347.605 5.972
testing errors:  110.498 10.512 253.817 4.816
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155800, 6)
Test Data Shape:  (51747, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  110.498 10.512 253.817 4.816
Checkin evaluation:
mse, rmse, mre, mae:  76.348 8.738 247.348 3.485
Total Time Elapsed:  418.9600758552551
10 True 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 6)
Test Data Shape:  (52369, 6)
Checkout training and testing errors
training errors:  180.63 13.44 347.605 5.972
testing errors:  110.498 10.512 253.817 4.816
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155800, 6)
Test Data Shape:  (51747, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  110.498 10.512 253.817 4.816
Checkin evaluation:
mse, rmse, mre, mae:  62.878 7.93 182.836 3.269
Total Time Elapsed:  416.80710554122925
10 True 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 6)
Test Data Shape:  (52369, 6)
Checkout training and testing errors
training errors:  180.63 13.44 347.605 5.972
testing errors:  110.498 10.512 253.817 4.816
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155800, 6)
Test Data Shape:  (51747, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  110.498 10.512 253.817 4.816
Checkin evaluation:
mse, rmse, mre, mae:  60.478 7.777 178.632 3.232
Total Time Elapsed:  422.38091468811035
10 False 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 5)
Test Data Shape:  (52369, 5)
Checkout training and testing errors
training errors:  287.177 16.946 386.425 8.111
testing errors:  112.38 10.601 260.425 5.328
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155798, 5)
Test Data Shape:  (51747, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  112.38 10.601 260.425 5.328
Checkin evaluation:
mse, rmse, mre, mae:  85.277 9.235 277.113 4.134
Total Time Elapsed:  421.2941653728485
10 False 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 5)
Test Data Shape:  (52369, 5)
Checkout training and testing errors
training errors:  287.177 16.946 386.425 8.111
testing errors:  112.38 10.601 260.425 5.328
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155798, 5)
Test Data Shape:  (51747, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  112.38 10.601 260.425 5.328
Checkin evaluation:
mse, rmse, mre, mae:  73.528 8.575 239.571 3.93
Total Time Elapsed:  428.9633467197418
10 False 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 52
Train Data Shape:  (157278, 5)
Test Data Shape:  (52369, 5)
Checkout training and testing errors
training errors:  287.177 16.946 386.425 8.111
testing errors:  112.38 10.601 260.425 5.328
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 49
Train Data Shape:  (155798, 5)
Test Data Shape:  (51747, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  112.38 10.601 260.425 5.328
Checkin evaluation:
mse, rmse, mre, mae:  70.048 8.369 237.019 3.89
Total Time Elapsed:  443.67332649230957


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


15 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 6)
Test Data Shape:  (53223, 6)
Checkout training and testing errors
training errors:  160.94 12.686 279.575 5.874
testing errors:  98.683 9.934 212.86 4.735
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158541, 6)
Test Data Shape:  (52781, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  98.683 9.934 212.86 4.735
Checkin evaluation:
mse, rmse, mre, mae:  68.602 8.283 253.664 3.404
Total Time Elapsed:  426.8392150402069
15 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 6)
Test Data Shape:  (53223, 6)
Checkout training and testing errors
training errors:  160.94 12.686 279.575 5.874
testing errors:  98.683 9.934 212.86 4.735
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158541, 6)
Test Data Shape:  (52781, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  98.683 9.934 212.86 4.735
Checkin evaluation:
mse, rmse, mre, mae:  55.143 7.426 176.355 3.18
Total Time Elapsed:  435.7319881916046
15 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 6)
Test Data Shape:  (53223, 6)
Checkout training and testing errors
training errors:  160.94 12.686 279.575 5.874
testing errors:  98.683 9.934 212.86 4.735
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158541, 6)
Test Data Shape:  (52781, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  98.683 9.934 212.86 4.735
Checkin evaluation:
mse, rmse, mre, mae:  53.024 7.282 173.205 3.144
Total Time Elapsed:  450.47626996040344
15 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 5)
Test Data Shape:  (53223, 5)
Checkout training and testing errors
training errors:  252.422 15.888 308.261 7.988
testing errors:  100.519 10.026 217.261 5.261
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158540, 5)
Test Data Shape:  (52781, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  100.519 10.026 217.261 5.261
Checkin evaluation:
mse, rmse, mre, mae:  76.063 8.721 274.898 4.068
Total Time Elapsed:  438.6603558063507
15 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 5)
Test Data Shape:  (53223, 5)
Checkout training and testing errors
training errors:  252.422 15.888 308.261 7.988
testing errors:  100.519 10.026 217.261 5.261
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158540, 5)
Test Data Shape:  (52781, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  100.519 10.026 217.261 5.261
Checkin evaluation:
mse, rmse, mre, mae:  64.492 8.031 229.29 3.852
Total Time Elapsed:  430.26512122154236
15 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 11
number of missing combinations: 49
Train Data Shape:  (159632, 5)
Test Data Shape:  (53223, 5)
Checkout training and testing errors
training errors:  252.422 15.888 308.261 7.988
testing errors:  100.519 10.026 217.261 5.261
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 5
number of missing combinations: 55
Train Data Shape:  (158540, 5)
Test Data Shape:  (52781, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  100.519 10.026 217.261 5.261
Checkin evaluation:
mse, rmse, mre, mae:  61.499 7.842 226.518 3.812
Total Time Elapsed:  456.55586647987366


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


20 True 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 6)
Test Data Shape:  (51744, 6)
Checkout training and testing errors
training errors:  184.72 13.591 348.547 5.993
testing errors:  115.236 10.735 247.56 4.868
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154868, 6)
Test Data Shape:  (51152, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  115.236 10.735 247.56 4.868
Checkin evaluation:
mse, rmse, mre, mae:  77.808 8.821 248.431 3.469
Total Time Elapsed:  408.9828507900238
20 True 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 6)
Test Data Shape:  (51744, 6)
Checkout training and testing errors
training errors:  184.72 13.591 348.547 5.993
testing errors:  115.236 10.735 247.56 4.868
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154868, 6)
Test Data Shape:  (51152, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  115.236 10.735 247.56 4.868
Checkin evaluation:
mse, rmse, mre, mae:  64.662 8.041 181.887 3.269
Total Time Elapsed:  411.8808398246765
20 True 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 6)
Test Data Shape:  (51744, 6)
Checkout training and testing errors
training errors:  184.72 13.591 348.547 5.993
testing errors:  115.236 10.735 247.56 4.868
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154868, 6)
Test Data Shape:  (51152, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  115.236 10.735 247.56 4.868
Checkin evaluation:
mse, rmse, mre, mae:  62.246 7.89 177.699 3.234
Total Time Elapsed:  441.88317131996155
20 False 1
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 5)
Test Data Shape:  (51744, 5)
Checkout training and testing errors
training errors:  294.067 17.148 386.986 8.15
testing errors:  117.352 10.833 252.944 5.378
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154867, 5)
Test Data Shape:  (51152, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  117.352 10.833 252.944 5.378
Checkin evaluation:
mse, rmse, mre, mae:  87.184 9.337 273.248 4.127
Total Time Elapsed:  434.55005955696106
20 False 24
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 5)
Test Data Shape:  (51744, 5)
Checkout training and testing errors
training errors:  294.067 17.148 386.986 8.15
testing errors:  117.352 10.833 252.944 5.378
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154867, 5)
Test Data Shape:  (51152, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  117.352 10.833 252.944 5.378
Checkin evaluation:
mse, rmse, mre, mae:  75.743 8.703 234.791 3.939
Total Time Elapsed:  428.198224067688
20 False 48
37
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 7
number of missing combinations: 47
Train Data Shape:  (156448, 5)
Test Data Shape:  (51744, 5)
Checkout training and testing errors
training errors:  294.067 17.148 386.986 8.15
testing errors:  117.352 10.833 252.944 5.378
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 2
number of missing combinations: 48
Train Data Shape:  (154867, 5)
Test Data Shape:  (51152, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  117.352 10.833 252.944 5.378
Checkin evaluation:
mse, rmse, mre, mae:  72.23 8.499 232.312 3.899
Total Time Elapsed:  436.2483057975769


In [92]:
df_checkout_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,96.045,9.8,156.976,4.705
1,1,24,True,96.045,9.8,156.976,4.705
2,1,48,True,96.045,9.8,156.976,4.705
3,1,1,False,97.596,9.879,160.576,5.221
4,1,24,False,97.596,9.879,160.576,5.221
5,1,48,False,97.596,9.879,160.576,5.221
6,5,1,True,89.8,9.476,142.956,4.668
7,5,24,True,89.8,9.476,142.956,4.668
8,5,48,True,89.8,9.476,142.956,4.668
9,5,1,False,91.746,9.578,145.384,5.183


In [93]:
df_checkin_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,62.237,7.889,170.967,3.3
1,1,24,True,52.674,7.258,140.246,3.098
2,1,48,True,51.049,7.145,138.419,3.064
3,1,1,False,69.28,8.323,188.674,3.968
4,1,24,False,61.019,7.811,165.968,3.781
5,1,48,False,58.633,7.657,164.281,3.742
6,5,1,True,60.286,7.764,179.866,3.392
7,5,24,True,50.892,7.134,135.092,3.187
8,5,48,True,49.451,7.032,133.164,3.153
9,5,1,False,67.224,8.199,193.838,4.069


## k2 = 16

In [10]:
k2 = 16
n_clustering_iterations_list = [1, 5, 10, 15, 20]
n_transition_matrix_list = [1, 24, 48]
temperatureFlag_list = [True, False]
df_checkout_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkin_errs = pd.DataFrame(columns = ['n Clustering Iterations', 'n Transition Matrix', 'Temperature', 'mse', 'rmse', 'mre', 'mae'])
df_checkout_errs, df_checkin_errs = run_scenarios(df_checkout_errs, df_checkin_errs, n_clustering_iterations_list, n_transition_matrix_list, temperatureFlag_list, k2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


1 True 1
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 6)
Test Data Shape:  (54805, 6)
Checkout training and testing errors
training errors:  149.931 12.245 271.799 5.649
testing errors:  94.681 9.73 225.108 4.592
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164297, 6)
Test Data Shape:  (54009, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  94.681 9.73 225.108 4.592
Checkin evaluation:
mse, rmse, mre, mae:  65.488 8.092 261.311 3.222
Total Time Elapsed:  457.47393107414246
1 True 24
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 6)
Test Data Shape:  (54805, 6)
Checkout training and testing errors
training errors:  149.931 12.245 271.799 5.649
testing errors:  94.681 9.73 225.108 4.592
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164297, 6)
Test Data Shape:  (54009, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  94.681 9.73 225.108 4.592
Checkin evaluation:
mse, rmse, mre, mae:  53.488 7.314 185.054 3.044
Total Time Elapsed:  468.8501949310303
1 True 48
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 6)
Test Data Shape:  (54805, 6)
Checkout training and testing errors
training errors:  149.931 12.245 271.799 5.649
testing errors:  94.681 9.73 225.108 4.592
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164297, 6)
Test Data Shape:  (54009, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  94.681 9.73 225.108 4.592
Checkin evaluation:
mse, rmse, mre, mae:  51.384 7.168 180.338 3.012
Total Time Elapsed:  491.6245059967041
1 False 1
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 5)
Test Data Shape:  (54805, 5)
Checkout training and testing errors
training errors:  237.416 15.408 301.622 7.671
testing errors:  96.441 9.82 229.622 5.103
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164296, 5)
Test Data Shape:  (54009, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.441 9.82 229.622 5.103
Checkin evaluation:
mse, rmse, mre, mae:  72.708 8.527 282.631 3.876
Total Time Elapsed:  483.28047800064087
1 False 24
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 5)
Test Data Shape:  (54805, 5)
Checkout training and testing errors
training errors:  237.416 15.408 301.622 7.671
testing errors:  96.441 9.82 229.622 5.103
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164296, 5)
Test Data Shape:  (54009, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.441 9.82 229.622 5.103
Checkin evaluation:
mse, rmse, mre, mae:  62.349 7.896 237.808 3.705
Total Time Elapsed:  483.6417565345764
1 False 48
40
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 13
number of missing combinations: 68
Train Data Shape:  (165734, 5)
Test Data Shape:  (54805, 5)
Checkout training and testing errors
training errors:  237.416 15.408 301.622 7.671
testing errors:  96.441 9.82 229.622 5.103
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 7
number of missing combinations: 71
Train Data Shape:  (164296, 5)
Test Data Shape:  (54009, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.441 9.82 229.622 5.103
Checkin evaluation:
mse, rmse, mre, mae:  59.439 7.71 234.969 3.667
Total Time Elapsed:  499.4098184108734


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


5 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 6)
Test Data Shape:  (53146, 6)
Checkout training and testing errors
training errors:  154.135 12.415 240.344 5.823
testing errors:  91.115 9.545 199.402 4.729
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158273, 6)
Test Data Shape:  (52513, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.115 9.545 199.402 4.729
Checkin evaluation:
mse, rmse, mre, mae:  64.931 8.058 235.466 3.392
Total Time Elapsed:  436.8736660480499
5 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 6)
Test Data Shape:  (53146, 6)
Checkout training and testing errors
training errors:  154.135 12.415 240.344 5.823
testing errors:  91.115 9.545 199.402 4.729
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158273, 6)
Test Data Shape:  (52513, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.115 9.545 199.402 4.729
Checkin evaluation:
mse, rmse, mre, mae:  54.1 7.355 172.24 3.207
Total Time Elapsed:  443.4763000011444
5 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 6)
Test Data Shape:  (53146, 6)
Checkout training and testing errors
training errors:  154.135 12.415 240.344 5.823
testing errors:  91.115 9.545 199.402 4.729
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158273, 6)
Test Data Shape:  (52513, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  91.115 9.545 199.402 4.729
Checkin evaluation:
mse, rmse, mre, mae:  52.117 7.219 168.211 3.173
Total Time Elapsed:  455.38393998146057
5 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 5)
Test Data Shape:  (53146, 5)
Checkout training and testing errors
training errors:  244.393 15.633 270.056 7.975
testing errors:  93.594 9.674 203.056 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158271, 5)
Test Data Shape:  (52513, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.594 9.674 203.056 5.269
Checkin evaluation:
mse, rmse, mre, mae:  71.955 8.483 253.043 4.041
Total Time Elapsed:  450.9632377624512
5 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 5)
Test Data Shape:  (53146, 5)
Checkout training and testing errors
training errors:  244.393 15.633 270.056 7.975
testing errors:  93.594 9.674 203.056 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158271, 5)
Test Data Shape:  (52513, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.594 9.674 203.056 5.269
Checkin evaluation:
mse, rmse, mre, mae:  62.636 7.914 215.843 3.866
Total Time Elapsed:  456.77742052078247
5 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 16
number of missing combinations: 66
Train Data Shape:  (159180, 5)
Test Data Shape:  (53146, 5)
Checkout training and testing errors
training errors:  244.393 15.633 270.056 7.975
testing errors:  93.594 9.674 203.056 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 4
number of missing combinations: 71
Train Data Shape:  (158271, 5)
Test Data Shape:  (52513, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.594 9.674 203.056 5.269
Checkin evaluation:
mse, rmse, mre, mae:  60.157 7.756 213.401 3.826
Total Time Elapsed:  482.9334478378296


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


10 True 1
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 6)
Test Data Shape:  (53457, 6)
Checkout training and testing errors
training errors:  144.915 12.038 205.988 5.754
testing errors:  90.122 9.493 151.828 4.7
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160589, 6)
Test Data Shape:  (53178, 6)
Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  90.122 9.493 151.828 4.7
Checkin evaluation:
mse, rmse, mre, mae:  59.566 7.718 156.936 3.299
Total Time Elapsed:  437.67728185653687
10 True 24
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 6)
Test Data Shape:  (53457, 6)
Checkout training and testing errors
training errors:  144.915 12.038 205.988 5.754
testing errors:  90.122 9.493 151.828 4.7
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160589, 6)
Test Data Shape:  (53178, 6)
Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  90.122 9.493 151.828 4.7
Checkin evaluation:
mse, rmse, mre, mae:  50.379 7.098 122.527 3.114
Total Time Elapsed:  460.8298280239105
10 True 48
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 6)
Test Data Shape:  (53457, 6)
Checkout training and testing errors
training errors:  144.915 12.038 205.988 5.754
testing errors:  90.122 9.493 151.828 4.7
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160589, 6)
Test Data Shape:  (53178, 6)
Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  90.122 9.493 151.828 4.7
Checkin evaluation:
mse, rmse, mre, mae:  48.821 6.987 121.086 3.081
Total Time Elapsed:  469.222332239151
10 False 1
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 5)
Test Data Shape:  (53457, 5)
Checkout training and testing errors
training errors:  232.722 15.255 219.773 7.854
testing errors:  92.301 9.607 161.452 5.228
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160587, 5)
Test Data Shape:  (53178, 5)




Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  92.301 9.607 161.452 5.228
Checkin evaluation:
mse, rmse, mre, mae:  66.079 8.129 173.973 3.96
Total Time Elapsed:  461.81950402259827
10 False 24
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 5)
Test Data Shape:  (53457, 5)
Checkout training and testing errors
training errors:  232.722 15.255 219.773 7.854
testing errors:  92.301 9.607 161.452 5.228
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160587, 5)
Test Data Shape:  (53178, 5)




Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  92.301 9.607 161.452 5.228
Checkin evaluation:
mse, rmse, mre, mae:  58.18 7.628 147.12 3.78
Total Time Elapsed:  459.9306640625
10 False 48
39
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 36
number of missing combinations: 91
Train Data Shape:  (161440, 5)
Test Data Shape:  (53457, 5)
Checkout training and testing errors
training errors:  232.722 15.255 219.773 7.854
testing errors:  92.301 9.607 161.452 5.228
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 91
Train Data Shape:  (160587, 5)
Test Data Shape:  (53178, 5)




Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  92.301 9.607 161.452 5.228
Checkin evaluation:
mse, rmse, mre, mae:  56.123 7.492 145.326 3.743
Total Time Elapsed:  476.0773205757141


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


15 True 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 6)
Test Data Shape:  (52996, 6)
Checkout training and testing errors
training errors:  160.723 12.678 234.432 5.885
testing errors:  95.559 9.775 191.858 4.733
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157028, 6)
Test Data Shape:  (52507, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.559 9.775 191.858 4.733
Checkin evaluation:
mse, rmse, mre, mae:  66.303 8.143 195.974 3.377
Total Time Elapsed:  453.198050737381
15 True 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 6)
Test Data Shape:  (52996, 6)
Checkout training and testing errors
training errors:  160.723 12.678 234.432 5.885
testing errors:  95.559 9.775 191.858 4.733
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157028, 6)
Test Data Shape:  (52507, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.559 9.775 191.858 4.733
Checkin evaluation:
mse, rmse, mre, mae:  55.192 7.429 158.965 3.191
Total Time Elapsed:  434.8517396450043
15 True 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 6)
Test Data Shape:  (52996, 6)
Checkout training and testing errors
training errors:  160.723 12.678 234.432 5.885
testing errors:  95.559 9.775 191.858 4.733
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157028, 6)
Test Data Shape:  (52507, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  95.559 9.775 191.858 4.733
Checkin evaluation:
mse, rmse, mre, mae:  53.258 7.298 156.704 3.157
Total Time Elapsed:  452.54115557670593
15 False 1
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 5)
Test Data Shape:  (52996, 5)
Checkout training and testing errors
training errors:  254.397 15.95 255.338 8.034
testing errors:  97.818 9.89 196.392 5.263
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157026, 5)
Test Data Shape:  (52507, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.818 9.89 196.392 5.263
Checkin evaluation:
mse, rmse, mre, mae:  73.646 8.582 209.618 4.035
Total Time Elapsed:  440.90150475502014
15 False 24
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 5)
Test Data Shape:  (52996, 5)
Checkout training and testing errors
training errors:  254.397 15.95 255.338 8.034
testing errors:  97.818 9.89 196.392 5.263
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157026, 5)
Test Data Shape:  (52507, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.818 9.89 196.392 5.263
Checkin evaluation:
mse, rmse, mre, mae:  64.218 8.014 182.149 3.86
Total Time Elapsed:  452.19189620018005
15 False 48
38
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 31
number of missing combinations: 74
Train Data Shape:  (158103, 5)
Test Data Shape:  (52996, 5)
Checkout training and testing errors
training errors:  254.397 15.95 255.338 8.034
testing errors:  97.818 9.89 196.392 5.263
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 23
number of missing combinations: 72
Train Data Shape:  (157026, 5)
Test Data Shape:  (52507, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  97.818 9.89 196.392 5.263
Checkin evaluation:
mse, rmse, mre, mae:  61.745 7.858 180.342 3.824
Total Time Elapsed:  472.2643187046051


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


20 True 1
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 6)
Test Data Shape:  (53149, 6)
Checkout training and testing errors
training errors:  152.072 12.332 277.937 5.877
testing errors:  93.961 9.693 231.626 4.726
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155947, 6)
Test Data Shape:  (52532, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.961 9.693 231.626 4.726
Checkin evaluation:
mse, rmse, mre, mae:  72.333 8.505 275.176 3.639
Total Time Elapsed:  405.5531952381134
20 True 24
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 6)
Test Data Shape:  (53149, 6)
Checkout training and testing errors
training errors:  152.072 12.332 277.937 5.877
testing errors:  93.961 9.693 231.626 4.726
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155947, 6)
Test Data Shape:  (52532, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.961 9.693 231.626 4.726
Checkin evaluation:
mse, rmse, mre, mae:  60.051 7.749 202.426 3.448
Total Time Elapsed:  405.92526149749756
20 True 48
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 6)
Test Data Shape:  (53149, 6)
Checkout training and testing errors
training errors:  152.072 12.332 277.937 5.877
testing errors:  93.961 9.693 231.626 4.726
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155947, 6)
Test Data Shape:  (52532, 6)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  93.961 9.693 231.626 4.726
Checkin evaluation:
mse, rmse, mre, mae:  57.693 7.596 197.71 3.412
Total Time Elapsed:  423.77644896507263
20 False 1
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 5)
Test Data Shape:  (53149, 5)
Checkout training and testing errors
training errors:  245.493 15.668 321.021 8.071
testing errors:  96.158 9.806 237.021 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155946, 5)
Test Data Shape:  (52532, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.158 9.806 237.021 5.269
Checkin evaluation:
mse, rmse, mre, mae:  80.716 8.984 300.115 4.342
Total Time Elapsed:  412.5213258266449
20 False 24
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 5)
Test Data Shape:  (53149, 5)
Checkout training and testing errors
training errors:  245.493 15.668 321.021 8.071
testing errors:  96.158 9.806 237.021 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155946, 5)
Test Data Shape:  (52532, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.158 9.806 237.021 5.269
Checkin evaluation:
mse, rmse, mre, mae:  70.147 8.375 258.896 4.161
Total Time Elapsed:  417.0624077320099
20 False 48
35
Checkout predictions
(2438720, 16) (1970776, 16) (467944, 16)
number of missing combinations: 14
number of missing combinations: 38
Train Data Shape:  (157128, 5)
Test Data Shape:  (53149, 5)
Checkout training and testing errors
training errors:  245.493 15.668 321.021 8.071
testing errors:  96.158 9.806 237.021 5.269
Transition Matrix Computation
(2438720, 33) (1970776, 33) (467944, 33)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Summarize checkout predictions
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Checkin predictions
(2438720, 15) (1970751, 15) (467969, 15)
number of missing combinations: 13
number of missing combinations: 41
Train Data Shape:  (155946, 5)
Test Data Shape:  (52532, 5)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Evaluate the predictions
Checkout evaluation:
mse, rmse, mre, mae:  96.158 9.806 237.021 5.269
Checkin evaluation:
mse, rmse, mre, mae:  67.103 8.192 256.18 4.12
Total Time Elapsed:  426.1037654876709


In [11]:
df_checkout_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,94.681,9.73,225.108,4.592
1,1,24,True,94.681,9.73,225.108,4.592
2,1,48,True,94.681,9.73,225.108,4.592
3,1,1,False,96.441,9.82,229.622,5.103
4,1,24,False,96.441,9.82,229.622,5.103
5,1,48,False,96.441,9.82,229.622,5.103
6,5,1,True,91.115,9.545,199.402,4.729
7,5,24,True,91.115,9.545,199.402,4.729
8,5,48,True,91.115,9.545,199.402,4.729
9,5,1,False,93.594,9.674,203.056,5.269


In [12]:
df_checkin_errs

Unnamed: 0,n Clustering Iterations,n Transition Matrix,Temperature,mse,rmse,mre,mae
0,1,1,True,65.488,8.092,261.311,3.222
1,1,24,True,53.488,7.314,185.054,3.044
2,1,48,True,51.384,7.168,180.338,3.012
3,1,1,False,72.708,8.527,282.631,3.876
4,1,24,False,62.349,7.896,237.808,3.705
5,1,48,False,59.439,7.71,234.969,3.667
6,5,1,True,64.931,8.058,235.466,3.392
7,5,24,True,54.1,7.355,172.24,3.207
8,5,48,True,52.117,7.219,168.211,3.173
9,5,1,False,71.955,8.483,253.043,4.041
