<a href="https://colab.research.google.com/github/shengy90/MSc-Project/blob/master/notebooks/30th_July_Optimal_Clusters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1️⃣ Setup Notebook 💻**


### **Authenticate with BigQuery ☁️**

In [None]:
!pip install --upgrade google-cloud-bigquery[bqstorage,pandas]
!pip install --upgrade pandas-gbq

In [None]:
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [None]:
%%bigquery --project machine-learning-msc df --use_bqstorage_api
SELECT 
  COUNT(*) as total_rows
FROM `machine-learning-msc.low_carbon_london.household_consumption_daily_agg` 

In [None]:
df.head()

Unnamed: 0,total_rows
0,14841792


### **Importing Libraries⏬**

##### Standard Libraries

In [None]:
!pip install fbprophet
!pip install MiniSom



In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt 
import random
import datetime as dt

from minisom import MiniSom
from tqdm import tqdm
from datetime import date
from matplotlib.gridspec import GridSpec
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
 
sns.set()
%matplotlib inline

  import pandas.util.testing as tm


In [None]:
import pandas_gbq
def output_to_bq(forecast, table_id, project_id='machine-learning-msc'):
    pandas_gbq.to_gbq(forecast, table_id, project_id=project_id, if_exists='append')

##### Import Github Repository

In [None]:
%cd /content
!ls

/content
adc.json  mscproj  sample_data


In [None]:
!rm -rf mscproj
!git clone https://github.com/shengy90/MSc-Project mscproj
!git pull
%cd /content/mscproj/
!ls

Cloning into 'mscproj'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 377 (delta 5), reused 8 (delta 4), pack-reused 361[K
Receiving objects: 100% (377/377), 10.94 MiB | 26.06 MiB/s, done.
Resolving deltas: 100% (203/203), done.
fatal: not a git repository (or any of the parent directories): .git
/content/mscproj
bin	     __init__.py  notebooks  requirements.txt  sql
definitions  Makefile	  README.md  run.py	       src


In [None]:
%reload_ext autoreload 
%autoreload 2 
from src.train_prophet import TrainProphet
from src.train_clusters import TrainClusters
from src.train_clusters import Normaliser

# 2️⃣ **Generate SOM clusters**

### **Downloading Data from BQ**

In [None]:
%%bigquery --project machine-learning-msc df_test --use_bqstorage_api
WITH stg1 AS (
SELECT 
lcl_id,
IF(acorn_grouped = "Adversity", 1, 0) AS adversity,
IF(acorn_grouped = "Affluent", 1, 0) AS affluent,
IF(acorn_grouped = "Comfortable", 1, 0) AS comfortable,
FORMAT_DATETIME("%B", DATETIME(ts)) AS month_name,
dayofweek,
hhourly_rank,
ROUND(AVG(kwhh),4) AS hh_avg,
ROUND(MAX(kwhh),4) AS hh_max,
ROUND(MIN(kwhh),4) AS hh_min,
ROUND(STDDEV(kwhh),4) AS hh_stddev

FROM `machine-learning-msc.forecasting_20200719.test_set`
WHERE train_test_split = 'test'
AND ts >= '2012-11-01' AND ts < '2013-03-01'

GROUP BY 1,2,3,4,5,6,7
)

SELECT 
*,
ROW_NUMBER() OVER (PARTITION BY lcl_id, month_name ORDER BY dayofweek ASC, hhourly_rank ASC) AS weekly_rank
FROM stg1 
ORDER BY lcl_id, month_name, weekly_rank, hhourly_rank

In [None]:
%%bigquery --project machine-learning-msc df_train --use_bqstorage_api
WITH stg1 AS (
SELECT 
lcl_id,
IF(acorn_grouped = "Adversity", 1, 0) AS adversity,
IF(acorn_grouped = "Affluent", 1, 0) AS affluent,
IF(acorn_grouped = "Comfortable", 1, 0) AS comfortable,
FORMAT_DATETIME("%B", DATETIME(ts)) AS month_name,
dayofweek,
hhourly_rank,
ROUND(AVG(kwhh),4) AS hh_avg,
ROUND(MAX(kwhh),4) AS hh_max,
ROUND(MIN(kwhh),4) AS hh_min,
ROUND(STDDEV(kwhh),4) AS hh_stddev

FROM `machine-learning-msc.forecasting_20200719.train_set`
WHERE train_test_split = 'train'
AND ts >= '2012-11-01' AND ts < '2013-03-01'

GROUP BY 1,2,3,4,5,6,7
)

SELECT 
*,
ROW_NUMBER() OVER (PARTITION BY lcl_id, month_name ORDER BY dayofweek ASC, hhourly_rank ASC) AS weekly_rank
FROM stg1 
ORDER BY lcl_id, month_name, weekly_rank, hhourly_rank

### **Normalise Dataset**

In [None]:
value_list = ['hh_avg']
column_list = ['month_name', 'weekly_rank']
normaliser = Normaliser(value_list, column_list)
norm_df_train = normaliser.fit(df_train)
norm_df_test = normaliser.predict(df_test)

### **Train SOM**

In [None]:
for i in range(9):
    cluster_num = i+1
    print(f"Training {cluster_num} clusters....")
    som_cluster = TrainClusters(cluster_type="som")
    som_cluster.fit(norm_df_train, cluster_num=cluster_num, sigma=0.1, learning_rate=0.1)  

    train_pred = som_cluster.predict(norm_df_train)
    test_pred = som_cluster.predict(norm_df_test)

    train_pred['train_test_split'] = "train"
    test_pred['train_test_split'] = "test"

    som_results = pd.concat([train_pred[['lcl_id','cluster','train_test_split']], test_pred[['lcl_id','cluster','train_test_split']]])
    som_results['num_clusters'] = cluster_num
    som_results['cluster_type'] = 'som'

    # output_to_bq(som_results, 'clusters_20200739.clusters')
    print("Upload to BQ completed! 🎉")

Training 1 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 28.62229932330481



The topographic error is not defined for a 1-by-1 map.

2681it [00:01, 2186.75it/s]
1000it [00:00, 2354.37it/s]


Upload to BQ completed! 🎉
Training 2 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 24.530185802382075


2681it [00:01, 2167.18it/s]
1000it [00:00, 2260.22it/s]


Upload to BQ completed! 🎉
Training 3 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 21.59310210230596


2681it [00:01, 2116.74it/s]
1000it [00:00, 2286.66it/s]


Upload to BQ completed! 🎉
Training 4 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 21.062202682086788


2681it [00:01, 2089.48it/s]
1000it [00:00, 2179.91it/s]


Upload to BQ completed! 🎉
Training 5 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 

0it [00:00, ?it/s]


 quantization error: 19.841573891542318


2681it [00:01, 2130.59it/s]
1000it [00:00, 2213.93it/s]


Upload to BQ completed! 🎉
Training 6 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 19.49002343512572


2681it [00:01, 2100.44it/s]
1000it [00:00, 2031.20it/s]


Upload to BQ completed! 🎉
Training 7 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 

0it [00:00, ?it/s]


 quantization error: 18.918113461647287


2681it [00:01, 1656.47it/s]
1000it [00:00, 2094.01it/s]


Upload to BQ completed! 🎉
Training 8 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 

0it [00:00, ?it/s]


 quantization error: 18.75056029192445


2681it [00:01, 2049.31it/s]
1000it [00:00, 2136.91it/s]


Upload to BQ completed! 🎉
Training 9 clusters....
 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 18.623579866826525


2681it [00:01, 2037.97it/s]
1000it [00:00, 2116.14it/s]


Upload to BQ completed! 🎉


### **Train Agglomerative Clusters**

In [None]:
for i in range(9):
    cluster_num = i+1
    print(f"Training {cluster_num} clusters....")
    agglo_cluster = TrainClusters(cluster_type="agglo")
    agglo_cluster.fit(norm_df_train, cluster_num=cluster_num)

    train_pred = agglo_cluster.predict(norm_df_train)
    test_pred = agglo_cluster.predict(norm_df_test)

    train_pred['train_test_split'] = "train"
    test_pred['train_test_split'] = "test"
    
    agglo_results = pd.concat([train_pred[['lcl_id','cluster','train_test_split']], test_pred[['lcl_id','cluster','train_test_split']]])
    agglo_results['cluster'] = agglo_results['cluster'].astype(float)
    agglo_results['num_clusters'] = cluster_num
    agglo_results['cluster_type'] = 'agglo'

    output_to_bq(agglo_results, 'clusters_20200739.clusters')
    print("Upload to BQ completed! 🎉")

Training 1 clusters....


1it [00:02,  2.84s/it]


Upload to BQ completed! 🎉
Training 2 clusters....


1it [00:03,  3.92s/it]


Upload to BQ completed! 🎉
Training 3 clusters....


1it [00:03,  3.44s/it]


Upload to BQ completed! 🎉
Training 4 clusters....


1it [00:04,  4.55s/it]


Upload to BQ completed! 🎉
Training 5 clusters....


1it [00:02,  2.76s/it]


Upload to BQ completed! 🎉
Training 6 clusters....


1it [00:03,  3.39s/it]


Upload to BQ completed! 🎉
Training 7 clusters....


1it [00:02,  2.81s/it]


Upload to BQ completed! 🎉
Training 8 clusters....


1it [00:06,  6.57s/it]


Upload to BQ completed! 🎉
Training 9 clusters....


1it [00:03,  3.48s/it]

Upload to BQ completed! 🎉





# **3️⃣ Forecasting**

### **Downloading Data from BQ**

In [None]:
%%bigquery --project machine-learning-msc df_train --use_bqstorage_api
SELECT 
train.lcl_id,
train.ts AS ds,
train.kwhh AS y,
weather.air_temperature

FROM `machine-learning-msc.forecasting_20200719.train_set` train 
LEFT JOIN `machine-learning-msc.london_heathrow_hourly_weather_data.london_heathrow_hourly_weather` weather 
  ON TIMESTAMP_TRUNC(weather.ts, HOUR) = TIMESTAMP_TRUNC(train.ts, hour)
  

WHERE train.ts >= '2012-11-01' AND train.ts < '2013-03-01'
ORDER BY 1,2 ASC

In [None]:
%%bigquery --project machine-learning-msc df_test --use_bqstorage_api
SELECT 
train.lcl_id,
train.ts AS ds,
train.kwhh AS y,
weather.air_temperature

FROM `machine-learning-msc.forecasting_20200719.test_set` train 
LEFT JOIN `machine-learning-msc.london_heathrow_hourly_weather_data.london_heathrow_hourly_weather` weather 
  ON TIMESTAMP_TRUNC(weather.ts, HOUR) = TIMESTAMP_TRUNC(train.ts, hour)
  

WHERE train.ts >= '2012-11-01' AND train.ts < '2013-03-01'
ORDER BY 1,2 ASC

In [None]:
df_train['ds'] = df_train['ds'].dt.tz_localize(None) # remove timezones 
df_test['ds'] = df_test['ds'].dt.tz_localize(None) # remove timezones 

print(df_train.shape, df_test.shape)

(15439123, 4) (5758620, 4)


In [None]:
%%bigquery --project machine-learning-msc clusters --use_bqstorage_api
SELECT * FROM `machine-learning-msc.clusters_20200739.clusters`

In [None]:
clusters.head()

Unnamed: 0,lcl_id,cluster,train_test_split,num_clusters,cluster_type
0,MAC000034,0.0,test,8,agglo
1,MAC004877,0.0,test,8,agglo
2,MAC004954,0.0,test,8,agglo
3,MAC004970,0.0,test,8,agglo
4,MAC005198,0.0,test,8,agglo


### **Utility Functions..**

In [None]:
def train_clusters(df_train, df_test, test_period="2013-02-01"):
    forecast_dict = {}
    test_global_fc = pd.DataFrame()
    train_global_fc = pd.DataFrame()
    clusters = df_train.groupby('cluster').count().index.to_list()
    for cluster in clusters:
        cluster_dict = {} 
        print(f"\nTraining cluster: {cluster}") 
        print("---------------------------")
        df_train_cluster = df_train.query(f"cluster=={cluster}").copy()
        df_test_cluster = df_test.query(f"cluster=={cluster}").copy()
        model = TrainProphet(test_period)
        model.fit(df_train_cluster)
        model.evaluate_test_global_mape(df_test_cluster)
        cluster_dict['model'] = model 
        forecast_dict[f'cluster_{cluster}']=cluster_dict
        test_global_fc = pd.concat([test_global_fc, model.test_forecast])

        train_forecast = df_train[['cluster','ds','y']].copy()
        train_forecast['max_households'] = df_train['households_num'].max()
        train_forecast = train_forecast.merge(model.forecast[['ds', 'yhat']], left_on='ds', right_on='ds')
        train_forecast['y_global'] = train_forecast['y'] * train_forecast['max_households']
        train_forecast['yhat_global'] = train_forecast['yhat'] * train_forecast['max_households']
        train_global_fc = pd.concat([train_global_fc, train_forecast])

    return forecast_dict, test_global_fc, train_global_fc

In [None]:
def get_timeseries(df, clusters_df, cluster_type, num_clusters):
    clusters = clusters_df.query(f"cluster_type=='{cluster_type}' and num_clusters=={num_clusters}")
    out_df = df.merge(clusters[['lcl_id','cluster']], left_on='lcl_id', right_on='lcl_id')
    households_num = pd.DataFrame(out_df.groupby('cluster')['lcl_id'].nunique())
    households_num.rename(columns={'lcl_id':'households_num'}, inplace=True)

    timeseries = out_df.groupby(['cluster','ds']).mean().reset_index()
    timeseries = timeseries.merge(households_num, left_on='cluster', right_on='cluster')
    return timeseries

In [None]:
def train_cluster_forecast(df_train, df_test, cluster_type, clusters, cluster_list):
    results_dict = {}
    
    for cluster in cluster_list:
        cluster_forecast_dict = {}
        print(f"\n ----------------------------------")
        print(f"|Total number of clusters: {cluster}...   |")
        print(f" ----------------------------------")

        train = get_timeseries(df_train, clusters, cluster_type=cluster_type, num_clusters=cluster)
        test = get_timeseries(df_test, clusters, cluster_type=cluster_type, num_clusters=cluster)

        model_dict, global_test, global_train = train_clusters(train, test)

        cluster_forecast_dict['model'] = model_dict
        cluster_forecast_dict['global_test'] = global_test
        cluster_forecast_dict['global_train'] = global_train 

        results_dict[f"num_clusters_{cluster}"] = cluster_forecast_dict 

    return results_dict

### **Training Forecasts**

In [None]:
cluster_list = [2,3,4,5,6,7,8]
agglo_results = train_cluster_forecast(df_train, df_test, 'agglo', clusters, cluster_list)
som_results = train_cluster_forecast(df_train, df_test, 'som', clusters, cluster_list)

 ----------------------------------
|Total number of clusters: 2...   |
 ----------------------------------

Training cluster: 0.0
---------------------------
Training Mean Absolute Percentage Error: 7.790572916666669
Test Mean Absolute Percentage Error: 8.23

Training cluster: 1.0
---------------------------
Training Mean Absolute Percentage Error: 8.77916666666666
Test Mean Absolute Percentage Error: 29.25
 ----------------------------------
|Total number of clusters: 3...   |
 ----------------------------------

Training cluster: 0.0
---------------------------
Training Mean Absolute Percentage Error: 8.569873511904763
Test Mean Absolute Percentage Error: 64.62

Training cluster: 1.0
---------------------------
Training Mean Absolute Percentage Error: 8.77916666666666
Test Mean Absolute Percentage Error: 156.81

Training cluster: 2.0
---------------------------
Training Mean Absolute Percentage Error: 7.447537202380951
Test Mean Absolute Percentage Error: 8.110000000000001
 --------

### **Evaluate Forecast**

In [None]:
train_global_results = agglo_results['num_clusters_2']['global_train'].groupby('ds')[['y_global','yhat_global']].sum()
test_global_results = agglo_results['num_clusters_2']['global_test'].groupby('ds')[['y_global','yhat_global']].sum()

train_global_mape = np.round(np.mean(np.abs(train_global_results['yhat_global']/train_global_results['y_global']-1)),4)*100
test_global_mape = np.round(np.mean(np.abs(test_global_results['yhat_global']/test_global_results['y_global']-1)),4)*100

print(train_global_mape, test_global_mape)

6.660000000000001 9.180000000000001


In [None]:
def evaluate_results(results_dict):
    for num_clusters in results_dict.keys():
        train_results = results_dict[num_clusters]['global_train'].groupby('ds')[['y_global','yhat_global']].sum()
        test_results = results_dict[num_clusters]['global_test'].groupby('ds')[['y_global', 'yhat_global']].sum() 

        train_global_mape = np.round(np.mean(np.abs(train_results['yhat_global']/train_results['y_global']-1)),4)*100
        test_global_mape = np.round(np.mean(np.abs(test_results['yhat_global']/test_results['y_global']-1)),4)*100
        results_dict[num_clusters]['train_global_mape'] = train_global_mape
        results_dict[num_clusters]['test_global_mape'] = test_global_mape

        print(f"Number of Clusters = {num_clusters}: Train Global MAPE: {train_global_mape}. Test Global MAPE: {test_global_mape}")

In [None]:
evaluate_results(agglo_results)

Number of Clusters = num_clusters_2: Train Global MAPE: 6.660000000000001. Test Global MAPE: 9.180000000000001
Number of Clusters = num_clusters_3: Train Global MAPE: 6.569999999999999. Test Global MAPE: 71.78
Number of Clusters = num_clusters_4: Train Global MAPE: 12.22. Test Global MAPE: 74.11
Number of Clusters = num_clusters_5: Train Global MAPE: 9.27. Test Global MAPE: 77.21000000000001
Number of Clusters = num_clusters_6: Train Global MAPE: 8.64. Test Global MAPE: 58.13
Number of Clusters = num_clusters_7: Train Global MAPE: 9.06. Test Global MAPE: 57.52
Number of Clusters = num_clusters_8: Train Global MAPE: 8.35. Test Global MAPE: 34.02


In [None]:
evaluate_results(som_results)

Number of Clusters = num_clusters_2: Train Global MAPE: 6.800000000000001. Test Global MAPE: 8.89
Number of Clusters = num_clusters_3: Train Global MAPE: 6.77. Test Global MAPE: 8.82
Number of Clusters = num_clusters_4: Train Global MAPE: 11.49. Test Global MAPE: 7.7299999999999995
Number of Clusters = num_clusters_5: Train Global MAPE: 10.22. Test Global MAPE: 7.55
Number of Clusters = num_clusters_6: Train Global MAPE: 8.92. Test Global MAPE: 7.66
Number of Clusters = num_clusters_7: Train Global MAPE: 8.67. Test Global MAPE: 7.8100000000000005
Number of Clusters = num_clusters_8: Train Global MAPE: 9.48. Test Global MAPE: 7.55


### **Saving results to BigQuery**

In [None]:
agglo_results['num_clusters_2']['global_test']

Unnamed: 0,cluster,ds,y,air_temperature,households_num,max_households,yhat,y_global,yhat_global
0,0.0,2013-02-01 00:00:00,0.187505,7.9,969,969,0.198027,181.692505,191.888040
1,0.0,2013-02-01 00:30:00,0.166509,7.9,969,969,0.181534,161.347509,175.906466
2,0.0,2013-02-01 01:00:00,0.143679,7.7,969,969,0.165928,139.224679,160.783830
3,0.0,2013-02-01 01:30:00,0.127416,7.7,969,969,0.150054,123.466416,145.402075
4,0.0,2013-02-01 02:00:00,0.115903,7.8,969,969,0.134279,112.309903,130.116335
...,...,...,...,...,...,...,...,...,...
1339,1.0,2013-02-28 21:30:00,0.921581,4.0,31,31,1.155914,28.569000,35.833333
1340,1.0,2013-02-28 22:00:00,0.983355,3.5,31,31,1.103999,30.484000,34.223980
1341,1.0,2013-02-28 22:30:00,0.887387,3.5,31,31,1.039425,27.509000,32.222166
1342,1.0,2013-02-28 23:00:00,0.832645,2.7,31,31,0.982447,25.812000,30.455861


In [None]:
for cluster in agglo_results:
    global_train = agglo_results[cluster]['global_train']
    global_test = agglo_results[cluster]['global_test']

    global_train['cluster_type'] = "agglo"
    global_train['num_clusters'] = cluster 
    global_test['cluster_type'] = "agglo"
    global_test['num_clusters'] = cluster

    output_to_bq(global_train, table_id='20200739.train_results')
    output_to_bq(global_test, table_id='20200739.test_results')
    print(f"{cluster} results uploaded to BQ!")


23040 out of 23040 rows loaded.

1it [00:09,  9.53s/it]

2688 out of 2688 rows loaded.
1it [00:03,  3.13s/it]

0it [00:00, ?it/s][A

num_clusters_2 results uploaded to BQ!


INFO:pandas_gbq.gbq:51840 out of 51840 rows loaded.

1it [00:05,  5.56s/it]

4032 out of 4032 rows loaded.
1it [00:06,  6.03s/it]


num_clusters_3 results uploaded to BQ!



92160 out of 92160 rows loaded.

1it [00:30, 30.35s/it]

5376 out of 5376 rows loaded.

1it [00:09,  9.15s/it]

0it [00:00, ?it/s][A

num_clusters_4 results uploaded to BQ!


INFO:pandas_gbq.gbq:144000 out of 144000 rows loaded.

1it [00:11, 11.29s/it]

6720 out of 6720 rows loaded.

1it [00:06,  6.26s/it]

0it [00:00, ?it/s][A

num_clusters_5 results uploaded to BQ!


INFO:pandas_gbq.gbq:207360 out of 207360 rows loaded.

1it [00:14, 14.86s/it]

8064 out of 8064 rows loaded.

1it [00:08,  8.93s/it]

0it [00:00, ?it/s][A

num_clusters_6 results uploaded to BQ!


INFO:pandas_gbq.gbq:282240 out of 282240 rows loaded.

1it [00:18, 18.65s/it]

9408 out of 9408 rows loaded.

1it [00:03,  3.69s/it]

0it [00:00, ?it/s][A

num_clusters_7 results uploaded to BQ!


INFO:pandas_gbq.gbq:368640 out of 368640 rows loaded.

1it [00:23, 23.29s/it]

10752 out of 10752 rows loaded.

1it [00:05,  5.98s/it]

num_clusters_8 results uploaded to BQ!





In [None]:
for cluster in som_results:
    global_train = som_results[cluster]['global_train']
    global_test = som_results[cluster]['global_test']

    global_train['cluster_type'] = "som"
    global_train['num_clusters'] = cluster 
    global_test['cluster_type'] = "som"
    global_test['num_clusters'] = cluster

    output_to_bq(global_train, table_id='20200739.train_results')
    output_to_bq(global_test, table_id='20200739.test_results')
    print(f"{cluster} results uploaded to BQ!")


23040 out of 23040 rows loaded.

1it [00:06,  6.20s/it]

2688 out of 2688 rows loaded.
1it [00:02,  2.86s/it]

0it [00:00, ?it/s][A

num_clusters_2 results uploaded to BQ!


INFO:pandas_gbq.gbq:51840 out of 51840 rows loaded.

1it [00:06,  6.21s/it]

4032 out of 4032 rows loaded.
1it [01:14, 74.73s/it]

0it [00:00, ?it/s][A

num_clusters_3 results uploaded to BQ!


INFO:pandas_gbq.gbq:92160 out of 92160 rows loaded.

1it [00:12, 12.72s/it]

5376 out of 5376 rows loaded.

1it [00:03,  3.18s/it]

0it [00:00, ?it/s][A

num_clusters_4 results uploaded to BQ!


INFO:pandas_gbq.gbq:144000 out of 144000 rows loaded.

1it [00:15, 15.02s/it]

6720 out of 6720 rows loaded.

1it [00:03,  3.89s/it]

0it [00:00, ?it/s][A

num_clusters_5 results uploaded to BQ!


INFO:pandas_gbq.gbq:207360 out of 207360 rows loaded.

1it [00:15, 15.21s/it]

8064 out of 8064 rows loaded.

1it [00:04,  4.43s/it]

0it [00:00, ?it/s][A

num_clusters_6 results uploaded to BQ!


INFO:pandas_gbq.gbq:282240 out of 282240 rows loaded.

1it [00:17, 17.13s/it]

9408 out of 9408 rows loaded.

1it [00:07,  7.48s/it]

0it [00:00, ?it/s][A

num_clusters_7 results uploaded to BQ!


INFO:pandas_gbq.gbq:368640 out of 368640 rows loaded.

1it [00:37, 37.04s/it]

10752 out of 10752 rows loaded.

1it [00:04,  4.96s/it]

num_clusters_8 results uploaded to BQ!



