In [1]:
import sqlalchemy as sqla
import sys
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import subprocess
#To avoid pandas warnings
import warnings
warnings.filterwarnings('ignore')
#import geometry_processor_dp
# This function is needed to make the geometries compatible with PostGIS
def wkb_hexer(poly):
    return poly.wkb_hex
from geopandas.tools import sjoin

## Connection to DB 2020

In [2]:
# Connect to database 2020
# Chose the DB
db_input_2020 = input ("What database should we use?\n\n")

# Create the sqlalchemy engine
db_engine_2020 = sqla.create_engine(db_input_2020) #is taken by pd.read_sql_query function later on

# Test connection
try:
    db_connection = db_engine_2020.connect()
except Exception as exc:
    print("\nCouldn't connect to the database\n")
    try:
        print(exc.message, exc.args)
    except:
        pass
    sys.exit()
        
# If everything goes well, just be happy about it!
    print("\nHooray! We connected to the database!")

## Connection to DB 2021

In [3]:
# Chose the DB
db_input_2021 = input ("What database should we use?\n\n")

# Create the sqlalchemy engine
db_engine_2021 = sqla.create_engine(db_input_2021) #is taken by pd.read_sql_query function later on

# Test connection
try:
    db_connection = db_engine_2021.connect()
except Exception as exc:
    print("\nCouldn't connect to the database\n")
    try:
        print(exc.message, exc.args)
    except:
        pass
    sys.exit()
        
# If everything goes well, just be happy about it!
    print("\nHooray! We connected to the database!")

# Link SATOM data to CityGML (EnergyADE structure)
Need to find a link ogr_fid to gmlid


## Use merged_egid_intersect 
Contains already ogr to gmlid link

In [4]:
merged_egid_intersect = pd.read_sql_query("SELECT gmlid, ogr_fid, EGID FROM city.merged_egid_intersect", db_engine_2021)

In [5]:
ogr_to_gmlid = merged_egid_intersect.drop_duplicates(subset=["gmlid"]).drop(["egid"], axis=1)
ogr_to_gmlid

# One ogr_fid points to several gmlid, as they are elements of geometry belonging to the building

Unnamed: 0,gmlid,ogr_fid
0,_1A5EAF11-F74F-4297-9D23-7CC82254E52D,2981.0
1,_ffda891f-392a-4d14-b34b-aa726c7484fa,2981.0
2,UUID_d77e711e-5d75-4e5c-96cd-8147ca0b0d3c,2981.0
3,UUID_6c58654b-25d9-40f5-9a0f-6fc34ad24f4d,2981.0
4,UUID_4bb137ba-573c-4bf3-bd9a-d983f7c4d36c,2981.0
...,...,...
67615,_D0178B39-7E6E-439E-BD14-4424BECBBFFB,673.0
67616,UUID_62723213-f32c-4687-a538-694faddc1769,3262.0
67619,UUID_6b2d3181-fd76-4cd3-ace0-00ff0ae78d7b,673.0
67620,UUID_d39d9fd0-868f-40f6-afce-b488ed49ca7f,673.0


# TABLES TO CREATE

### nrg8_time_series
- objectclass_id: 202 (regular time series), 203 (irregular time series), 204 (regular timeseries file), 205 (irregular timeseries file)
- gmlid: FK
- values array: array of values for regular time series
- time interval: interval between values in array
- acquisition method: simulation, estimation, measurement...


### Gather energy data in satom

In [6]:
tblcliendatathistory = pd.read_sql_query("SELECT sdbus, datetime_int, datetime_time, cnt1_energy FROM satom.tblcliendatathistory", db_engine_2020)
tblclient = pd.read_sql_query("SELECT sdbus, batiment FROM satom.tblclient", db_engine_2020)
batiments = pd.read_sql_query("SELECT ogr_fid, client, numbat FROM satom.batiments", db_engine_2020)
mega =  pd.read_sql_query("SELECT megabatiment, meganumbat FROM satom.mega", db_engine_2020)

In [7]:
energy_data = pd.merge(tblcliendatathistory, tblclient, on="sdbus", how="left")
energy_data = pd.merge(energy_data, batiments, left_on="batiment", right_on="client", how="left")
energy_data = pd.merge(energy_data, mega, left_on="numbat", right_on="meganumbat", how="left")

In [8]:
energy_data

Unnamed: 0,sdbus,datetime_int,datetime_time,cnt1_energy,batiment,ogr_fid,client,numbat,megabatiment,meganumbat
0,270,1532016560,2018-07-19 18:09:20,950360.0,M3502B,1273.0,M3502B,4335.0,M3502B,4335.0
1,271,1532016575,2018-07-19 18:09:35,695300.0,M0965,915.0,M0965,749.0,CM3950,749.0
2,271,1532016575,2018-07-19 18:09:35,695300.0,M0965,915.0,M0965,749.0,M0965,749.0
3,272,1532016592,2018-07-19 18:09:52,408672.0,M4518,1563.0,M4518,2960.0,CM1105,2960.0
4,272,1532016592,2018-07-19 18:09:52,408672.0,M4518,1563.0,M4518,2960.0,M4518,2960.0
...,...,...,...,...,...,...,...,...,...,...
14966003,395,1583771840,2020-03-09 17:37:20,223168.0,M0152,1242.0,M0152,137.0,,
14966004,396,1583771856,2020-03-09 17:37:36,114565.0,M0132,1243.0,M0132,117.0,M0132,117.0
14966005,396,1583771856,2020-03-09 17:37:36,114565.0,M0132,1244.0,M0132,115.0,,
14966006,397,1583771874,2020-03-09 17:37:54,18592.0,M0974,1306.0,M0974,762.0,CM2953,762.0


In [9]:
# Store values into array for a certain ogr_fid

datetime_arrays = energy_data.groupby('ogr_fid')['datetime_time'].apply(list)
datetime_arrays = pd.DataFrame(datetime_arrays)
cnt1_arrays = energy_data.groupby('ogr_fid')['cnt1_energy'].apply(list)
cnt1_arrays = pd.DataFrame(cnt1_arrays) # Has cumulative values

In [10]:
datetime_arrays

Unnamed: 0_level_0,datetime_time
ogr_fid,Unnamed: 1_level_1
2.0,"[2019-11-05 10:05:12, 2019-11-05 10:10:18, 201..."
4.0,"[2018-07-19 20:03:42, 2018-07-19 20:48:26, 201..."
6.0,"[2018-07-19 19:40:42, 2018-07-20 01:40:31, 201..."
7.0,"[2018-07-19 19:40:42, 2018-07-20 01:40:31, 201..."
8.0,"[2018-07-19 19:40:42, 2018-07-20 01:40:31, 201..."
...,...
7823.0,"[2018-07-19 20:08:36, 2018-07-19 20:50:49, 201..."
7824.0,"[2018-07-19 20:08:36, 2018-07-19 20:50:49, 201..."
7825.0,"[2018-07-19 20:08:36, 2018-07-19 20:50:49, 201..."
7826.0,"[2018-07-19 20:08:36, 2018-07-19 20:50:49, 201..."


In [11]:
# Expand list to columns
cnt1_expanded = cnt1_arrays["cnt1_energy"].apply(pd.Series)
cnt1_expanded

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
ogr_fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2.0,0.0,0.0,2894.0,2894.0,2894.0,2894.0,2994.0,2994.0,2994.0,3101.0,...,,,,,,,,,,
4.0,741951.0,741951.0,741965.0,741965.0,741973.0,741973.0,741984.0,741993.0,742003.0,742007.0,...,,,,,,,,,,
6.0,1146370.0,1146370.0,1146370.0,1148620.0,1149010.0,1149010.0,1149010.0,1149010.0,1149510.0,1149740.0,...,,,,,,,,,,
7.0,1146370.0,1146370.0,1146370.0,1148620.0,1149010.0,1149010.0,1149010.0,1149010.0,1149510.0,1149740.0,...,,,,,,,,,,
8.0,1146370.0,1146370.0,1146370.0,1148620.0,1149010.0,1149010.0,1149010.0,1149010.0,1149510.0,1149740.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7823.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7824.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7825.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7826.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [12]:
# Difference between consecutive columns to see increase in consumption
cnt1_diff = cnt1_expanded.diff(periods=1, axis=1)
cnt1_diff

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
ogr_fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2.0,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
4.0,,0.0,14.0,0.0,8.0,0.0,11.0,9.0,10.0,4.0,...,,,,,,,,,,
6.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
7.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
8.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7823.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7824.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7825.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7826.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [None]:
# Rewrite rows to list and store in dataframe
list_energy = cnt1_diff.values.tolist()

cnt_df = []
cnt_df.append(list_energy)
cnt_df = pd.DataFrame(cnt_df).transpose()
cnt_df["ogr_fid"] = cnt1_arrays.index

In [None]:
cnt_df

In [None]:
# Create table nrg8_time_series

nrg8_time_series = {"id": [], "objectclass_id": [],"gmlid": [] ,"values_unit": [] ,"time_interval": [] , "time_interval_unit": [], "acquisition_method":[], "interpolation_type":[]}
nrg8_time_series = pd.DataFrame(nrg8_time_series)
nrg8_time_series["gmlid"] = ogr_to_gmlid["gmlid"]
nrg8_time_series["ogr_fid"] = ogr_to_gmlid["ogr_fid"] #add temporarily to insert energy data
nrg8_time_series = nrg8_time_series.merge(datetime_arrays, on="ogr_fid", how="left")
nrg8_time_series = nrg8_time_series.merge(cnt_df, on="ogr_fid", how="left")

row_nbr = len(nrg8_time_series["gmlid"])
nrg8_time_series["objectclass_id"] = pd.DataFrame(np.repeat(203, row_nbr)) #irregular time series
nrg8_time_series["id"] = pd.DataFrame(np.arange(row_nbr))
nrg8_time_series["acquisition_method"] = pd.DataFrame(np.repeat('Measurement', row_nbr))
nrg8_time_series["interpolation_type"] = pd.DataFrame(np.repeat('AverageInSucceedingInterval', row_nbr)) #to change
nrg8_time_series = nrg8_time_series.rename(columns= {0:'values_array'})

nrg8_time_series["values_unit"] = pd.DataFrame(np.repeat('kWh', row_nbr))

nrg8_time_series.head()

In [None]:
nrg8_time_series = nrg8_time_series.drop(["ogr_fid"], axis=1).rename(columns={'datetime_time':'time_array'})

# 2019 yearly value

In [13]:
# Expand time list to columns --> we want to see 2019 timestamps
time_expanded = datetime_arrays["datetime_time"].apply(pd.Series)

In [14]:
cnt1_diff

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
ogr_fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2.0,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
4.0,,0.0,14.0,0.0,8.0,0.0,11.0,9.0,10.0,4.0,...,,,,,,,,,,
6.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
7.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
8.0,,0.0,0.0,2250.0,390.0,0.0,0.0,0.0,500.0,230.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7823.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7824.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7825.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7826.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [15]:
# Keep only where not NaT (in column) or not NaN (in column)
time_expanded = time_expanded.iloc[:,238:25267]
cnt1_diff = cnt1_diff.iloc[:,238:25267]

In [15]:
# Fix index: put gmlid as index
time_expanded = time_expanded.reset_index().rename(columns={"index": "ogr_fid"})
time_expanded = pd.merge(time_expanded, ogr_to_gmlid, on='ogr_fid', how="left")
new_idx = time_expanded["gmlid"]
time_expanded = time_expanded.rename(index=new_idx).drop(["ogr_fid", "gmlid"], axis=1)


In [16]:
cnt1_diff = cnt1_diff.reset_index().rename(columns={"index": "ogr_fid"})
cnt1_diff = pd.merge(cnt1_diff, ogr_to_gmlid, on='ogr_fid', how="left")
new_idx = cnt1_diff["gmlid"]
cnt1_diff = cnt1_diff.rename(index=new_idx).drop(["ogr_fid", "gmlid"], axis=1)

In [17]:
time_expanded

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
_b0d8c8ac-0fe4-46d1-80b0-053faccb06de,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_5f22b47b-766e-4ea7-bc56-3d637f1426f2,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_d502ee79-7fd7-4016-838e-694d25c1027b,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_b1732c35-42a5-4b73-93c7-af630c2882f3,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
_61AD2D6C-1798-450F-B2EC-B5CE01BAAA26,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
_9594BC65-E99B-4179-9FC5-E131AA899E11,2018-07-19 20:08:36,2018-07-19 20:50:49,2018-07-20 02:04:32,2018-07-20 04:55:06,2018-07-20 08:05:25,2018-07-20 09:10:56,2018-07-20 10:53:28,2018-07-20 14:54:47,2018-07-20 19:12:25,2018-07-20 23:14:41,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_907a7230-3aa9-46c3-bf4d-b24151fa567b,2018-07-19 20:08:36,2018-07-19 20:50:49,2018-07-20 02:04:32,2018-07-20 04:55:06,2018-07-20 08:05:25,2018-07-20 09:10:56,2018-07-20 10:53:28,2018-07-20 14:54:47,2018-07-20 19:12:25,2018-07-20 23:14:41,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_37aa1b36-59c6-4b91-818a-921a1763c682,2018-07-19 20:08:36,2018-07-19 20:50:49,2018-07-20 02:04:32,2018-07-20 04:55:06,2018-07-20 08:05:25,2018-07-20 09:10:56,2018-07-20 10:53:28,2018-07-20 14:54:47,2018-07-20 19:12:25,2018-07-20 23:14:41,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_2d93b1b0-4e6f-4567-adf5-cd1b3249b798,2018-07-19 20:08:36,2018-07-19 20:50:49,2018-07-20 02:04:32,2018-07-20 04:55:06,2018-07-20 08:05:25,2018-07-20 09:10:56,2018-07-20 10:53:28,2018-07-20 14:54:47,2018-07-20 19:12:25,2018-07-20 23:14:41,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT


In [18]:
dates_2019 = time_expanded[(time_expanded > pd.Timestamp(2018, 12, 31, 23,59,59)) & (time_expanded < pd.Timestamp(2020, 1, 1, 0,0,0))]
dates_2019

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
_b0d8c8ac-0fe4-46d1-80b0-053faccb06de,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_5f22b47b-766e-4ea7-bc56-3d637f1426f2,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_d502ee79-7fd7-4016-838e-694d25c1027b,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_b1732c35-42a5-4b73-93c7-af630c2882f3,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
_61AD2D6C-1798-450F-B2EC-B5CE01BAAA26,2019-11-05 10:05:12,2019-11-05 10:10:18,2019-11-05 10:14:53,2019-11-05 13:28:06,2019-11-05 15:28:58,2019-11-05 19:29:18,2019-11-05 23:00:16,2019-11-06 03:30:23,2019-11-06 07:01:19,2019-11-06 17:01:26,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
_9594BC65-E99B-4179-9FC5-E131AA899E11,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_907a7230-3aa9-46c3-bf4d-b24151fa567b,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_37aa1b36-59c6-4b91-818a-921a1763c682,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
UUID_2d93b1b0-4e6f-4567-adf5-cd1b3249b798,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT


In [19]:
cnt1_2019 = cnt1_diff[(time_expanded > pd.Timestamp(2018, 12, 31, 23,59,59)) & (time_expanded < pd.Timestamp(2020, 1, 1, 0,0,0))]
cnt1_2019

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26048,26049,26050,26051,26052,26053,26054,26055,26056,26057
_b0d8c8ac-0fe4-46d1-80b0-053faccb06de,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
UUID_5f22b47b-766e-4ea7-bc56-3d637f1426f2,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
UUID_d502ee79-7fd7-4016-838e-694d25c1027b,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
UUID_b1732c35-42a5-4b73-93c7-af630c2882f3,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
_61AD2D6C-1798-450F-B2EC-B5CE01BAAA26,,0.0,2894.0,0.0,0.0,0.0,100.0,0.0,0.0,107.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
_9594BC65-E99B-4179-9FC5-E131AA899E11,,,,,,,,,,,...,,,,,,,,,,
UUID_907a7230-3aa9-46c3-bf4d-b24151fa567b,,,,,,,,,,,...,,,,,,,,,,
UUID_37aa1b36-59c6-4b91-818a-921a1763c682,,,,,,,,,,,...,,,,,,,,,,
UUID_2d93b1b0-4e6f-4567-adf5-cd1b3249b798,,,,,,,,,,,...,,,,,,,,,,


In [20]:
# Total consumption per year
cnt1_yearly = cnt1_2019.sum(axis=1, skipna=True)

"""Save the total energy per egid for future use"""
#gmlid_to_egid = merged_egid_intersect.drop_duplicates(subset=["gmlid"]).drop(["ogr_to_fid"], axis=1)
#energy_egid = pd.merge(yearly_energy, gmlid_to_egid, left_index=True, right_on="gmlid").drop_duplicates(["egid"], keep='first')
#energy_egid.to_csv("annual_output.csv")

'Save the total energy per egid for future use'

In [21]:
# The tot_energy needs to be typ numeric[] (list) in DB, so convert
cnt1_yearly = pd.DataFrame(cnt1_yearly)
cnt1_yearly = cnt1_yearly.reset_index()
list_values = list(map(lambda el:[el], cnt1_yearly.iloc[:,0].values.tolist()))
energy_list = []
energy_list.append(list_values)
energy_list = pd.DataFrame(energy_list).transpose()
energy_list['energy'] = cnt1_yearly.iloc[:,1]
energy_list = energy_list.rename(columns={0:'gmlid'})

In [30]:
energy_list 

Unnamed: 0,gmlid,energy
0,[_b0d8c8ac-0fe4-46d1-80b0-053faccb06de],19593.0
1,[UUID_5f22b47b-766e-4ea7-bc56-3d637f1426f2],19593.0
2,[UUID_d502ee79-7fd7-4016-838e-694d25c1027b],19593.0
3,[UUID_b1732c35-42a5-4b73-93c7-af630c2882f3],19593.0
4,[_61AD2D6C-1798-450F-B2EC-B5CE01BAAA26],19593.0
...,...,...
43017,[_9594BC65-E99B-4179-9FC5-E131AA899E11],0.0
43018,[UUID_907a7230-3aa9-46c3-bf4d-b24151fa567b],0.0
43019,[UUID_37aa1b36-59c6-4b91-818a-921a1763c682],0.0
43020,[UUID_2d93b1b0-4e6f-4567-adf5-cd1b3249b798],0.0


In [32]:
# Create table to add to annual_values for nrg8_time_serie
annual_values =  {"id": [], "objectclass_id": [],"gmlid": [] ,"values_array":[], "values_unit": [] ,"time_interval": [] , "time_interval_unit": [], "acquisition_method":[], "interpolation_type":[]}
annual_values = pd.DataFrame(annual_values)
annual_values["gmlid"] = energy_list["gmlid"]

row_nbr_start = pd.read_sql_query("SELECT COUNT(id) FROM citydb.nrg8_time_series", db_engine_2021) #Last line in current table in DB
row_nbr = len(annual_values["gmlid"]) 
row_nbr_stop = row_nbr_start + 1 + row_nbr 

annual_values["objectclass_id"] = pd.DataFrame(np.repeat(202, row_nbr)) #regular time series
annual_values["id"] = pd.DataFrame(np.arange(row_nbr_start.iloc[0,0] + 1, row_nbr_stop.iloc[0,0])) 
annual_values["acquisition_method"] = pd.DataFrame(np.repeat('Measurement', row_nbr))

annual_values["values_array"] = energy_list['energy'] #CHANGE
annual_values["values_unit"] = pd.DataFrame(np.repeat('kWh', row_nbr)) #check if this value is correct 
annual_values["time_interval"] = pd.DataFrame(np.repeat(1, row_nbr))
annual_values["time_interval_unit"] = pd.DataFrame(np.repeat('year', row_nbr))
annual_values["interpolation_type"] = pd.DataFrame(np.repeat('PrecedingTotal', row_nbr)) #to check

annual_values

Unnamed: 0,id,objectclass_id,gmlid,values_array,values_unit,time_interval,time_interval_unit,acquisition_method,interpolation_type
0,67011,202,[_b0d8c8ac-0fe4-46d1-80b0-053faccb06de],19593.0,kWh,1,year,Measurement,PrecedingTotal
1,67012,202,[UUID_5f22b47b-766e-4ea7-bc56-3d637f1426f2],19593.0,kWh,1,year,Measurement,PrecedingTotal
2,67013,202,[UUID_d502ee79-7fd7-4016-838e-694d25c1027b],19593.0,kWh,1,year,Measurement,PrecedingTotal
3,67014,202,[UUID_b1732c35-42a5-4b73-93c7-af630c2882f3],19593.0,kWh,1,year,Measurement,PrecedingTotal
4,67015,202,[_61AD2D6C-1798-450F-B2EC-B5CE01BAAA26],19593.0,kWh,1,year,Measurement,PrecedingTotal
...,...,...,...,...,...,...,...,...,...
43017,110028,202,[_9594BC65-E99B-4179-9FC5-E131AA899E11],0.0,kWh,1,year,Measurement,PrecedingTotal
43018,110029,202,[UUID_907a7230-3aa9-46c3-bf4d-b24151fa567b],0.0,kWh,1,year,Measurement,PrecedingTotal
43019,110030,202,[UUID_37aa1b36-59c6-4b91-818a-921a1763c682],0.0,kWh,1,year,Measurement,PrecedingTotal
43020,110031,202,[UUID_2d93b1b0-4e6f-4567-adf5-cd1b3249b798],0.0,kWh,1,year,Measurement,PrecedingTotal


## Insert into database

In [None]:
nrg8_time_series.to_sql(name='nrg8_time_series', con = db_engine_2021, if_exists='append', index=False)

In [None]:
db_connection.close()

In [None]:
annual_values.to_sql(name='nrg8_time_series', con = db_engine_2021, if_exists='append', index=False)
db_connection.close()