In [1]:
import pandas as pd
import argparse
import logging
import json
import os
import ast
import pathlib
import fiona
import numpy as np
import geopandas as gpd
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
from geopandas.tools import geocode
from shapely.geometry import Point
import matplotlib.pyplot as plt

In [2]:
# Define constants

# District numbers, matched against the "Numero del Distretto" column of UST_comune.csv.
BELLINZONA_UST = 2101
BLENIO_UST = 2102
LEVENTINA_UST = 2103
LOCARNO_UST = 2104
LUGANO_UST = 2105
MENDRISIO_UST = 2106
RIVIERA_UST = 2107
VALLEMAGGIA_UST = 2108
VALLEMAGGIA = VALLEMAGGIA_UST  # original name without the _UST suffix, kept so existing references still work

### ODIS ###
# AVG_RES_LIGHT_DENSITY = 8.3 # kWh/m2y ODIS
# AVG_RES_APP =  1400 # kWh/y ODIS,

### ELCOM 2020 tariff raw data: consumption categories ###
# H1 - 1600 kWh/y: 2-room dwelling with electric cooker
# H2 - 2500 kWh/y: 4-room dwelling with electric cooker;
# H4 - 4500 kWh/y: 5-room dwelling with electric cooker and tumble dryer (no electric water heater)
# H8 - 7500 kWh/y: large owner-occupied dwelling with extensive use of electricity
# C1 - 8000 kWh/y: very small business, max. power: 8 kW
# C2 - 30 000 kWh/y: small business, max. power: 15 kW
# C3 - 150 000 kWh/y: medium-sized business, max. power: 50 kW
# C4 - 500 000 kWh/y: large business, max. power: 150 kW, low voltage / C5 - medium voltage, own transformer station
# C6 - 1 500 000 kWh/y: large business, max. power: 400 kW, medium voltage, own transformer station
# C7 - 7 500 000 kWh/y: large business, max. power: 1630 kW, medium voltage, own transformer station

### Energy consumption (in gigawatt-hours) by end use and energy carrier, Ticino, 2020
# NOTE(review): the AGR/IND/SER cell further down reassigns these three names
# with different values (61640 / 153650 / 22180) before using them.
S1_AGR_elec = 64000 # kWh/y
S2_IND_elec = 160000 #kWh/y
S3_SER_elec = 25000 #kWh/y

### https://pubdb.bfe.admin.ch/it/publication/download/10559 ###
# MFH_2pp_elec = 2190 # kWh/y =  apps and light
# MFH_1pp_elec_diff = 458.5 # kWh/y =  apps and light
# SFH_4pp_elec = 4048 # kWh/y =  apps and light
# SFH_1pp_elec_diff = 593.5 # kWh/y =  apps and light
# --> MFH 2PP = 2190
# --> SHF 3PP = 3454.5

AVG_DHW_PP = 40 # L/DAY/PERSON ODIS
AVG_M2_PP = 49.1 # TI - average floor area per occupant, by canton, 2021, https://www.bfs.admin.ch/bfs/it/home/statistiche/costruzioni-abitazioni/abitazioni/condizioni-abitazione/superficie-persona.html
SUPERFICIE_MEDIA = 97.8 # average area ("superficie media"), https://www.bfs.admin.ch/bfs/it/home/statistiche/costruzioni-abitazioni/abitazioni/dimensioni.html

# Presumably persons per dwelling in multi-family (MFH) and single-family (SFH) houses - TODO confirm against the source.
MFH_PP = 2.1 #https://www.bfs.admin.ch/bfs/it/home/statistiche/costruzioni-abitazioni/abitazioni/condizioni-abitazione/densita-utilizzazione.html
SFH_PP = 2.7

# Codes compared against the building "gklas" column.
GKLAS = [1110,1121,1122,1130, 1211, 1212, 1220, 1230, 1251, 1261, 1262, 1263, 1264, 1265, 1272, 1241, 1242, 1271, 1274, 1275, 1276, 1277, 1278, 1231, 1252, 1273]

GKLAS_SFH = 1110
GKLAS_DFH = 1121
GKLAS_MFH = 1122
GKLAS_RES = [GKLAS_SFH, GKLAS_DFH, GKLAS_MFH]

# GKLAS split in two; judging by the names, classes with and without heating demand.
GKLAS_HTG = [1110, 1121, 1122, 1130, 1211, 1212, 1220, 1230, 1251, 1261, 1262, 1263, 1264, 1265, 1272, 1231]
GKLAS_NOHTG = [1241, 1242, 1252, 1271, 1273, 1274, 1275, 1276, 1277, 1278]

# Codes of the building "gbaup" column (presumably construction periods - TODO confirm).
GBAUP = [8011,8012,8013,8014,8015,8016,8017,8018,8019,8020,8021,8022,8023]
GSTAT = 1004 # existing
   
# Folder layout: the data directory sits next to the directory the notebook runs in.
NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p.joinpath("data")
MAP_DIRECTORY = DATA_DIRECTORY.joinpath('maps')
BLDG_DIRECTORY = DATA_DIRECTORY.joinpath('Buildings_rea')

# Canton to analyse (alternative: "Genève").
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY.joinpath(CANTON)

# swisstopo boundary shapefiles, downloaded from
# https://www.swisstopo.admin.ch/fr/geodata/landscape/boundaries3d.html
# They are meant to be read into a GeoPandas dataframe using the Swiss coordinates (EPSG code: 2056).
# NOTE: the .shp file is useless without its companion files (.dbf, .shx, .prj, ...).
SWISSTOPO_DISTRICT_FILE = "swissboundaries3d_2023-01_2056_5728.shp/swissBOUNDARIES3D_1_4_TLM_BEZIRKSGEBIET.shp"
SWISSTOPO_CANTON_FILE = "swissboundaries3d_2023-01_2056_5728.shp/swissBOUNDARIES3D_1_4_TLM_KANTONSGEBIET.shp"
# TODO: figure out how to get the municipal boundaries:
# https://api3.geo.admin.ch/rest/services/api/MapServer/ch.swisstopo.swissboundaries3d-gemeinde-flaeche.fill

DISTRICT_FILE_PATH = MAP_DIRECTORY.joinpath(SWISSTOPO_DISTRICT_FILE)
CANTON_FILE_PATH = MAP_DIRECTORY.joinpath(SWISSTOPO_CANTON_FILE)

### AGR, IND, SER

In [3]:
# Top-down electricity demand of agriculture (S1), industry (S2) and services (S3):
# for every district, number of firms in the sector x average electricity use per firm.

# Average electricity use per firm [kWh/y], Ticino 2020
# (energy consumption by end use and energy carrier, divided by the number of firms).
# These assignments replace the values given to the same names in the constants cell.
S1_AGR_elec = 61640   # "Other (construction sites, agriculture, etc.)" consumption 2020 / primary-sector firms 2020
S2_IND_elec = 153650  # "Crafts and industry" consumption / secondary-sector firms 2020
S3_SER_elec = 22180   # "Trade and services" consumption / tertiary-sector firms 2020

UST_file = "UST_comune.csv"
canton = "TI"

# Firm counts per district (columns S1_Aziende / S2_Aziende / S3_Aziende, indexed by district name).
fileDir = DATA_DIRECTORY.joinpath("support_data")
data_s123 = pd.read_csv(fileDir.joinpath("S1_S2_S3.csv"), header=0, index_col=0)

# Commune table restricted to the selected canton, and its distinct district numbers.
UST_comune = pd.read_csv(MAP_DIRECTORY.joinpath(UST_file))
UST_comune = UST_comune.loc[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

sector_labels = ["S1_AGR", "S2_IND", "S3_SER"]
district_frames = [pd.DataFrame()]  # one single-column frame per district is appended below

for dis_num in UST_district:
    district_names = UST_comune.loc[UST_comune["Numero del Distretto"] == dis_num, "Nome del Distretto"]
    DISTRICT = str(district_names.iloc[0])

    dis_S1_tot_elec = S1_AGR_elec * data_s123.loc[DISTRICT, "S1_Aziende"]
    dis_S2_tot_elec = S2_IND_elec * data_s123.loc[DISTRICT, "S2_Aziende"]
    dis_S3_tot_elec = S3_SER_elec * data_s123.loc[DISTRICT, "S3_Aziende"]

    tot_elec = pd.DataFrame(
        np.array([[dis_S1_tot_elec], [dis_S2_tot_elec], [dis_S3_tot_elec]]),
        columns=[DISTRICT],
        index=sector_labels,
    )
    district_frames.append(tot_elec)

# Districts side by side, plus the cantonal total per sector.
S123 = pd.concat(district_frames, axis=1)
S123["Total"] = S123.sum(axis=1)
S123

Unnamed: 0,Bellinzona,Blenio,Leventina,Locarno,Lugano,Mendrisio,Riviera,Vallemaggia,Total
S1_AGR,10725360,7643360,7088600,12081440,15718200,11465040,2835440,4438080,71995520
S2_IND,94494750,16286900,23047500,155032850,348478200,156569350,25659550,24430350,843999450
S3_SER,86080580,7119780,11666680,118707360,362421200,123054640,12709140,8716740,730476120


### RESIDENTIAL

In [4]:
def get_hh_MFH():
    """Return the average number of dwellings per multi-family building, per district.

    For every commune of canton "TI" listed in ``data/maps/UST_comune.csv`` the
    building extract in ``data/Buildings_rea/Ticino`` is read
    (``raw-gdf-<commune>.csv`` or, failing that, ``DATA_<commune>.csv``).
    Only existing buildings (``gstat == 1004``) of category ``gkat`` 1020 or
    1030 that carry an ``ewid`` list are considered. A missing ``gklas`` is
    filled from ``gkat`` (1020 -> 1110, 1030 -> 1122). The ``ewid`` lists of
    rows sharing an ``egid`` are counted together, so each building appears
    once with its total number of dwellings.

    Communes without a building file are skipped with a logged warning
    (previously the data of the preceding commune was silently reused).

    Returns
    -------
    pandas.DataFrame
        A single row labelled ``"MFH_avg_households"`` with one column per
        district name and a final ``"TI"`` column holding the unweighted mean
        of the district values. A district without any class-1122 building
        yields NaN.
    """
    data_directory = pathlib.Path().resolve().parent / "data"
    canton_csv_directory = data_directory / "Buildings_rea" / "Ticino"

    ust_comune = pd.read_csv(data_directory / "maps" / "UST_comune.csv")
    ust_comune = ust_comune[ust_comune["Cantone"] == "TI"].reset_index()

    district_averages = {}
    for dis_num in ust_comune["Numero del Distretto"].drop_duplicates():
        in_district = ust_comune["Numero del Distretto"] == dis_num
        district_name = str(ust_comune.loc[in_district, "Nome del Distretto"].iloc[0])

        commune_frames = []
        for com_num in ust_comune.loc[in_district, "Numero UST del Comune"]:
            candidates = (
                canton_csv_directory / ("raw-gdf-" + str(com_num) + ".csv"),
                canton_csv_directory / ("DATA_" + str(com_num) + ".csv"),
            )
            source = next((path for path in candidates if path.exists()), None)
            if source is None:
                logging.warning("get_hh_MFH: no building file for commune %s, skipped", com_num)
                continue

            data_res = pd.read_csv(source, header=0, index_col=0)
            keep = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
            data_res = data_res.loc[keep, ["egid", "gkat", "gklas", "ewid"]].copy()

            # Fill a missing building class from the building category.
            data_res["gklas_fill"] = data_res["gklas"]
            unclassified = data_res["gklas"].isna()
            data_res.loc[unclassified & (data_res["gkat"] == 1020), "gklas_fill"] = 1110
            data_res.loc[unclassified & (data_res["gkat"] == 1030), "gklas_fill"] = 1122

            with_ewid = data_res.dropna(subset=["ewid"]).copy()
            if with_ewid.empty:
                continue

            # "ewid" holds the text form of a Python list; its length is the dwelling count.
            with_ewid["ewid_len"] = with_ewid["ewid"].map(lambda ids: len(ast.literal_eval(ids)))

            # Rows sharing an egid describe the same building: give each of them the
            # building total, then keep a single row per egid.
            per_building = with_ewid.groupby("egid")["ewid_len"].transform("sum")
            with_ewid["ewid_len"] = per_building.fillna(with_ewid["ewid_len"])
            commune_frames.append(with_ewid.drop_duplicates(subset=["egid"], keep="first"))

        if commune_frames:
            district_hh = pd.concat(commune_frames)
            # Buildings without an egid never entered the original count either.
            mfh = district_hh.loc[district_hh["gklas_fill"] == 1122].dropna(subset=["egid"])
            district_averages[district_name] = mfh["ewid_len"].mean()
        else:
            logging.warning("get_hh_MFH: no usable building data for district %s", district_name)
            district_averages[district_name] = np.nan

    TI_hh = pd.DataFrame(district_averages, index=["MFH_avg_households"])
    TI_hh["TI"] = TI_hh.mean(axis=1)
    return TI_hh

In [5]:
def generate_number_list(num):
    """Return the integers 1..num (inclusive) as a list of decimal strings.

    An empty list is returned when ``num`` is smaller than 1.
    """
    return list(map(str, range(1, num + 1)))

In [6]:
def get_hh_stats():
    """Return per-district dwelling statistics from the 2020 and 2023 household inventories.

    Reads ``data/maps/UST_comune.csv`` (communes of canton "TI") and the two
    inventory workbooks in ``data/support_data``. Inventory rows are matched to
    communes through their ``Name`` column and summed per district.

    Returns
    -------
    pandas.DataFrame
        One column per district name and two rows:

        ``"hh2020_p_ratio"``
            ``(ZWG_3150 - (ZWG_3010 + ZWG_3100)) / ZWG_3150`` for 2020, rounded
            to 3 decimals. Despite the label this is the share of dwellings
            that are NOT counted as primary (callers use ``1 - value`` as the
            primary share).
        ``"hh2023_diff"``
            2020 total (``ZWG_3150``) divided by the 2023 total, rounded to
            3 decimals, i.e. a ratio rather than a difference.

    Notes
    -----
    NOTE(review): matching is by commune name only; presumably names are unique
    within the inventory. Verify if other cantons contain homonymous communes.
    """
    data_directory = pathlib.Path().resolve().parent / "data"
    support_directory = data_directory / "support_data"

    ust_comune = pd.read_csv(data_directory / "maps" / "UST_comune.csv")
    ust_comune = ust_comune[ust_comune["Cantone"] == "TI"].reset_index()

    inventory_2020 = pd.read_excel(
        support_directory / "household_inventory_2020-10_2056.xlsx",
        sheet_name="ZWG_2020_Q3", header=0, index_col=0,
    )
    inventory_2023 = pd.read_excel(
        support_directory / "household_inventory_2023-03_2056.xlsx",
        sheet_name="ZWG_2023", header=0, index_col=0,
    )

    def _district_total(inventory, commune_names, column):
        """Sum `column` over the inventory rows whose Name matches one of the communes."""
        return sum(
            inventory.loc[inventory["Name"] == name, column].sum()
            for name in commune_names
        )

    stats = {}
    for dis_num in ust_comune["Numero del Distretto"].drop_duplicates():
        in_district = ust_comune["Numero del Distretto"] == dis_num
        commune_names = ust_comune.loc[in_district, "Nome del Comune"]
        district_name = str(ust_comune.loc[in_district, "Nome del Distretto"].iloc[0])

        total_2020 = _district_total(inventory_2020, commune_names, "ZWG_3150")
        primary_2020 = (
            _district_total(inventory_2020, commune_names, "ZWG_3010")
            + _district_total(inventory_2020, commune_names, "ZWG_3100")
        )
        total_2023 = _district_total(inventory_2023, commune_names, "ZWG_3150")

        secondary_share_2020 = round((total_2020 - primary_2020) / total_2020, 3)
        stock_ratio_2020_2023 = round(total_2020 / total_2023, 3)
        stats[district_name] = [secondary_share_2020, stock_ratio_2020_2023]

    return pd.DataFrame(stats, index=["hh2020_p_ratio", "hh2023_diff"])
    

In [21]:
# RESIDENTIAL ELECTRICITY DEMAND PER DISTRICT - Getting the number of households according to ERA (EWID) - WORKS!
## TEST combine with heating


#TO-DO:
# 1. DONE - combine with htg calc
# 2. DONE - to_csv
# 3. add a little bit of "complexity" - see if it is possible to separate MFH and SFH and still get the right TI total residential electricity
# 4. secondary homes - for further accuracy, not random sample ? "Elles ont d’ailleurs un potentiel important, car les appartements en immeuble représentent plus de 75% du parc de résidence secondaire en Suisse https://makeheatsimple.ch/fr/2021/08/le-chauffage-des-residences-secondaires-a-portee-de-clic-meme-en-ppe/

# --- Folders (data directory next to the directory the notebook runs in) ---
NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p.joinpath("data")
MAP_DIRECTORY = DATA_DIRECTORY.joinpath('maps')
BLDG_DIRECTORY = DATA_DIRECTORY.joinpath('Buildings_rea')
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY.joinpath(CANTON)
sup_file = DATA_DIRECTORY.joinpath("support_data")

# --- Specific demand tables for space heating (SH) and domestic hot water (DHW)
#     per type of building and construction time; fehh is looked up the same way ---
data_sh = pd.read_csv(sup_file.joinpath("demand_SH.csv"), header=0, index_col=0)
data_dhw = pd.read_csv(sup_file.joinpath("demand_DHW.csv"), header=0, index_col=0)
fehh = pd.read_csv(sup_file.joinpath("fehh.csv"), header=0, index_col=0)

# --- Columns kept from the building extracts ---
REA_CODES_DESIRED_LC = [
    'egid', 'strname_deinr', 'ggdename', 'ggdenr', 'gexpdat', 'gdekt', 'egrid',
    'gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
    'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf',
    'gwaerzh1', 'genh1', 'gwaersceh1', 'gwaerdath1',
    'gwaerzh2', 'genh2', 'gwaersceh2', 'gwaerdath2',
    'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
    'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',
    "ewid",
]

# --- Constants ---
EPSG_CODE = "EPSG:2056"
# BUFFER_DISTANCE = 5
GKLAS = [1110,1121,1122,1130, 1211, 1212, 1220, 1230, 1251, 1261, 1262, 1263, 1264, 1265, 1272, 1241, 1242, 1271, 1274, 1275, 1276, 1277, 1278, 1231, 1252, 1273]
GKLAS_HTG = [1110, 1121, 1122, 1130, 1211, 1212, 1220, 1230, 1251, 1261, 1262, 1263, 1264, 1265, 1272, 1231]
GKLAS_NOHTG = [1241, 1242, 1252, 1271, 1273, 1274, 1275, 1276, 1277, 1278]
GBAUP = [8011,8012,8013,8014,8015,8016,8017,8018,8019,8020,8021,8022,8023]
GSTAT = 1004 # existing

# SIA 416 - the net area of a room or group of rooms is roughly 90% of the floor area [SIA 380-4 (2006)]
NET_ERA_SHARE = 0.9

# SFH_ERA_FACTOR = 1.15 # ODIS
# DFH_ERA_FACTOR = 1.2 # ODIS
# MFH_ERA_FACTOR = 1.2 # ODIS

AVG_HH_ELEC_TI = 3499 # kWh/y USTAT 2020 = total appliances / total dwellings (ewid)
UNOCC = 0.0283 # share of unoccupied buildings; should be around 0.0283 for TI

# --- Communes and districts of the selected canton ---
UST_file = "UST_comune.csv"
canton = "TI"
UST_comune = pd.read_csv(MAP_DIRECTORY.joinpath(UST_file))
UST_comune = UST_comune.loc[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

# --- Accumulators used by the district loop ---
TI_res = pd.DataFrame()
district_res = pd.DataFrame()
ewid_count = pd.DataFrame()

# --- Per-district helper statistics ---
avg_hh_MFH = get_hh_MFH()
hh_stats = get_hh_stats()

# Residential demand per district: dwellings, electricity, space heating (SH) and
# domestic hot water (DHW), built up from the per-commune building extracts.

BASE_YEAR = 2020               # buildings with a construction year after this are ignored
SECONDARY_USE_FACTOR = 1/4     # value of the "primary" column for buildings drawn as secondary residences
SECONDARY_SH_DIVISOR = 3       # SH of secondary residences is additionally divided by this
DEFAULT_GBAUP = 8016.0         # gbaup code used for building classes absent from a commune
RANDOM_SEED = 1                # seeds the primary/secondary draw so the cell is reproducible

rng = np.random.default_rng(RANDOM_SEED)
district_columns = []

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Numero UST del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])

    # Default ewid list for multi-family buildings without one: as many entries as
    # the district's (rounded) average number of dwellings per multi-family building.
    num = round(avg_hh_MFH.at["MFH_avg_households", DISTRICT])
    MFH_hh_list = generate_number_list(num)
    MFH_avg = str(MFH_hh_list)

    commune_frames = []
    for com_num in communes:
        fileloc = CANTON_CSV_DIRECTORY
        filename_gdf = "raw-gdf-" + str(com_num) + ".csv"
        filename_data = "DATA_" + str(com_num) + ".csv"

        if (fileloc / filename_gdf).exists():
            data_res = pd.read_csv(fileloc / filename_gdf, header=0, index_col=0)
        elif (fileloc / filename_data).exists():
            data_res = pd.read_csv(fileloc / filename_data, header=0, index_col=0)
        else:
            # Without this branch the previous commune's data would be counted again.
            logging.warning("No building file for commune %s (district %s), skipped", com_num, DISTRICT)
            continue

        # Keep only existing residential buildings (gkat 1020 / 1030).
        existing_residential = (data_res["gstat"] == GSTAT) & data_res["gkat"].isin([1020, 1030])
        data_res = data_res.loc[existing_residential, REA_CODES_DESIRED_LC]

        # Base year 2020: drop every building built later (rows without a year are kept).
        data_res = data_res.loc[~(data_res["gbauj"] > BASE_YEAR)].copy()
        if data_res.empty:
            continue

        data_res["garea_fill"] = data_res["garea"]
        data_res["gbaup_fill"] = data_res["gbaup"]
        data_res["gastw_fill"] = data_res["gastw"]
        data_res["gklas_fill"] = data_res["gklas"]
        data_res = data_res.reset_index()

        # Median gbaup and median footprint per (gkat, gklas), used to fill gaps.
        age_pivot = pd.pivot_table(
            data_res[["gkat", "gklas", "gbaup"]],
            values="gbaup", index=["gkat"], columns=["gklas"], aggfunc="median",
        ).round(0)
        known_age_classes = list(age_pivot.columns)
        for code in GKLAS:
            if code not in known_age_classes:
                age_pivot[code] = DEFAULT_GBAUP

        size_pivot = pd.pivot_table(
            data_res[["gkat", "gklas", "garea"]],
            values="garea", index=["gkat"], columns=["gklas"], aggfunc="median",
        ).round(0)
        size_col = list(size_pivot.columns)

        # NOTE(review): the class is overwritten for EVERY row, also where gklas is
        # known, whereas get_hh_MFH only fills missing classes. Kept as in the
        # original; confirm that this is intended.
        data_res.loc[data_res["gkat"] == 1020, "gklas_fill"] = 1110
        data_res.loc[data_res["gkat"] == 1030, "gklas_fill"] = 1122  # 63% of known 1040

        # Buildings without an ewid list get a default list depending on their class.
        no_ewid = data_res["ewid"].isna()
        data_res["ewid"] = data_res["ewid"].fillna(value="['no']")
        default_ewid = {1110: "['1']", 1121: "['1', '2']", 1122: MFH_avg}
        for gklas_code, ewid_text in default_ewid.items():
            data_res.loc[no_ewid & (data_res["gklas_fill"] == gklas_code), "ewid"] = ewid_text

        # Missing gbaup: median of the same (gkat, gklas) group.
        for g in data_res.index[data_res["gbaup"].isna()]:
            data_res.at[g, "gbaup_fill"] = age_pivot.loc[data_res.at[g, "gkat"], data_res.at[g, "gklas_fill"]]

        # Missing footprint: 0 for the GKLAS_NOHTG classes, otherwise the group median.
        for g in data_res.index[data_res["garea"].isna()]:
            building_class = data_res.at[g, "gklas_fill"]
            if building_class in GKLAS_NOHTG:
                data_res.at[g, "garea_fill"] = 0
                continue
            if building_class not in size_col and building_class == 1130.0:
                size_pivot.loc[1040.0, 1130.0] = size_pivot.loc[1040.0, 1211.0]
            data_res.at[g, "garea_fill"] = size_pivot.loc[(data_res.at[g, "gkat"], building_class)]

        data_res["garea_fill"] = data_res["garea_fill"].astype(np.int64)
        data_res["gklas_fill"] = data_res["gklas_fill"].astype(np.int64)
        data_res["gbaup_fill"] = data_res["gbaup_fill"].astype(np.int64)
        data_res["gastw_fill"] = data_res["gastw_fill"].fillna(value=1)

        # "ewid" is the text form of a Python list; its length is the number of dwellings.
        data_res["ewid_len"] = data_res["ewid"].map(lambda ids: len(ast.literal_eval(ids)))

        # Rows sharing an egid are the same building: sum their dwellings, keep one row.
        dwellings_per_egid = data_res.groupby("egid")["ewid_len"].transform("sum")
        data_res["ewid_len"] = dwellings_per_egid.fillna(data_res["ewid_len"])
        data_res = data_res.drop_duplicates(subset=["egid"]).reset_index(drop=True)

        # Specific demand factors, looked up by (gbaup code, building class as text).
        lookup_keys = list(zip(data_res["gbaup_fill"], data_res["gklas_fill"].astype(str)))
        data_res["k_SH"] = [data_sh.loc[key] for key in lookup_keys]
        data_res["k_DHW"] = [data_dhw.loc[key] for key in lookup_keys]
        data_res["k_fehh"] = [fehh.loc[key] for key in lookup_keys]

        commune_frames.append(data_res)

    if not commune_frames:
        logging.warning("No building data for district %s, skipped", DISTRICT)
        continue
    district_res = pd.concat(commune_frames, ignore_index=True)

    # Keep a random (1 - UNOCC) share of the buildings as occupied; the fixed
    # random_state makes the draw repeatable.
    sample_size = int(len(district_res)*(1-UNOCC))
    district_res_occ = district_res.sample(n=sample_size, random_state=1).reset_index(drop=True)

    # hh_stats "hh2020_p_ratio" holds the share of secondary residences.
    secondary_share = hh_stats.at["hh2020_p_ratio", DISTRICT]
    d_hh = district_res_occ["ewid_len"].sum()
    #d_hh = d_hh*hh_stats.at["hh2023_diff",DISTRICT] #"hh2023_diff" = ratio of residences in 2020/2023
    d_hh_p = d_hh*(1-secondary_share)  # primary residences
    d_hh_s = d_hh*secondary_share      # secondary residences

    # Randomly mark buildings as primary (1) or secondary (SECONDARY_USE_FACTOR),
    # in the proportion given by the secondary share.
    num_rows = len(district_res_occ)
    num_ones = round(num_rows*(1-secondary_share))
    rand_values = np.array([1] * num_ones + [SECONDARY_USE_FACTOR] * (num_rows - num_ones))
    rng.shuffle(rand_values)
    district_res_occ['primary'] = rand_values

    # Building attributes and demand.
    district_res_occ['ERA'] = district_res_occ['garea_fill']*district_res_occ['gastw_fill']*NET_ERA_SHARE
    is_primary = district_res_occ['primary'] == 1
    sh_weight = np.where(is_primary, district_res_occ['primary'], district_res_occ['primary']/SECONDARY_SH_DIVISOR)
    district_res_occ['SH'] = district_res_occ['k_SH']*district_res_occ['ERA']*sh_weight
    district_res_occ['DHW'] = district_res_occ['k_DHW']*district_res_occ['ERA']*district_res_occ['primary']
    district_res_occ['ELEC'] = district_res_occ['ewid_len']*AVG_HH_ELEC_TI*district_res_occ['primary']

    d_elec = district_res_occ["ELEC"].sum()
    d_sh = district_res_occ["SH"].sum()
    d_dhw = district_res_occ["DHW"].sum()

    d = pd.DataFrame([d_hh, d_hh_p, d_hh_s, d_elec, d_sh, d_dhw], index=["hh","hh_p","hh_s","res_elec","res_sh","res_dhw"], columns=[DISTRICT])
    district_columns.append(d)

# One column per district; rebuilt from scratch so re-running the cell does not duplicate columns.
TI_res = pd.concat(district_columns, axis=1) if district_columns else pd.DataFrame()

# Cantonal total = sum over the district columns.
TI_res = TI_res.assign(TI=TI_res.sum(axis=1))

# Write the table to <data>/results/residential_demand.csv, creating the folder if needed.
path = str(DATA_DIRECTORY)+ "/results/residential_demand.csv"
filepath = pathlib.Path(path)
results_folder = filepath.parent
results_folder.mkdir(parents=True, exist_ok=True)
TI_res.to_csv(filepath, sep=";", encoding='utf-8-sig', index_label='index')
TI_res

Unnamed: 0,Bellinzona,Blenio,Leventina,Locarno,Lugano,Mendrisio,Riviera,Vallemaggia,TI
hh,32299.0,6918.0,9334.0,56698.0,94766.0,29828.0,6156.0,6630.0,242629.0
hh_p,27131.16,2573.496,4433.65,32487.95,76949.99,24608.1,5029.452,2638.74,175852.5
hh_s,5167.84,4344.504,4900.35,24210.05,17816.01,5219.9,1126.548,3991.26,66776.46
res_elec,99483570.0,12816840.0,19806090.0,134994900.0,286270700.0,90270700.0,18747640.0,12850950.0,675241400.0
res_sh,335284200.0,45953420.0,80680910.0,450824200.0,1106362000.0,405798800.0,69990130.0,44201080.0,2539095000.0
res_dhw,56998470.0,6446376.0,11109450.0,75533270.0,183120200.0,60291340.0,10922390.0,6454955.0,410876500.0


# TESTS

In [7]:
# ELECTRICITY DEMAND PER DISTRICT - Getting the number of households according to ERA (EWID) - WORKS
# NOTE: there are egid duplicates but each have different ewids, for the purpose of elec keep all, but need to write code that combines the ewids for each egid. For heating work with EGID since the garea is duplicated

#TO-DO:
# 1. combine with htg calc
# 2. to_csv
# 3. add a little bit of "complexity" - see if it is possible to separate MFH and SFH and still get the right TI total residential electricity
# 4. secondary homes - reduce consumption

# --- Folders (data directory next to the directory the notebook runs in) ---
NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p.joinpath("data")
MAP_DIRECTORY = DATA_DIRECTORY.joinpath('maps')
BLDG_DIRECTORY = DATA_DIRECTORY.joinpath('Buildings_rea')
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY.joinpath(CANTON)

# --- Columns kept from the building extracts ---
REA_CODES_DESIRED_LC = [
    'egid', 'strname_deinr', 'ggdename', 'ggdenr', 'gexpdat', 'gdekt', 'egrid',
    'gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
    'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf',
    'gwaerzh1', 'genh1', 'gwaersceh1', 'gwaerdath1',
    'gwaerzh2', 'genh2', 'gwaersceh2', 'gwaerdath2',
    'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
    'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',
    "ewid",
]

AVG_HH_ELEC_TI = 3664 # kWh/y USTAT 2020 = total appliances / total dwellings (ewid)
UNOCC = 0.0283 # share of unoccupied dwellings; should be around 0.0283 for TI

# --- Communes and districts of the selected canton ---
UST_file = "UST_comune.csv"
canton = "TI"
UST_comune = pd.read_csv(MAP_DIRECTORY.joinpath(UST_file))
UST_comune = UST_comune.loc[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

# --- Accumulators used by the district loop ---
TI_elec = pd.DataFrame()
district_res_elec = pd.DataFrame()
ewid_count = pd.DataFrame()

# --- Per-district helper statistics ---
avg_hh_MFH = get_hh_MFH()
hh_stats = get_hh_stats()

# Electricity-only check: dwellings per district (from the ewid lists) times the
# average electricity use per dwelling; secondary residences count for a quarter.
district_columns = []

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Numero UST del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])

    # Default ewid list for multi-family buildings without one: as many entries as
    # the district's (rounded) average number of dwellings per multi-family building.
    num = round(avg_hh_MFH.at["MFH_avg_households", DISTRICT])
    MFH_hh_list = generate_number_list(num)
    MFH_avg = str(MFH_hh_list)

    commune_frames = []
    for com_num in communes:
        fileloc = CANTON_CSV_DIRECTORY
        filename_gdf = "raw-gdf-" + str(com_num) + ".csv"
        filename_data = "DATA_" + str(com_num) + ".csv"

        if (fileloc / filename_gdf).exists():
            data_res = pd.read_csv(fileloc / filename_gdf, header=0, index_col=0)
        elif (fileloc / filename_data).exists():
            data_res = pd.read_csv(fileloc / filename_data, header=0, index_col=0)
        else:
            # Without this branch the previous commune's data would be counted again.
            logging.warning("No building file for commune %s (district %s), skipped", com_num, DISTRICT)
            continue

        # Existing buildings (gstat 1004) of category gkat 1020 / 1030 only.
        existing_residential = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
        data_res = data_res.loc[existing_residential, REA_CODES_DESIRED_LC].copy()

        data_res["gklas_fill"] = data_res["gklas"]
        data_res = data_res.reset_index()

        # NOTE(review): the class is overwritten for every row, also where gklas is
        # known (get_hh_MFH only fills missing classes). Kept as in the original.
        data_res.loc[data_res["gkat"] == 1020, "gklas_fill"] = 1110
        data_res.loc[data_res["gkat"] == 1030, "gklas_fill"] = 1122  # 63% of known 1040

        # Buildings without an ewid list get a default list depending on their class.
        no_ewid = data_res["ewid"].isna()
        data_res["ewid"] = data_res["ewid"].fillna(value="['no']")
        default_ewid = {1110: "['1']", 1121: "['1', '2']", 1122: MFH_avg}
        for gklas_code, ewid_text in default_ewid.items():
            data_res.loc[no_ewid & (data_res["gklas_fill"] == gklas_code), "ewid"] = ewid_text

        # "ewid" is the text form of a Python list; its length is the number of dwellings.
        # Rows sharing an egid are all kept here: only the dwelling total matters.
        data_res["ewid_len"] = data_res["ewid"].map(lambda ids: len(ast.literal_eval(ids)))

        commune_frames.append(data_res)

    if not commune_frames:
        logging.warning("No building data for district %s, skipped", DISTRICT)
        continue
    district_res_elec = pd.concat(commune_frames)

    # Occupied dwellings, rescaled with the 2020/2023 dwelling-stock ratio.
    d_hh = (1-UNOCC)*(district_res_elec["ewid_len"].sum())
    d_hh = d_hh*hh_stats.at["hh2023_diff", DISTRICT]
    # hh_stats "hh2020_p_ratio" holds the share of secondary residences.
    d_hh_p = d_hh*(1-hh_stats.at["hh2020_p_ratio", DISTRICT])
    d_hh_s = d_hh*(hh_stats.at["hh2020_p_ratio", DISTRICT])
    d_elec = d_hh_p*AVG_HH_ELEC_TI + d_hh_s*(AVG_HH_ELEC_TI/4)

    d = pd.DataFrame([d_hh, d_hh_p, d_hh_s, d_elec], index=["hh","hh_p","hh_s","res_elec"], columns=[DISTRICT])
    district_columns.append(d)

# One column per district plus the cantonal total; rebuilt from scratch on every run.
TI_elec = pd.concat(district_columns, axis=1) if district_columns else pd.DataFrame()
TI_elec["TI"] = TI_elec.sum(axis=1)
TI_elec

Unnamed: 0,Bellinzona,Blenio,Leventina,Locarno,Lugano,Mendrisio,Riviera,Vallemaggia,TI
hh,31649.41,6739.898,9178.087,54139.7,88197.8,29558.04,5984.407,6408.237,231855.6
hh_p,26585.5,2507.242,4359.592,31022.05,71616.62,24385.39,4889.26,2550.478,167916.1
hh_s,5063.905,4232.656,4818.496,23117.65,16581.19,5172.658,1095.146,3857.759,63939.46
res_elec,102047800.0,13063650.0,20387290.0,134840500.0,277591600.0,94086210.0,18917400.0,12878660.0,673813200.0


In [None]:
# Scratch check: draw a repeatable random sample of 90% of the rows of data_res.
# NOTE: data_res is not defined in this cell; it is whatever an earlier cell left behind.
sample_size = int(0.9 * len(data_res))
sample_df = data_res.sample(random_state=42, n=sample_size)
sample_df

In [None]:
# Number of households per district, based on
# https://data.geo.admin.ch/browser/index.html#/collections/ch.are.wohnungsinventar-zweitwohnungsanteil?.language=en
#
# 1. Get the secondary households per commune
# 2. Get the difference in the number of households between 2020 and 2023

# Folders (data directory next to the directory the notebook runs in).
NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p.joinpath("data")
MAP_DIRECTORY = DATA_DIRECTORY.joinpath('maps')
BLDG_DIRECTORY = DATA_DIRECTORY.joinpath('Buildings_rea')

# Canton to analyse (alternative: "Genève").
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY.joinpath(CANTON)

# Communes of the selected canton and their distinct district numbers.
UST_file = "UST_comune.csv"
canton = "TI"
UST_comune = pd.read_csv(MAP_DIRECTORY.joinpath(UST_file))
UST_comune = UST_comune.loc[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

In [None]:
# Single-district dry run (Lugano) of the household-inventory statistics.
# Requires `UST_comune`, `data_households_2020` and `data_households_2023` to be
# loaded already; this cell does not read them itself.
TI_hh_2020 = pd.DataFrame()

dis_num = LUGANO_UST
in_district = UST_comune["Numero del Distretto"] == dis_num
communes = UST_comune.loc[in_district, "Nome del Comune"]
# Derive the column label from dis_num itself. Previously DISTRICT was whatever
# another cell had left in the kernel, so the result could carry the wrong name.
DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])

# Inventory rows are matched to communes by name; gather them and concatenate once.
rows_2020 = []
rows_2023 = []
for com_name in communes:
    rows_2020.append(data_households_2020.loc[data_households_2020["Name"] == com_name].reset_index())
    rows_2023.append(data_households_2023.loc[data_households_2023["Name"] == com_name].reset_index())
district_hh_2020 = pd.concat(rows_2020)
district_hh_2023 = pd.concat(rows_2023)

# 2020 totals: ZWG_3150 is summed as the dwelling total, ZWG_3010 + ZWG_3100 as
# the primary dwellings (column meaning as implied by the variable names).
dis_tot_hh_2020 = district_hh_2020["ZWG_3150"].sum()
dis_tot_primary_2020 = district_hh_2020["ZWG_3010"].sum() + district_hh_2020["ZWG_3100"].sum()
# Share of dwellings that are NOT primary. It is stored below under the
# established row label "hh2020_p_ratio".
calc_dist_secondary_2020 = round((dis_tot_hh_2020 - dis_tot_primary_2020) / dis_tot_hh_2020, 3)
# Per-commune ZWG_3120 statistics, kept to compare against the computed share.
ratio_secondary_mean_2020 = district_hh_2020["ZWG_3120"].mean()
ratio_secondary_median_2020 = district_hh_2020["ZWG_3120"].median()

dis_tot_hh_2023 = district_hh_2023["ZWG_3150"].sum()
dis_tot_primary_2023 = district_hh_2023["ZWG_3010"].sum() + district_hh_2023["ZWG_3100"].sum()

# "hh2023_diff" is applied elsewhere in this notebook as a multiplicative factor
# and the all-district cell stores the 2020/2023 ratio under this label, so use
# the same ratio here instead of the absolute difference.
hh_diff = round(dis_tot_hh_2020 / dis_tot_hh_2023, 3)

d_hh = pd.DataFrame(
    [calc_dist_secondary_2020, hh_diff],
    index=["hh2020_p_ratio", "hh2023_diff"],
    columns=[DISTRICT],
)
TI_hh_2020 = pd.concat([TI_hh_2020, d_hh], axis=1)
# The former `district_hh = district_hh[0:0]` reset is dropped: this cell never
# creates `district_hh`, so it either raised NameError or wiped another cell's frame.

TI_hh_2020

In [18]:
## GET ratios for primary households and difference between 2023 and 2020 - WORKS!
sup_file = DATA_DIRECTORY / "support_data"

# Housing inventory snapshots (one row per commune, matched below on "Name").
data_households_2020 = pd.read_excel(sup_file / "household_inventory_2020-10_2056.xlsx", sheet_name="ZWG_2020_Q3", header=0, index_col=0)
data_households_2023 = pd.read_excel(sup_file / "household_inventory_2023-03_2056.xlsx", sheet_name="ZWG_2023", header=0, index_col=0)

district_columns = []  # one single-column frame per district, joined once at the end

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Nome del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])

    # NOTE(review): communes are matched by name only; if the inventory holds
    # homonymous communes from other cantons they would be included - confirm.
    district_hh_2020 = pd.concat(
        [data_households_2020.loc[data_households_2020["Name"] == com_name].reset_index() for com_name in communes]
    )
    district_hh_2023 = pd.concat(
        [data_households_2023.loc[data_households_2023["Name"] == com_name].reset_index() for com_name in communes]
    )

    # ZWG_3150 is summed as the dwelling total, ZWG_3010 + ZWG_3100 as the
    # primary dwellings (column meaning as implied by the variable names).
    dis_tot_hh_2020 = district_hh_2020["ZWG_3150"].sum()
    dis_tot_primary_2020 = district_hh_2020["ZWG_3010"].sum() + district_hh_2020["ZWG_3100"].sum()

    # Share of dwellings that are NOT primary; exported under the established
    # row label "hh2020_p_ratio" (label kept because readers of the CSV index by it).
    calc_dist_secondary_2020 = round((dis_tot_hh_2020 - dis_tot_primary_2020) / dis_tot_hh_2020, 3)

    # Per-commune ZWG_3120 statistics, kept to compare against the computed share.
    ratio_secondary_mean_2020 = district_hh_2020["ZWG_3120"].mean()
    ratio_secondary_median_2020 = district_hh_2020["ZWG_3120"].median()

    dis_tot_hh_2023 = district_hh_2023["ZWG_3150"].sum()
    dis_tot_primary_2023 = district_hh_2023["ZWG_3010"].sum() + district_hh_2023["ZWG_3100"].sum()

    # Despite the "diff" label this is the ratio of 2020 to 2023 dwelling totals.
    hh_diff = round(dis_tot_hh_2020 / dis_tot_hh_2023, 3)

    d_hh = pd.DataFrame(
        [dis_tot_hh_2020, dis_tot_primary_2020, dis_tot_hh_2023, calc_dist_secondary_2020, hh_diff],
        index=["hh2020_tot", "hh2020_p", "hh2023_tot", "hh2020_p_ratio", "hh2023_diff"],
        columns=[DISTRICT],
    )
    district_columns.append(d_hh)

TI_hh_2020 = pd.concat(district_columns, axis=1)

# Persist the per-district statistics next to the source spreadsheets.
filepath = sup_file / "housing_inventory_stats.csv"
path = str(filepath)
filepath.parent.mkdir(parents=True, exist_ok=True)
TI_hh_2020.to_csv(filepath, sep=";", encoding='utf-8-sig', index_label='index')
TI_hh_2020

Unnamed: 0,Bellinzona,Blenio,Leventina,Locarno,Lugano,Mendrisio,Riviera,Vallemaggia
hh2020_tot,31672.0,6981.0,9186.0,55812.0,89054.0,29599.0,5779.0,6659.0
hh2020_p,26617.0,2595.0,4363.0,31979.0,72323.0,24425.0,4720.0,2650.0
hh2023_tot,32968.0,7180.0,9385.0,59265.0,97105.0,30435.0,6027.0,6950.0
hh2020_p_ratio,0.16,0.628,0.525,0.427,0.188,0.175,0.183,0.602
hh2023_diff,0.961,0.972,0.979,0.942,0.917,0.973,0.959,0.958


In [None]:
# Get number of households per district - WORKS
# For every district: average number of dwellings (length of the `ewid` list) per
# building of class 1122, using only buildings whose `ewid` list is present.

NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p / "data"
MAP_DIRECTORY = DATA_DIRECTORY /'maps'
BLDG_DIRECTORY = DATA_DIRECTORY /'Buildings_rea'
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY / CANTON

REA_CODES_DESIRED_LC = ['egid', 'strname_deinr', 'ggdename', 'ggdenr',
       'gexpdat', 'gdekt', 'egrid','gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
       'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
       'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
       'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
       'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',"ewid"]

UST_file = "UST_comune.csv"
canton ="TI"
UST_comune = pd.read_csv(MAP_DIRECTORY/UST_file)
UST_comune = UST_comune[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

AVG_HH_ELEC_TI = 2741 # kWh/y USTAT 2020 = tot apparecchi/tot abitazioni ewid 
UNOCC = 0.0283 # should be around 0.0283 for TI

district_columns = []  # one single-column frame per district, joined once at the end

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Numero UST del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])
    commune_frames = []

    for com_num in communes:
        # Prefer the raw geodataframe export, fall back to the DATA_ export.
        csv_candidates = (
            CANTON_CSV_DIRECTORY / f"raw-gdf-{com_num}.csv",
            CANTON_CSV_DIRECTORY / f"DATA_{com_num}.csv",
        )
        csv_path = next((cand for cand in csv_candidates if cand.exists()), None)
        if csv_path is None:
            # Without this guard the previous commune's `data_res` was silently
            # processed again (or a NameError was raised for the first commune).
            logging.warning("no building CSV for commune %s - skipped", com_num)
            continue
        data_res = pd.read_csv(csv_path, header=0, index_col=0)

        # Keep building status 1004 and building categories 1020 / 1030 only.
        keep_rows = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
        data_res = data_res.loc[keep_rows, REA_CODES_DESIRED_LC].reset_index(drop=True)

        # Where the building class is missing, derive it from the category:
        # 1020 -> 1110, 1030 -> 1122 (SFH / MFH in this notebook's class dictionary).
        data_res["gklas_fill"] = data_res["gklas"]
        gklas_missing = data_res["gklas"].isna()
        data_res.loc[gklas_missing & (data_res["gkat"] == 1020), "gklas_fill"] = 1110
        data_res.loc[gklas_missing & (data_res["gkat"] == 1030), "gklas_fill"] = 1122

        data_res["egid_duplicate"] = data_res.duplicated(subset=["egid"], keep=False)

        # Only rows carrying an `ewid` list can be counted.
        no_nan = data_res.dropna(subset=["ewid"]).reset_index(drop=True)
        if no_nan.empty:
            continue
        # `ewid` is stored as the text of a Python list; its length is the dwelling count.
        no_nan["ewid_len"] = no_nan["ewid"].map(lambda raw: len(ast.literal_eval(raw)))
        # A building (egid) may be split over several rows with different ewids:
        # give each row the building total, then keep one row per building.
        no_nan["ewid_len"] = no_nan.groupby("egid")["ewid_len"].transform("sum")
        no_nan = no_nan.drop_duplicates(subset=["egid"], keep="first")

        commune_frames.append(no_nan)

    if not commune_frames:
        logging.warning("no usable building data for district %s - skipped", DISTRICT)
        continue
    district_hh = pd.concat(commune_frames)

    # Number of buildings per (class, dwelling count) combination.
    abi_num = (
        district_hh[["egid", "gklas_fill", "ewid_len"]]
        .groupby(["gklas_fill", "ewid_len"])
        .count()
        .rename(columns={"egid": "count"})
        .rename_axis(index={"gklas_fill": "gklas", "ewid_len": "number of households"})
    )
    med = abi_num.reset_index(level="number of households")
    med["TOT"] = med["number of households"] * med["count"]
    # Weighted mean dwellings per building for class 1122 (raises KeyError if the
    # district has no such building, as before).
    MFH = med.loc[[1122.0]]
    MFH_avghh = MFH["TOT"].sum() / MFH["count"].sum()

    hh = pd.DataFrame([MFH_avghh], index=["MFH_avg_households"], columns=[DISTRICT])
    district_columns.append(hh)

TI_hh = pd.concat(district_columns, axis=1)
# Canton figure = unweighted mean of the district averages.
TI_hh["TI"] = TI_hh.mean(axis=1)
TI_hh

Municipalities must report the total number of apartments and the number of first (primary) apartments in the apartment inventory; however, they are not obliged to declare first apartments, equivalent apartments or second apartments as such in the RBD. Consequently, the apartment-inventory figures on second apartments are not comparable from one municipality to another.

In [None]:
# ELECTRICITY DEMAND PER DISTRICT - Getting the number of households according to ERA (EWID) - WORKS
# 1. there are egid duplicates but each have different ewids, for the purpose of elec keep all, but need to write code that combines the ewids for each egid. For heating work with EGID since the garea is duplicated
# 2. combine with htg calc
# 3. to_csv
# 4. add a little bit of "complexity" - see if possible to seperate MFH and SFH and still get the right TI total res elect

NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p / "data"
MAP_DIRECTORY = DATA_DIRECTORY /'maps'
BLDG_DIRECTORY = DATA_DIRECTORY /'Buildings_rea'
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY / CANTON

REA_CODES_DESIRED_LC = ['egid', 'strname_deinr', 'ggdename', 'ggdenr',
       'gexpdat', 'gdekt', 'egrid','gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
       'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
       'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
       'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
       'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',"ewid"]

UST_file = "UST_comune.csv"
canton ="TI"
UST_comune = pd.read_csv(MAP_DIRECTORY/UST_file)
UST_comune = UST_comune[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

AVG_HH_ELEC_TI = 2741 # kWh/y USTAT 2020 = tot apparecchi/tot abitazioni ewid 
UNOCC = 0.0283 # should be around 0.0283 for TI

# Placeholder `ewid` text used when a building has no `ewid` list, keyed by the
# filled building class; only the list LENGTH matters (1, 2 and 7 dwellings).
DEFAULT_EWID_BY_GKLAS = {
    1110: "['1']",
    1121: "['1', '2']",
    1122: "['1', '2', '3', '4', '5', '6', '7']",  # GET real median
}

district_columns = []  # one single-column frame per district, joined once at the end
district_res_elec = pd.DataFrame()

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Numero UST del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])
    commune_frames = []

    for com_num in communes:
        # Prefer the raw geodataframe export, fall back to the DATA_ export.
        csv_candidates = (
            CANTON_CSV_DIRECTORY / f"raw-gdf-{com_num}.csv",
            CANTON_CSV_DIRECTORY / f"DATA_{com_num}.csv",
        )
        csv_path = next((cand for cand in csv_candidates if cand.exists()), None)
        if csv_path is None:
            # Without this guard the previous commune's `data_res` was silently
            # counted a second time (or a NameError was raised for the first commune).
            logging.warning("no building CSV for commune %s - skipped", com_num)
            continue
        data_res = pd.read_csv(csv_path, header=0, index_col=0)

        # Keep building status 1004 and building categories 1020 / 1030 only.
        keep_rows = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
        data_res = data_res.loc[keep_rows, REA_CODES_DESIRED_LC].reset_index()

        # NOTE(review): the class is derived from gkat for EVERY row here, not only
        # where gklas is missing (as the household cells do). gklas_fill therefore
        # only ever holds 1110 or 1122 and the 1121 default above is never used.
        # Behaviour kept unchanged - confirm whether this is intended.
        data_res["gklas_fill"] = data_res["gklas"]
        data_res.loc[data_res["gkat"] == 1020, "gklas_fill"] = 1110
        data_res.loc[data_res["gkat"] == 1030, "gklas_fill"] = 1122 # 63% of known 1040

        # Fill missing `ewid` with the class default ("['no']" - length 1 - when
        # the class has no default), then count the entries of each list.
        fallback_ewid = data_res["gklas_fill"].map(DEFAULT_EWID_BY_GKLAS).fillna("['no']")
        data_res["ewid"] = data_res["ewid"].fillna(fallback_ewid)
        data_res["ewid_len"] = data_res["ewid"].map(lambda raw: len(ast.literal_eval(raw)))

        commune_frames.append(data_res)

    if not commune_frames:
        logging.warning("no usable building data for district %s - skipped", DISTRICT)
        continue
    district_res_elec = pd.concat(commune_frames)

    # Dwellings reduced by the unoccupied share, times the average household demand.
    d_hh = (1 - UNOCC) * district_res_elec["ewid_len"].sum()
    d_elec = d_hh * AVG_HH_ELEC_TI
    d = pd.DataFrame([d_hh, d_elec], index=["hh", "res_elec"], columns=[DISTRICT])
    district_columns.append(d)

TI_elec = pd.concat(district_columns, axis=1)
# Canton total = sum over the districts.
TI_elec["TI"] = TI_elec.sum(axis=1)
TI_elec

In [None]:
# Draft: aggregate the housing inventory per district.
# NOTE(review): `data_households` is not created by any cell visible here (only
# `data_households_2020` / `data_households_2023` are) - confirm which one is meant.
district_columns = []

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Nome del Comune"]
    DISTRICT_NAME = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])

    # Inventory rows of every commune of the district, matched by commune name.
    district_hh = pd.concat(
        [data_households.loc[data_households["Name"] == com_name].reset_index(drop=True) for com_name in communes]
    )

    # Previously `hh` - i.e. only the LAST commune's rows, unlabelled - was appended
    # to TI_hh and DISTRICT_NAME was never used. Append the district-wide sum of the
    # numeric columns instead, labelled with the district name.
    # NOTE(review): columns holding shares/ratios are summed as well; treat those
    # rows with care or recompute them from the summed counts.
    district_totals = district_hh.select_dtypes(include="number").sum().rename(DISTRICT_NAME)
    district_columns.append(district_totals)

TI_hh = pd.concat(district_columns, axis=1)
TI_hh["TI"] = TI_hh.mean(axis=1)
TI_hh

In [None]:
# Get number of households per district using rea data
# Per district: average dwellings per class-1122 building, where buildings without
# an `ewid` list receive an assumed dwelling count (see the imputation step).

NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p / "data"
MAP_DIRECTORY = DATA_DIRECTORY /'maps'
BLDG_DIRECTORY = DATA_DIRECTORY /'Buildings_rea'
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY / CANTON

REA_CODES_DESIRED_LC = ['egid', 'strname_deinr', 'ggdename', 'ggdenr',
       'gexpdat', 'gdekt', 'egrid','gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
       'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
       'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
       'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
       'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',"ewid"]

UST_file = "UST_comune.csv"
canton ="TI"
UST_comune = pd.read_csv(MAP_DIRECTORY/UST_file)
UST_comune = UST_comune[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

AVG_HH_ELEC_TI = 2741 # kWh/y USTAT 2020 = tot apparecchi/tot abitazioni ewid 
UNOCC = 0.0283 # should be around 0.0283 for TI

district_columns = []  # one single-column frame per district, joined once at the end

for dis_num in UST_district:
    in_district = UST_comune["Numero del Distretto"] == dis_num
    communes = UST_comune.loc[in_district, "Numero UST del Comune"]
    DISTRICT = str(UST_comune.loc[in_district, "Nome del Distretto"].iloc[0])
    commune_frames = []

    for com_num in communes:
        # Prefer the raw geodataframe export, fall back to the DATA_ export.
        csv_candidates = (
            CANTON_CSV_DIRECTORY / f"raw-gdf-{com_num}.csv",
            CANTON_CSV_DIRECTORY / f"DATA_{com_num}.csv",
        )
        csv_path = next((cand for cand in csv_candidates if cand.exists()), None)
        if csv_path is None:
            # Without this guard the previous commune's `data_res` was silently
            # processed again (or a NameError was raised for the first commune).
            logging.warning("no building CSV for commune %s - skipped", com_num)
            continue
        data_res = pd.read_csv(csv_path, header=0, index_col=0)

        # Keep building status 1004 and building categories 1020 / 1030 only.
        keep_rows = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
        data_res = data_res.loc[keep_rows, REA_CODES_DESIRED_LC].reset_index(drop=True)

        # Where the building class is missing, derive it from the category:
        # 1020 -> 1110, 1030 -> 1122 (SFH / MFH in this notebook's class dictionary).
        data_res["gklas_fill"] = data_res["gklas"]
        gklas_missing = data_res["gklas"].isna()
        data_res.loc[gklas_missing & (data_res["gkat"] == 1020), "gklas_fill"] = 1110
        data_res.loc[gklas_missing & (data_res["gkat"] == 1030), "gklas_fill"] = 1122

        data_res["egid_duplicate"] = data_res.duplicated(subset=["egid"], keep=False)

        # Known dwelling counts: length of the `ewid` list, summed over all rows
        # that belong to the same building (egid).
        has_ewid = data_res["ewid"].notna()
        known_len = data_res.loc[has_ewid, "ewid"].map(lambda raw: len(ast.literal_eval(raw)))
        egid_totals = known_len.groupby(data_res.loc[has_ewid, "egid"]).sum()

        # Write the known counts onto the building rows. Previously `ewid_len` only
        # existed on a separate helper frame, so every building with a real `ewid`
        # list ended up as NaN here and dropped out of the district statistics.
        data_res["ewid_len"] = data_res["egid"].map(egid_totals)
        data_res = data_res.drop_duplicates(subset=["egid"], keep="first").reset_index(drop=True)

        # Commune average for class 1122 from the known counts. This is NaN when the
        # commune has no such building (the former `.loc[1122.0]` lookup raised
        # KeyError in that case).
        MFH_avghh = data_res.loc[data_res["gklas_fill"] == 1122, "ewid_len"].mean()

        # Impute buildings without any known count: 1 dwelling for class 1110,
        # 2 for class 1121, the commune average for class 1122. Buildings that stay
        # NaN are ignored by the groupby below.
        fallback_len = data_res["gklas_fill"].map({1110: 1, 1121: 2, 1122: MFH_avghh})
        data_res["ewid_len"] = data_res["ewid_len"].fillna(fallback_len)
        data_res["ewid"] = data_res["ewid"].fillna(value="['no']")

        commune_frames.append(data_res)

    if not commune_frames:
        logging.warning("no usable building data for district %s - skipped", DISTRICT)
        continue
    district_hh = pd.concat(commune_frames)

    # Number of buildings per (class, dwelling count) combination.
    abi_num = (
        district_hh[["egid", "gklas_fill", "ewid_len"]]
        .groupby(["gklas_fill", "ewid_len"])
        .count()
        .rename(columns={"egid": "count"})
        .rename_axis(index={"gklas_fill": "gklas", "ewid_len": "number of households"})
    )
    med = abi_num.reset_index(level="number of households")
    med["TOT"] = med["number of households"] * med["count"]
    print(med)

    # Weighted mean dwellings per class-1122 building; NaN if the district has none.
    MFH = med.loc[med.index == 1122]
    MFH_avghh = MFH["TOT"].sum() / MFH["count"].sum() if not MFH.empty else np.nan

    hh = pd.DataFrame([MFH_avghh], index=["MFH_avg_households"], columns=[DISTRICT])
    district_columns.append(hh)

TI_hh = pd.concat(district_columns, axis=1)
# Canton figure = unweighted mean of the district averages.
TI_hh["TI"] = TI_hh.mean(axis=1)
TI_hh

In [None]:
# Get number of households per egid - per comune - WORKS
# Single-commune walk-through: dwelling count per building from the `ewid` lists,
# then the same statistic again after imputing buildings without an `ewid` list.

NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p / "data"
MAP_DIRECTORY = DATA_DIRECTORY /'maps'
BLDG_DIRECTORY = DATA_DIRECTORY /'Buildings_rea'
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY / CANTON

REA_CODES_DESIRED_LC = ['egid', 'strname_deinr', 'ggdename', 'ggdenr',
       'gexpdat', 'gdekt', 'egrid','gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
       'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
       'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
       'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
       'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',"ewid"]

UST_file = "UST_comune.csv"
canton ="TI"
UST_comune = pd.read_csv(MAP_DIRECTORY/UST_file)
UST_comune = UST_comune[UST_comune['Cantone'] == canton].reset_index()
UST_district = UST_comune['Numero del Distretto'].drop_duplicates().reset_index(drop=True)

# This cell no longer re-initialises TI_elec / district_res_elec: it never uses
# them, and resetting them wiped the results of the electricity cell.

AVG_HH_ELEC_TI = 2741 # kWh/y USTAT 2020 = tot apparecchi/tot abitazioni ewid 
UNOCC = 0.0283 # should be around 0.0283 for TI

dis_num = BELLINZONA_UST
com_num = 5001

# Prefer the raw geodataframe export, fall back to the DATA_ export.
csv_candidates = (
    CANTON_CSV_DIRECTORY / f"raw-gdf-{com_num}.csv",
    CANTON_CSV_DIRECTORY / f"DATA_{com_num}.csv",
)
csv_path = next((cand for cand in csv_candidates if cand.exists()), None)
if csv_path is None:
    # Fail loudly instead of continuing with a `data_res` left over from another cell.
    raise FileNotFoundError(f"no building CSV for commune {com_num} in {CANTON_CSV_DIRECTORY}")
data_res = pd.read_csv(csv_path, header=0, index_col=0)

# Keep building status 1004 and building categories 1020 / 1030 only.
keep_rows = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
data_res = data_res.loc[keep_rows, REA_CODES_DESIRED_LC].reset_index(drop=True)

# Where the building class is missing, derive it from the category:
# 1020 -> 1110, 1030 -> 1122 (SFH / MFH in this notebook's class dictionary).
data_res["gklas_fill"] = data_res["gklas"]
gklas_missing = data_res["gklas"].isna()
data_res.loc[gklas_missing & (data_res["gkat"] == 1020), "gklas_fill"] = 1110
data_res.loc[gklas_missing & (data_res["gkat"] == 1030), "gklas_fill"] = 1122

data_res["egid_duplicate"] = data_res.duplicated(subset=["egid"], keep=False)

# --- Buildings with a known `ewid` list -------------------------------------
no_nan = data_res.dropna(subset=["ewid"]).reset_index(drop=True)
# `ewid` is stored as the text of a Python list; its length is the dwelling count.
no_nan["ewid_len"] = no_nan["ewid"].map(lambda raw: len(ast.literal_eval(raw)))
# A building (egid) may be split over several rows with different ewids:
# give each row the building total, then keep one row per building.
no_nan["ewid_len"] = no_nan.groupby("egid")["ewid_len"].transform("sum")
no_nan = no_nan.drop_duplicates(subset=["egid"], keep="first")

# Number of buildings per (class, dwelling count) combination.
abi_num = (
    no_nan[["egid", "gklas_fill", "ewid_len"]]
    .groupby(["gklas_fill", "ewid_len"])
    .count()
    .rename(columns={"egid": "count"})
    .rename_axis(index={"gklas_fill": "gklas", "ewid_len": "number of households"})
)
med = abi_num.reset_index(level="number of households")
med["TOT"] = med["number of households"] * med["count"]
MFH = med.loc[1122.0]
# Weighted mean dwellings per class-1122 building (known counts only).
MFH_avghh = MFH["TOT"].sum() / MFH["count"].sum()

# --- All buildings, missing counts imputed -----------------------------------
# Copy the known per-building totals onto `data_res`. Previously `ewid_len` was
# only ever written for the imputed rows, so every building with a real `ewid`
# list was NaN and dropped out of the statistics below.
data_res["ewid_len"] = data_res["egid"].map(no_nan.set_index("egid")["ewid_len"])
# Assumed counts where nothing is known: 1 for class 1110, 2 for class 1121,
# the known-building average for class 1122.
fallback_len = data_res["gklas_fill"].map({1110: 1, 1121: 2, 1122: MFH_avghh})
data_res["ewid_len"] = data_res["ewid_len"].fillna(fallback_len)
data_res["ewid"] = data_res["ewid"].fillna(value="['no']")

hh_num = (
    data_res.drop_duplicates(subset=["egid"], keep="first")[["egid", "gklas_fill", "ewid_len"]]
    .groupby(["gklas_fill", "ewid_len"])
    .count()
    .rename(columns={"egid": "count"})
    .rename_axis(index={"gklas_fill": "gklas", "ewid_len": "number of households"})
)
hh_med = hh_num.reset_index(level="number of households")
# Use this table's own counts: the former code multiplied by `med["count"]` and
# divided by `MFH["count"]`, i.e. by the counts of the known-only table above.
hh_med["TOT"] = hh_med["number of households"] * hh_med["count"]
hh_MFH = hh_med.loc[1122.0]
hh_MFH_avghh = hh_MFH["TOT"].sum() / hh_MFH["count"].sum()
#district_hh = pd.concat([district_hh,data_res])

In [None]:
# Display `no_nan`, the per-building table (rows with an `ewid` value, one row per
# egid) left in the kernel by whichever household cell was run last.
no_nan

In [None]:
# Count buildings (egid) for every combination of building class and number of
# dwellings, and give the two index levels readable names.
abi_num = (
    no_nan.loc[:, ["egid", "gklas_fill", "ewid_len"]]
    .groupby(["gklas_fill", "ewid_len"])
    .count()
    .rename(columns={"egid": "count"})
    .rename_axis(index={"gklas_fill": "gklas", "ewid_len": "number of households"})
)
abi_num

In [None]:
# Move the dwelling count out of the index so it can be multiplied with the
# building count: TOT = dwellings contributed by each (class, size) group.
med = abi_num.reset_index(level="number of households")
med["TOT"] = med["count"] * med["number of households"]

# Rows of building class 1122 only, then their weighted mean dwellings per building.
MFH = med.loc[1122.0]
total_dwellings = MFH["TOT"].sum()
total_buildings = MFH["count"].sum()
MFH_avghh = total_dwellings / total_buildings
MFH_avghh

In [None]:
# Load the S1/S2/S3 support table; its first column becomes the index.
fileDir = DATA_DIRECTORY / "support_data"
data_s123 = pd.read_csv(
    fileDir / "S1_S2_S3.csv",
    index_col=0,
    header=0,
)
data_s123

### S1 - AGRICULTURE, S2 - INDUSTRY, S3 - SERVICES

In [None]:
# LOOP THROUGH ALL DISTRICT
#for dis_num in UST_district:

from operator import itemgetter

# Building-class codes (GKLAS) by usage label.
DICT = {
    "SFH": 1110.0,
    "DFH": 1121.0,
    "MFH": 1122.0,
    "HABITAT_COMMUNAUTAIRE": 1130.0,
    "HOTEL": 1211.0,
    "HEBERGEMENT": 1212.0,
    "OFFICE": 1220.0,
    "COMMERCIAL": 1230.0,
    "RESTO_BAR": 1231.0,
    "TRANSP_STATIONS": 1241.0,
    "GARAGE": 1242.0,
    "INDUSTRIAL": 1251.0,
    "RESERVOIRS": 1252.0,
    "CULTURAL": 1261.0,
    "MUSEUM_LIBRARY": 1262.0,
    "ACADEMIC": 1263.0,
    "HOSPITAL": 1264.0,
    "SPORTS": 1265.0,
    "AGRICULTURE": 1271.0,
    "RELIGIOUS": 1272.0,
    "ANCIENT": 1273.0,
    "OTHER_PUBLIC": 1274.0,
    "OUTSIDE": 1275.0,
    "ANIMALS": 1276.0,
    "GREENHOUSE": 1277.0,
    "AGRI_STORAGE": 1278.0,
}

# Sector groupings. Multi-key lookups yield tuples of codes; the single-key
# lookups (AGR, IND) yield one scalar code, hence `==` instead of `isin` below.
RES_code = itemgetter("SFH", "DFH", "MFH")(DICT)
AGR_code = itemgetter("AGRICULTURE")(DICT) #, "ANIMALS", "GREENHOUSE", "AGRI_STORAGE"
IND_code = itemgetter("INDUSTRIAL")(DICT)
SER_code = itemgetter(
    "HABITAT_COMMUNAUTAIRE", "HOTEL", "HEBERGEMENT", "OFFICE", "COMMERCIAL", "RESTO_BAR",
    "TRANSP_STATIONS", "CULTURAL", "MUSEUM_LIBRARY", "ACADEMIC", "HOSPITAL", "SPORTS",
)(DICT)
OTHER_code = itemgetter("GARAGE", "RESERVOIRS", "ANCIENT", "OUTSIDE", "RELIGIOUS", "OTHER_PUBLIC")(DICT)

# SINGLE DISTRICT
dis_num = LUGANO_UST
district_rows = UST_comune["Numero del Distretto"] == dis_num
DISTRICT = str(UST_comune.loc[district_rows, "Nome del Distretto"].iloc[0])
dis_filename = DISTRICT + ".csv"
dis_csv = CANTON_CSV_DIRECTORY / dis_filename

# District building table, indexed by EGID, without the leftover "index.1" column.
dis_rea = (
    pd.read_csv(dis_csv, sep= ";", header=0, index_col=0)
    .set_index("EGID")
    .drop(columns=["index.1"])
)

# Working copy without the construction-period and heating / hot-water columns.
unused_columns = [
    "GABBJ", "GSTAT", "GBAUJ", "GBAUP",
    "GWAERZW1", "GENW1", "GWAERDATW1", "GWAERDATH1", "GENH1", "GWAERZH1",
    "GWAERZW2", "GENW2", "GWAERDATW2", "GWAERDATH2", "GENH2", "GWAERZH2",
]
dr = dis_rea.drop(columns=unused_columns).reset_index()
#dr.dtypes

# Split the buildings by sector using the filled building class.
gklas = dr["GKLAS_fill"]
RES = dr.loc[gklas.isin(RES_code)]
AGR = dr.loc[gklas == AGR_code]
IND = dr.loc[gklas == IND_code]
SER = dr.loc[gklas.isin(SER_code)]
OTHER = dr.loc[gklas.isin(OTHER_code)]

# One summary line: district name followed by the building count of each sector.
summary = [DISTRICT]
for label, frame in (("RES:", RES), ("AGR:", AGR), ("IND:", IND), ("SER:", SER), ("OTHER:", OTHER)):
    summary += [label, frame["EGID"].count()]
print(*summary)

### RESIDENTIAL

In [None]:
# PER COMUNE -  Getting the number of households according to ERA (EWID) -> matches pretty well with this layer: https://map.geo.admin.ch/?layers=ch.are.wohnungsinventar-zweitwohnungsanteil
import ast

NOTEBOOK_PATH = pathlib.Path().resolve()
p = NOTEBOOK_PATH.parent
DATA_DIRECTORY = p / "data"
MAP_DIRECTORY = DATA_DIRECTORY / "maps"
BLDG_DIRECTORY = DATA_DIRECTORY / "Buildings_rea"
CANTON = "Ticino"
CANTON_CSV_DIRECTORY = BLDG_DIRECTORY / CANTON

REA_CODES_DESIRED_LC = ['egid', 'strname_deinr', 'ggdename', 'ggdenr',
       'gexpdat', 'gdekt', 'egrid','gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
       'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
       'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
       'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
       'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2',"ewid"]


#for com_num in communes:
com_num = 5269
print("comune: ", com_num)
fileloc = CANTON_CSV_DIRECTORY
# reading comune raw gdf file
filename = f"raw-gdf-{com_num}.csv"
data_res = pd.read_csv(fileloc / filename, header=0, index_col=0)

# Building status 1004 and building categories 1020 / 1030 only, wanted columns only.
keep_rows = (data_res["gstat"] == 1004) & data_res["gkat"].isin([1020, 1030])
data_res = (
    data_res.loc[keep_rows, REA_CODES_DESIRED_LC]
    .assign(gklas_fill=lambda frame: frame["gklas"])
    .reset_index()
)

# The class is set from the category for every row: 1020 -> 1110, 1030 -> 1122.
data_res.loc[data_res["gkat"] == 1020, "gklas_fill"] = 1110
data_res.loc[data_res["gkat"] == 1030, "gklas_fill"] = 1122 # 63% of known 1040

# Buildings without an `ewid` list get a placeholder list whose length is the
# assumed dwelling count of their class; "['no']" (length 1) when no class matches.
placeholder_by_class = {
    1110: "['1']",
    1121: "['1', '2']",
    1122: "['1', '2', '3', '4', '5', '6', '7']",  # GET real median
}
placeholder = data_res["gklas_fill"].map(placeholder_by_class).fillna("['no']")

# Parse the list text into real lists and count their entries.
data_res["ewid"] = data_res["ewid"].fillna(placeholder).map(ast.literal_eval)
data_res["ewid_len"] = data_res["ewid"].map(len)

test = data_res[["gklas_fill", "ewid", "ewid_len"]]

print(test["ewid_len"].sum())


In [None]:
## PER COMUNE
# Columns inspected for missing values in the selected comune.
REA_CODES_DESIRED_LC = [
    'egid', 'strname_deinr', 'ggdename', 'ggdenr',
    'gexpdat', 'gdekt', 'egrid', 'gebnr', 'gkode', 'gkodn', 'gksce', 'gstat', 'gkat', 'gklas',
    'gbauj', 'gbaup', 'gabbj', 'garea', 'gastw', 'gazzi', 'gebf', 'gwaerzh1',
    'genh1', 'gwaersceh1', 'gwaerdath1', 'gwaerzh2', 'genh2', 'gwaersceh2',
    'gwaerdath2', 'gwaerzw1', 'genw1', 'gwaerscew1', 'gwaerdatw1',
    'gwaerzw2', 'genw2', 'gwaerscew2', 'gwaerdatw2', "ewid",
]

# GGDENR: FSO (OFS) number of the comune.
in_comune = district_res_elec["ggdenr"] == 5009
b = district_res_elec.loc[in_comune, REA_CODES_DESIRED_LC].reset_index()

# Number of missing values per column.
b.isna().sum()

In [None]:
# AGE GROUPBY - COUNT
# GGDENR: FSO (OFS) number of the comune.
b = district_res_elec[district_res_elec["ggdenr"] == 5009]

# Count of non-null `ewid` entries for every (gkat, gklas) combination.
GKAT_group = b[["gkat", "gklas", "ewid"]].groupby(["gkat", "gklas"]).count()
GKAT_group

In [None]:
# (1/3) Calculating number of households and ppl, and elec consumption - DOESN'T GIVE REASONABLE VALUES

def _estimate_residential_demand(buildings):
    """Return a copy of `buildings` with `ppl`, `elec` and `num_hh` columns added.

    Reads the `GKLAS_fill`, `GAREA_fill` and `GASTW_fill` columns and the
    notebook-level constants SFH_PP, MFH_PP, AVG_M2_PP, SUPERFICIE_MEDIA,
    SFH_4pp_elec, SFH_1pp_elec_diff, MFH_2pp_elec and MFH_1pp_elec_diff.

    Rules per building class (`GKLAS_fill`):
      * 1110: one household; people capped at 0.93 * SFH_PP.
      * 1121: two households; people per household capped at 1.1 * MFH_PP.
      * 1122: households derived from the floor area; same per-household cap.
      * anything else: zero people, electricity and households.

    A disabled alternative in the original set num_hh = GASTW_fill for class
    1122 when GAREA_fill * GASTW_fill was below SUPERFICIE_MEDIA.
    """
    est = buildings.copy()
    gklas = est["GKLAS_fill"]
    is_sfh = (gklas == 1110).to_numpy()
    is_dfh = (gklas == 1121).to_numpy()
    is_mfh = (gklas == 1122).to_numpy()

    # GAREA_fill * GASTW_fill, used as the floor-area proxy by every rule.
    floor_area = (est["GAREA_fill"] * est["GASTW_fill"]).to_numpy(dtype=float)

    # Households are fractional for class 1122, so the column is float from
    # the start; writing these values into an integer column truncates or
    # upcasts depending on the pandas version.
    # Other classes get 0 households, consistent with their 0 people / 0 kWh;
    # previously they kept the default of 1 and inflated the household total.
    num_hh = np.select(
        [is_sfh, is_dfh, is_mfh],
        [1.0, 2.0, 0.6 * floor_area / (1.5 * SUPERFICIE_MEDIA)],
        default=0.0,
    )

    # np.where(x < cap, x, cap) reproduces Python's min(cap, x), including
    # "NaN falls back to the cap".
    with np.errstate(divide="ignore", invalid="ignore"):
        sfh_cap = 0.93 * SFH_PP
        sfh_ppl = floor_area / AVG_M2_PP
        sfh_ppl = np.where(sfh_ppl < sfh_cap, sfh_ppl, sfh_cap)

        multi_cap = 1.1 * MFH_PP
        ppl_per_hh = floor_area / num_hh / AVG_M2_PP
        ppl_per_hh = np.where(ppl_per_hh < multi_cap, ppl_per_hh, multi_cap)
        multi_ppl = num_hh * ppl_per_hh

    ppl = np.select([is_sfh, is_dfh | is_mfh], [sfh_ppl, multi_ppl], default=0.0)

    # NOTE(review): formulas kept exactly as written. Class 1121 uses the SFH
    # coefficients, and both multi-household classes apply a per-household
    # formula to the building's total `ppl` - possibly why the results look
    # unreasonable; confirm the intended model.
    sfh_elec = SFH_4pp_elec + (ppl - 4) * SFH_1pp_elec_diff
    mfh_elec = MFH_2pp_elec + (ppl - 2) * MFH_1pp_elec_diff
    elec = np.select([is_sfh | is_dfh, is_mfh], [sfh_elec, mfh_elec], default=0.0)

    est["ppl"] = ppl
    est["elec"] = elec
    est["num_hh"] = num_hh
    return est


# The estimate now runs inside the district loop. Previously the row loop sat
# outside it, so every district CSV was read but only the last district was
# processed. Per-district results are kept in `district_estimates`; `dr` and
# `DISTRICT` still hold the last district for the cells below.
district_estimates = {}
for dis_num in UST_district:
    DISTRICT = str(UST_comune.loc[(UST_comune["Numero del Distretto"] == dis_num), "Nome del Distretto"].iloc[0])
    dis_filename = DISTRICT + ".csv"
    dis_csv = CANTON_CSV_DIRECTORY / dis_filename
    dis_rea = pd.read_csv(dis_csv, sep= ";", header=0, index_col=0).set_index("EGID")
    dis_rea = dis_rea.drop(["index.1"], axis=1)
    dr = dis_rea.drop(["GABBJ","GSTAT", "GBAUJ", "GBAUP","GWAERZW1","GENW1","GWAERDATW1", "GWAERDATH1", "GENH1", "GWAERZH1","GWAERZW2","GENW2","GWAERDATW2", "GWAERDATH2", "GENH2", "GWAERZH2"], axis=1) #
    dr = dr.reset_index()

    dr = _estimate_residential_demand(dr)
    district_estimates[DISTRICT] = dr

In [None]:
# (2/3) Calculating number of households and ppl, and elec consumption - DOESN'T GIVE REASONABLE VALUES
# People per household for each residential class of the current `dr`.
# NOTE(review): the DFH/MFH "mean" figures divide the mean of `ppl` by the
# MEDIAN of `num_hh` - kept as written; confirm whether mean/mean was intended.

# Class 1110: `ppl` is reported as-is.
dr_SFH = dr[dr["GKLAS_fill"] == 1110]
sfh_ppl = dr_SFH["ppl"]
print("SFH_median: ", round(sfh_ppl.median(), 1), "SFH_mean: ", round(sfh_ppl.mean(), 1))

# Class 1121: building totals normalised by the median household count.
dr_DFH = dr[dr["GKLAS_fill"] == 1121]
dfh_ppl = dr_DFH["ppl"]
dfh_hh_median = dr_DFH["num_hh"].median()
print("DFH_median: ", round(dfh_ppl.median() / dfh_hh_median, 1), "DFH_mean: ", round(dfh_ppl.mean() / dfh_hh_median, 1))

# Class 1122: same normalisation as class 1121.
dr_MFH = dr[dr["GKLAS_fill"] == 1122]
mfh_ppl = dr_MFH["ppl"]
mfh_hh_median = dr_MFH["num_hh"].median()
print("MFH_median: ", round(mfh_ppl.median() / mfh_hh_median, 1), "MFH_mean: ", round(mfh_ppl.mean() / mfh_hh_median, 1))

In [None]:
# (3/3) Calculating number of households and ppl, and elec consumption - DOESN'T GIVE REASONABLE VALUES
# Totals over every building in the current `dr`.
dr_hh, dr_ppl, dr_elec = (dr[column].sum() for column in ("num_hh", "ppl", "elec"))
print(DISTRICT, " hh:", dr_hh, " ppl:", dr_ppl, " elec kWh:", dr_elec)