In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os

In [2]:
from FoliumPlot import folium_plot, folium_layout
from Functions import *

## ST CITY PERFORMANCE

### administrative map

In [3]:
# Archive that holds the administrative map, and the layer (shapefile) inside
# it that is loaded as the city-level map.
map_adm_path, map_adm_city_layer = "map/gadm36_FIN_shp.zip", "gadm36_FIN_3.shp"

In [4]:
# Read the city layer straight out of the zip archive and keep only the
# administrative name columns plus the geometry.
filter_col_map_adm = ["NAME_1", "NAME_2", "NAME_3", "NAME_4", "geometry"]
map_adm_city_uri = "zip://./{}!/{}".format(map_adm_path, map_adm_city_layer)
map_adm_city = gpd.read_file(map_adm_city_uri).filter(filter_col_map_adm)

### Import Data

In [7]:
# Load the per-city performance table; na_values=[0] makes every 0 in the
# file load as NaN (missing).
data_st_city_per_path = "data/st/01_data_st_per_city.csv"
data_st_city_per_raw = pd.read_csv(
    data_st_city_per_path,
    na_values=[0],
)

data_st_city_per_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3373 entries, 0 to 3372
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date_year           3373 non-null   int64  
 1   date_month          3373 non-null   int64  
 2   country_iso         3373 non-null   object 
 3   client_region_name  3373 non-null   object 
 4   client_city_name    3373 non-null   object 
 5   operator_name       3373 non-null   object 
 6   device_count_all    3373 non-null   int64  
 7   sample_origin_all   3373 non-null   int64  
 8   sample_count_all    3373 non-null   int64  
 9   device_count_4g     3369 non-null   float64
 10  sample_origin_4g    3369 non-null   float64
 11  sample_count_4g     3369 non-null   float64
 12  device_count_5g     1712 non-null   float64
 13  sample_origin_5g    1712 non-null   float64
 14  sample_count_5g     1712 non-null   float64
 15  mid_dl_all          3373 non-null   float64
 16  mid_ul

### Aggregate Data

In [8]:
# initial parameters
# -- row filters: first year/month to keep, and the operators to keep
filter_st_city_per_year, filter_st_city_per_month = 2020, 1
filter_st_city_per_operators = ["DNA", "Elisa", "Telia"]

# -- column names used by the filters and the grouping
col_st_city_per_year = "date_year"
col_st_city_per_month = "date_month"
col_st_city_per_operator = "operator_name"
col_st_city_per_group = ["client_region_name", "client_city_name"]

# -- KPI columns, generated as <kpi>_<technology> (all 4g columns first, then 5g)
# columns aggregated with a sum
col_st_city_per_sum = [
    "{}_{}".format(kpi, tech)
    for tech in ("4g", "5g")
    for kpi in ("device_count", "sample_origin", "sample_count")
]
# columns aggregated with a mean
col_st_city_per_mean = [
    "{}_{}".format(kpi, tech)
    for tech in ("4g", "5g")
    for kpi in ("mid_dl", "mid_ul", "latency", "avg_rsrp_rscp", "avg_snr")
]
# derived "<column>_pct" names for every KPI column above
col_st_city_per_share = [
    name + "_pct" for name in col_st_city_per_sum + col_st_city_per_mean
]

In [9]:
# group calculation
# 1. keep rows from the configured start month onward (year * 12 + month turns
#    the two date columns into one comparable month number)
# 2. keep the selected operators
# 3. aggregate per region / city / operator: counts are summed, KPIs averaged
# 4. turn 0 back into NaN (the sum of an all-NaN group is 0)
# Aggregations are named by string ("sum"/"mean") rather than passed as
# np.sum/np.mean: pandas 2.1+ deprecates the callable form for this use.
data_st_city_per_group = (
    data_st_city_per_raw.query(
        "{} * 12 + {} >= @filter_st_city_per_year * 12 + @filter_st_city_per_month".format(
            col_st_city_per_year, col_st_city_per_month
        )
    )
    .query("{} in @filter_st_city_per_operators".format(col_st_city_per_operator))
    .groupby(col_st_city_per_group + [col_st_city_per_operator])
    .agg(
        {
            **{kpi: "sum" for kpi in col_st_city_per_sum},
            **{kpi: "mean" for kpi in col_st_city_per_mean},
        }
    )
    .reset_index()
    .replace(0, np.nan)
)

data_st_city_per_group.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   client_region_name  237 non-null    object 
 1   client_city_name    237 non-null    object 
 2   operator_name       237 non-null    object 
 3   device_count_4g     237 non-null    float64
 4   sample_origin_4g    237 non-null    float64
 5   sample_count_4g     237 non-null    float64
 6   device_count_5g     222 non-null    float64
 7   sample_origin_5g    222 non-null    float64
 8   sample_count_5g     222 non-null    float64
 9   mid_dl_4g           237 non-null    float64
 10  mid_ul_4g           237 non-null    float64
 11  latency_4g          237 non-null    float64
 12  avg_rsrp_rscp_4g    237 non-null    float64
 13  avg_snr_4g          237 non-null    float64
 14  mid_dl_5g           222 non-null    float64
 15  mid_ul_5g           222 non-null    float64
 16  latency_

### Calculate KPI

In [10]:
# Market share: within each city, every count column is divided by the city
# total over all operators and rounded to two decimals.
col_st_city_per_sum_pct = [name + "_pct" for name in col_st_city_per_sum]
data_st_city_per_group[col_st_city_per_sum_pct] = (
    data_st_city_per_group
    .groupby(col_st_city_per_group)[col_st_city_per_sum]
    .transform(lambda s: (s / s.sum()).round(2))
)

In [11]:
# Min-max normalise every performance KPI within each city, so the operator
# with the highest value gets 1 and the one with the lowest gets 0.
# NOTE: when all values in a city are equal (max == min) the result is NaN (0 / 0).
col_st_city_per_mean_pct = [name + "_pct" for name in col_st_city_per_mean]
data_st_city_per_group[col_st_city_per_mean_pct] = (
    data_st_city_per_group
    .groupby(col_st_city_per_group)[col_st_city_per_mean]
    .transform(lambda s: ((s - s.min()) / (s.max() - s.min())).round(2))
)

In [12]:
# Check the frame after the "_pct" columns were added by the two cells above.
data_st_city_per_group.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 35 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   client_region_name    237 non-null    object 
 1   client_city_name      237 non-null    object 
 2   operator_name         237 non-null    object 
 3   device_count_4g       237 non-null    float64
 4   sample_origin_4g      237 non-null    float64
 5   sample_count_4g       237 non-null    float64
 6   device_count_5g       222 non-null    float64
 7   sample_origin_5g      222 non-null    float64
 8   sample_count_5g       222 non-null    float64
 9   mid_dl_4g             237 non-null    float64
 10  mid_ul_4g             237 non-null    float64
 11  latency_4g            237 non-null    float64
 12  avg_rsrp_rscp_4g      237 non-null    float64
 13  avg_snr_4g            237 non-null    float64
 14  mid_dl_5g             222 non-null    float64
 15  mid_ul_5g             2

### Convert to gdf

In [13]:
# Attach the city polygons: join the administrative map (NAME_2 / NAME_3) to
# the KPI table (region / city). merge() defaults to an inner join, so rows
# without a counterpart on the other side are dropped.
col_st_city_per_merge_left = ["NAME_2", "NAME_3"]
col_st_city_per_merge_right = ["client_region_name", "client_city_name"]

col_st_city_per_merge_keys = dict(
    left_on=col_st_city_per_merge_left,
    right_on=col_st_city_per_merge_right,
)
data_st_city_per_geo = map_adm_city.merge(
    data_st_city_per_group, **col_st_city_per_merge_keys
)
data_st_city_per_geo.info()

In [14]:
# Persist the geo frame to disk.
# NOTE(review): the next cell stores the column names separately, presumably
# because this on-disk format does not keep them intact; confirm.
output_gdf_path = "data/st/data_st_city_per_geo"
data_st_city_per_geo.to_file(filename=output_gdf_path)

  data_st_city_per_geo.to_file(output_gdf_path)


In [15]:
# Store the non-geometry column names as a one-column CSV alongside the saved geo data.
output_col_name_path = "data/st/data_st_city_per_geo/col_name.csv"
data_st_city_per_col_name = data_st_city_per_geo.drop(columns="geometry").columns
pd.DataFrame(data_st_city_per_col_name).to_csv(output_col_name_path, index=False)

### Visualize geo data

In [17]:
# initial parameter
# Output file prefix; the KPI name and ".html" are appended per map.
# A forward slash is used (as for every other path in this notebook) so the
# files land in the "output" folder on every OS; a backslash is only a path
# separator on Windows.
vis_output_path = "output/ST_City_Performance_"
vis_dataset = data_st_city_per_geo
# positional index of every column of the dataset (passed to folium_layout as n_col)
vis_n_col = np.r_[: vis_dataset.shape[1]]
vis_col_group = ["client_region_name", "client_city_name"]
vis_col_operator = "operator_name"
vis_filter_operator = ["DNA", "Elisa", "Telia"]
# KPI columns to map, one HTML file per entry
vis_col_list = [
    "mid_dl_4g",
    "mid_ul_4g",
    "avg_rsrp_rscp_4g",
    "mid_dl_5g",
    "mid_ul_5g",
    "avg_rsrp_rscp_5g",
    "device_count_4g_pct",
    "device_count_5g_pct",
]
# colour per operator for the competition layer
vis_color_dict = {"DNA": "deeppink", "Elisa": "darkblue", "Telia": "blueviolet"}
# one row: a panel per operator plus one competition panel
vis_layout = (1, len(vis_filter_operator) + 1)

In [19]:
# Build one HTML map per KPI: a layer per operator plus a competition layer.
for vis_col in vis_col_list:
    vis_palette_n = palette_n_dict(vis_dataset, vis_col, 5, 2, "RdYlGn")

    # kpi layers: the same KPI column, one layer per operator
    layer_list = [
        dict(
            gdf=vis_dataset[vis_dataset[vis_col_operator] == vis_operator],
            col=vis_col,
            group_name=vis_operator,
        )
        for vis_operator in vis_filter_operator
    ]

    # competition layer: the leading operator of each city
    if "_pct" not in vis_col:
        # raw KPI: the leader is the row whose normalised value equals 1
        vis_gdf = vis_dataset.query("{}_pct == 1".format(vis_col))
    else:
        # share KPI: the leader is the row with the largest share in its city
        vis_idx_best = vis_dataset.groupby(vis_col_group)[vis_col].idxmax().dropna()
        vis_gdf = vis_dataset.loc[vis_idx_best]
    layer_list.append(
        dict(gdf=vis_gdf, col=vis_col_operator, color_dict=vis_color_dict)
    )

    # folium layout
    m = folium_layout(
        layer_list=layer_list,
        layout=vis_layout,
        palette_n=vis_palette_n,
        n_col=vis_n_col,
        groups=True,
        legend=True,
        weight=1,
        opacity=0.9,
        fill_opacity=1.0,
        color="lightgrey",
    )

    # output: <prefix><kpi>.html
    output_file = "".join([vis_output_path, vis_col, ".html"])
    m.save(output_file)

## ST CITY TERMINAL

### Import Data

In [21]:
# Load the per-city terminal table; na_values=[0] makes every 0 in the file
# load as NaN (missing).
data_st_city_ter_path = "data/st/02_data_st_ter_city_3m.csv"
data_st_city_ter_raw = pd.read_csv(
    data_st_city_ter_path,
    na_values=[0],
)

data_st_city_ter_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 282 entries, 0 to 281
Data columns (total 35 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   area_level1_name       282 non-null    object 
 1   area_level2_name       282 non-null    object 
 2   operator_name          282 non-null    object 
 3   sample_origin_4g_ios   249 non-null    float64
 4   sample_count_4g_ios    249 non-null    float64
 5   device_count_4g_ios    249 non-null    float64
 6   avg_dl_4g_ios          249 non-null    float64
 7   sample_origin_4g_ad    253 non-null    float64
 8   sample_count_4g_ad     253 non-null    float64
 9   device_count_4g_ad     253 non-null    float64
 10  avg_dl_4g_ad           252 non-null    float64
 11  sample_origin_ip12_5g  101 non-null    float64
 12  sample_count_ip12_5g   101 non-null    float64
 13  device_count_ip12_5g   101 non-null    float64
 14  avg_dl_ip12_5g         101 non-null    float64
 15  sample

### Aggregate Data

In [22]:
# initial parameters
# NOTE: filter_st_city_ter_reg holds the same value as filter_st_city_ter_regex;
# only the latter is used by the cells visible in this notebook.
filter_st_city_ter_reg = "device_count"
filter_st_city_ter_operators = ["DNA", "Elisa", "Telia"]
filter_st_city_ter_regex = "device_count"
col_st_city_ter_operator = "operator_name"
col_st_city_ter_group = ["area_level1_name", "area_level2_name"]

# (output column, calculator) pairs, kept side by side so a KPI name cannot
# drift away from its function; the two parallel lists are derived below.
col_st_city_ter_kpi_function = [
    ("LTE_Average_Speed", cal_4g_average_speed),
    ("NR_Samples", cal_nr_total_samples),
    ("NR_Average_Speed", cal_nr_average_speed),
    ("NR_Speed_Gain", cal_nr_speed_gain_ratio),
    ("NR_Terminal_Rate", cal_nr_terminal_ratio),
    ("NR_Register_Rate", cal_nr_register_ratio),
    ("NR_Function_Rate", cal_nr_function_ratio),
    ("NR_Fallback_Rate", cal_nr_fallback_ratio),
    ("I12_Samples", cal_ip12_total_samples),
    ("I12_Average_Speed", cal_ip12_average_speed),
    ("I12_Speed_Gain", cal_ip12_speed_gain_ratio),
    ("I12_Terminal_Rate", cal_ip12_terminal_ratio),
    ("I12_Register_Rate", cal_ip12_register_ratio),
    ("I12_Function_Rate", cal_ip12_function_ratio),
    ("I12_Fallback_Rate", cal_ip12_fallback_ratio),
]
col_st_city_ter_kpi = [name for name, _ in col_st_city_ter_kpi_function]
col_st_city_ter_function = [func for _, func in col_st_city_ter_kpi_function]

In [23]:
# Keep only the rows of the selected operators (no aggregation happens here).
data_st_city_ter_group = data_st_city_ter_raw[
    data_st_city_ter_raw[col_st_city_ter_operator].isin(filter_st_city_ter_operators)
]

data_st_city_ter_group.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 243 entries, 0 to 281
Data columns (total 35 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   area_level1_name       243 non-null    object 
 1   area_level2_name       243 non-null    object 
 2   operator_name          243 non-null    object 
 3   sample_origin_4g_ios   239 non-null    float64
 4   sample_count_4g_ios    239 non-null    float64
 5   device_count_4g_ios    239 non-null    float64
 6   avg_dl_4g_ios          239 non-null    float64
 7   sample_origin_4g_ad    237 non-null    float64
 8   sample_count_4g_ad     237 non-null    float64
 9   device_count_4g_ad     237 non-null    float64
 10  avg_dl_4g_ad           237 non-null    float64
 11  sample_origin_ip12_5g  101 non-null    float64
 12  sample_count_ip12_5g   101 non-null    float64
 13  device_count_ip12_5g   101 non-null    float64
 14  avg_dl_ip12_5g         101 non-null    float64
 15  sample

### Calculate KPI

In [24]:
# calculate terminal kpi
# Each calculator receives the frame built so far plus the column-name pattern
# and its result is stored under the paired KPI name. The calculator is bound
# as a default argument so the lambda does not depend on the loop variable.
for kpi_name, kpi_function in zip(col_st_city_ter_kpi, col_st_city_ter_function):
    kpi_column = {
        kpi_name: lambda df, fn=kpi_function: fn(df, filter_st_city_ter_regex)
    }
    data_st_city_ter_group = data_st_city_ter_group.assign(**kpi_column)

data_st_city_ter_group.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 243 entries, 0 to 281
Data columns (total 50 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   area_level1_name       243 non-null    object 
 1   area_level2_name       243 non-null    object 
 2   operator_name          243 non-null    object 
 3   sample_origin_4g_ios   239 non-null    float64
 4   sample_count_4g_ios    239 non-null    float64
 5   device_count_4g_ios    239 non-null    float64
 6   avg_dl_4g_ios          239 non-null    float64
 7   sample_origin_4g_ad    237 non-null    float64
 8   sample_count_4g_ad     237 non-null    float64
 9   device_count_4g_ad     237 non-null    float64
 10  avg_dl_4g_ad           237 non-null    float64
 11  sample_origin_ip12_5g  101 non-null    float64
 12  sample_count_ip12_5g   101 non-null    float64
 13  device_count_ip12_5g   101 non-null    float64
 14  avg_dl_ip12_5g         101 non-null    float64
 15  sample

  r = np.divide(ter_5g_total, ter_total)
  r = np.divide(ter_5g_reg, ter_5g_total)
  r = np.divide(ter_5g_fun, ter_5g_total)
  r = np.divide(ter_5g_fb, ter_5g_fun)
  r = np.divide(ter_5g_ip12, ter_5g_total)
  r = np.divide(ter_5g_fun_ip12, ter_5g_ip12)


### Calculate TNS

In [25]:
# TNS kpi dict 1: the KPI columns that feed each first-level TNS category
data_st_city_ter_tns1_dict = dict(
    TNS_T=["NR_Terminal_Rate", "I12_Terminal_Rate"],
    TNS_N=["NR_Register_Rate", "NR_Average_Speed"],
    TNS_S=["NR_Function_Rate"],
)

In [26]:
# TNS kpi threshold 1
# Every KPI column referenced by the TNS dict, de-duplicated in first-seen
# order. A list is required here: pandas 2.x raises TypeError for a set
# indexer, and a set would also make the column order vary between runs.
col_st_city_ter_tns1_kpi = list(
    dict.fromkeys(
        kpi for kpi_list in data_st_city_ter_tns1_dict.values() for kpi in kpi_list
    )
)

# quartiles of every KPI, printed as a guide for choosing thresholds
print(
    data_st_city_ter_group[col_st_city_ter_tns1_kpi].quantile(
        q=[0, 0.25, 0.5, 0.75, 1]
    )
)

# auto configure: the median of each KPI is the threshold
data_st_city_ter_tns1_thd = data_st_city_ter_group[
    col_st_city_ter_tns1_kpi
].quantile(q=[0.5])

# manual configure
# data_st_city_ter_tns1_thd = pd.DataFrame({
#     'NR_Terminal_Rate': [20],
#     'I12_Terminal_Rate': [20],
#     'NR_Register_Rate': [30],
#     'NR_Average_Speed': [200],
#     'NR_Function_Rate': [50],
# })

      NR_Register_Rate  NR_Average_Speed  I12_Terminal_Rate  NR_Function_Rate  NR_Terminal_Rate
0.00               0.0               0.0               0.00              0.00               0.0
0.25               9.0             163.0              22.00             40.25              15.0
0.50              17.0             210.0              28.00             54.00              18.0
0.75              30.0             261.5              35.75             65.75              22.0
1.00              56.0             692.0             100.00            100.00             100.0


In [27]:
# TNS kpi cat 1
# The single threshold row is labelled "Ready"; "Not Ready" is the default
# category handed to add_kpi_cat.
data_st_city_ter_tns1_thd_default = "Not Ready"
data_st_city_ter_tns1_thd.index = ["Ready"]

In [28]:
# TNS categorization 1: derive the TNS_T / TNS_N / TNS_S columns from the
# terminal KPI frame using the first-level dict and thresholds.
data_st_city_ter_tns1_args = dict(
    kpi_df=data_st_city_ter_group,
    kpi_dict=data_st_city_ter_tns1_dict,
    kpi_thd=data_st_city_ter_tns1_thd,
    cat_default=data_st_city_ter_tns1_thd_default,
)
data_st_city_ter_tns = add_kpi_cat(**data_st_city_ter_tns1_args)

In [29]:
# TNS kpi dict 2: the combined category is built from the three first-level ones
data_st_city_ter_tns2_dict = dict(TNS_TNS=["TNS_T", "TNS_N", "TNS_S"])

In [30]:
# TNS kpi threshold 2
# One row per combined category, one column per first-level category.
# NOTE(review): "*" presumably acts as a wildcard inside add_kpi_cat; confirm.
data_st_city_ter_tns2_thd = pd.DataFrame(
    [
        ["Ready", "Ready", "Ready"],
        ["Ready", "Ready", "*"],
        ["Ready", "*", "Ready"],
        ["*", "Ready", "Ready"],
    ],
    columns=["TNS_T", "TNS_N", "TNS_S"],
)

In [31]:
# TNS kpi cat 2
# Label the four threshold rows; the numeric prefixes keep the categories in
# order when sorted as text. "05 Not Ready" is the default category.
data_st_city_ter_tns2_thd_default = "05 Not Ready"
data_st_city_ter_tns2_thd.index = [
    "01 TNS Ready",
    "02 TN Ready",
    "03 TS Ready",
    "04 NS Ready",
]

In [32]:
# TNS categorization 2: add the combined TNS_TNS column on top of the
# first-level categories, then print how many rows fall in each category.
data_st_city_ter_tns = add_kpi_cat(
    kpi_df=data_st_city_ter_tns,
    kpi_dict=data_st_city_ter_tns2_dict,
    kpi_thd=data_st_city_ter_tns2_thd,
    cat_default=data_st_city_ter_tns2_thd_default,
)
print(data_st_city_ter_tns["TNS_TNS"].value_counts())

05 Not Ready    157
01 TNS Ready     44
04 NS Ready      33
03 TS Ready       6
02 TN Ready       3
Name: TNS_TNS, dtype: int64


### Convert to gdf

In [34]:
# Attach the city polygons: join the administrative map (NAME_2 / NAME_3) to
# the categorised terminal table (area level 1 / level 2). merge() defaults
# to an inner join, so rows without a counterpart are dropped.
col_st_city_ter_merge_left = ["NAME_2", "NAME_3"]
col_st_city_ter_merge_right = ["area_level1_name", "area_level2_name"]

col_st_city_ter_merge_keys = dict(
    left_on=col_st_city_ter_merge_left,
    right_on=col_st_city_ter_merge_right,
)
data_st_city_ter_geo = map_adm_city.merge(
    data_st_city_ter_tns, **col_st_city_ter_merge_keys
)

data_st_city_ter_geo.info()

### Visualize geo data

#### visualize geo kpi

In [36]:
# initial parameter
# Output file prefix; the KPI name and ".html" are appended per map.
# A forward slash is used (as for every other path in this notebook) so the
# files land in the "output" folder on every OS; a backslash is only a path
# separator on Windows.
vis_output_path = "output/ST_City_Terminal_"
vis_dataset = data_st_city_ter_geo
# positional index of every column of the dataset (passed to folium_layout as n_col)
vis_n_col = np.r_[: vis_dataset.shape[1]]
vis_col_group = ["area_level1_name", "area_level2_name"]
vis_col_operator = "operator_name"
# rows are drawn only when this sample column exceeds vis_filter_sample
vis_col_nr_sample = "I12_Samples"
vis_filter_sample = 3
vis_filter_operator = ["DNA", "Elisa", "Telia"]
# KPI columns to map, one HTML file per entry
vis_col_list = [
    "NR_Samples",
    "NR_Average_Speed",
    "NR_Terminal_Rate",
    "NR_Register_Rate",
    "NR_Function_Rate",
    "NR_Fallback_Rate",
    "I12_Samples",
    "I12_Average_Speed",
    "I12_Terminal_Rate",
    "I12_Register_Rate",
    "I12_Function_Rate",
    "I12_Fallback_Rate",
]
# colour per operator for the competition layer
vis_color_dict = {"DNA": "deeppink", "Elisa": "darkblue", "Telia": "blueviolet"}
# one row: a panel per operator plus one competition panel
vis_layout = (1, len(vis_filter_operator) + 1)

In [37]:
# Build one HTML map per terminal KPI: a layer per operator plus a competition
# layer. Only rows whose sample count exceeds the threshold are drawn.
for vis_col in vis_col_list:
    # rows with enough samples
    vis_mask_sample = vis_dataset[vis_col_nr_sample] > vis_filter_sample

    # colour bins are computed on the sufficiently sampled rows only
    vis_palette_n = palette_n_dict(
        vis_dataset[vis_mask_sample], vis_col, 5, 0, "RdYlGn"
    )

    # kpi layers: one per operator
    layer_list = [
        dict(
            gdf=vis_dataset[
                (vis_dataset[vis_col_operator] == vis_operator) & vis_mask_sample
            ],
            col=vis_col,
            group_name=vis_operator,
        )
        for vis_operator in vis_filter_operator
    ]

    # competition layer: per city, the row holding the highest KPI value.
    # The winner is picked among ALL rows and only afterwards reduced to rows
    # with enough samples, so a city whose winner is under-sampled is left out.
    vis_idx_best = vis_dataset.groupby(vis_col_group)[vis_col].idxmax().dropna()
    vis_gdf = vis_dataset.loc[vis_idx_best]
    layer_list.append(
        dict(
            gdf=vis_gdf[vis_gdf[vis_col_nr_sample] > vis_filter_sample],
            col=vis_col_operator,
            group_name=vis_col,
            color_dict=vis_color_dict,
        )
    )

    # folium layout
    m = folium_layout(
        layer_list=layer_list,
        layout=vis_layout,
        palette_n=vis_palette_n,
        n_col=vis_n_col,
        groups=True,
        legend=True,
        weight=1,
        opacity=0.9,
        fill_opacity=1.0,
        color="lightgrey",
    )

    # output: <prefix><kpi>.html
    output_file = "".join([vis_output_path, vis_col, ".html"])
    m.save(output_file)

#### visualize TNS

In [39]:
# TNS map: one panel per operator, coloured by the combined TNS category.
vis_col = "TNS_TNS"
vis_color_dict = {
    "01 TNS Ready": "Red",
    "02 TN Ready": "Blue",
    "03 TS Ready": "Green",
    "04 NS Ready": "Orange",
    "05 Not Ready": "Grey",
}

# create layer list: per operator, the rows with enough samples
layer_list = [
    dict(
        gdf=vis_dataset[
            (vis_dataset[vis_col_operator] == filter_operator)
            & (vis_dataset[vis_col_nr_sample] > vis_filter_sample)
        ],
        col=vis_col,
        group_name=filter_operator,
    )
    for filter_operator in vis_filter_operator
]

# folium layout: a single row with one panel per layer
m = folium_layout(
    layer_list=layer_list,
    layout=(1, len(layer_list)),
    n_col=vis_n_col,
    color_dict=vis_color_dict,
    weight=1,
    opacity=0.8,
    fill_opacity=0.6,
)

# output: <prefix>TNS_TNS.html
output_file = "".join([vis_output_path, vis_col, ".html"])
m.save(output_file)

## ST CITY VALUE

### Import Data

In [40]:
# Load the per-city socio-economic table (read as-is; no NaN substitution here).
data_value_city_path = "data/Finland_City_Info.csv"
data_value_city_raw = pd.read_csv(data_value_city_path)

data_value_city_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   NAME_1                        81 non-null     object 
 1   NAME_2                        81 non-null     object 
 2   NAME_3                        81 non-null     object 
 3   Surface_Area                  81 non-null     int64  
 4   Inhabitants                   81 non-null     int64  
 5   Inhabitants_Density           81 non-null     float64
 6   Average_age                   81 non-null     int64  
 7   Average_income                81 non-null     int64  
 8   Accumulated_purchasing_power  81 non-null     int64  
 9   Buildings                     81 non-null     int64  
 10  Building_Density              81 non-null     float64
 11  Inhabitants_per_Building      81 non-null     float64
 12  Enterprise_Density            81 non-null     float64
 13  Enterpr

### Calculate KPI

In [41]:
# kpi dict: the input columns that feed each value category
data_value_city_dict = dict(
    [
        ("2c_value", ["Inhabitants_Density", "Average_income"]),
        ("2b_value", ["Enterprise_Density"]),
        ("2h_value", ["Building_Density", "Average_income"]),
    ]
)

In [42]:
# kpi threshold
# Every input column referenced by the KPI dict, de-duplicated in first-seen
# order ("Average_income" appears twice). A list is required here: pandas 2.x
# raises TypeError for a set indexer, and a set would also make the column
# order vary between runs.
col_value_city_kpi = list(
    dict.fromkeys(
        kpi for kpi_list in data_value_city_dict.values() for kpi in kpi_list
    )
)

# quartiles of every input column, printed as a guide for choosing thresholds
print(
    data_value_city_raw[col_value_city_kpi].quantile(
        q=[0, 0.25, 0.5, 0.75, 1]
    )
)

# auto configure: upper quartile, median and lower quartile, highest first
data_value_city_thd = data_value_city_raw[
    col_value_city_kpi
].quantile(q=[0.75, 0.5, 0.25])

# manual configure
# data_value_city_thd = pd.DataFrame({'Inhabitants_Density': [50, 10],
#                                   'Average_income': [25000, 20000],
#                                   'Enterprise_Density': [15, 1],
#                                   'Building_Density': [20, 5],
#                                   })

      Average_income  Enterprise_Density  Building_Density  Inhabitants_Density
0.00         19793.0                 0.2               0.3                  0.0
0.25         22341.0                 1.7               2.8                  6.0
0.50         23310.0                 3.9               5.3                 11.0
0.75         24493.0                 8.3               8.9                 24.0
1.00         31021.0               215.8              59.1                455.0


In [43]:
# kpi cat
# Label the three threshold rows (0.75 / 0.5 / 0.25 quantiles, in that order);
# no default category is supplied (None).
data_value_city_thd_default = None
data_value_city_thd.index = ["1 High", "2 Medium High", "3 Medium Low"]

In [44]:
# categorization: add the value-category columns to the city table, then
# show how many cities fall in each "2c_value" category.
data_value_city_kpi = add_kpi_cat(
    kpi_df=data_value_city_raw,
    kpi_dict=data_value_city_dict,
    kpi_thd=data_value_city_thd,
    cat_default=data_value_city_thd_default,
)

data_value_city_kpi["2c_value"].value_counts()

### Convert to gdf

In [48]:
# Attach the city polygons; both sides share the three NAME_* key columns.
# merge() defaults to an inner join, so cities missing from the map are dropped.
col_value_city_merge = ["NAME_1", "NAME_2", "NAME_3"]
data_value_city_geo = map_adm_city.merge(
    data_value_city_kpi,
    on=col_value_city_merge,
)

data_value_city_geo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 80 entries, 0 to 79
Data columns (total 18 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   NAME_1                        80 non-null     object  
 1   NAME_2                        80 non-null     object  
 2   NAME_3                        80 non-null     object  
 3   geometry                      80 non-null     geometry
 4   Surface_Area                  80 non-null     int64   
 5   Inhabitants                   80 non-null     int64   
 6   Inhabitants_Density           80 non-null     float64 
 7   Average_age                   80 non-null     int64   
 8   Average_income                80 non-null     int64   
 9   Accumulated_purchasing_power  80 non-null     int64   
 10  Buildings                     80 non-null     int64   
 11  Building_Density              80 non-null     float64 
 12  Inhabitants_per_Building      80 non-null   

### Visualize geo data

In [None]:
# Single-layer map of the "2c_value" category; the map object is the cell
# output so it renders inline.
vis_value_plot_args = dict(
    gdf=data_value_city_geo,
    col="2c_value",
    palette_n="Purples_r",
    fill_opacity=1.0,
    color="lightgrey",
)
m = folium_plot(**vis_value_plot_args)
m

## CITY ABCDE

### Import Data

In [63]:
# Load the saved EP geo data and restore its column names from the
# col_name.csv stored in the same folder; "geometry" is appended as the last column.
data_ep_city_geo_path = "data/ep/data_ep_city_geo"
data_ep_city_geo = gpd.read_file(data_ep_city_geo_path)
data_ep_city_col_name = (
    pd.read_csv(data_ep_city_geo_path + "/col_name.csv").iloc[:, 0].to_list()
)
data_ep_city_geo.columns = data_ep_city_col_name + ["geometry"]

data_ep_city_geo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   NAME_1         62 non-null     object  
 1   NAME_2         62 non-null     object  
 2   NAME_3         62 non-null     object  
 3   Total_Sites    62 non-null     int64   
 4   Swapped_Sites  62 non-null     int64   
 5   Swapped_5G     62 non-null     int64   
 6   Site_5G_pct    62 non-null     float64 
 7   geometry       62 non-null     geometry
dtypes: float64(1), geometry(1), int64(3), object(3)
memory usage: 4.0+ KB


vis_output_path

In [69]:
# initial parameters
# Output file of the combined A-E map. A forward slash is used (as for every
# other path in this notebook) so the file lands in the "output" folder on
# every OS; a backslash is only a path separator on Windows.
vis_output_path = "output/Value_ABCDE_City.html"
# city key columns used to rank operators within a city
vis_col_group = ["NAME_1", "NAME_2", "NAME_3"]
# the single operator shown on the operator-specific layers
vis_col_operator = "operator_name"
vis_filter_operator = "DNA"
# rows are kept only when this sample column exceeds vis_filter_nr_sample
vis_col_nr_sample = "I12_Samples"
vis_filter_nr_sample = 0

In [67]:
# parameter dict: one entry per panel of the A-E map, giving the panel title,
# the geo frame to draw, the column to colour by and the palette name.
vis_parameter_dict = {
    "A": dict(
        group_name="A - Alignment of TNS",
        gdf=data_st_city_ter_geo,
        col="TNS_TNS",
        palette_n="Greens_r",
    ),
    "B": dict(
        group_name="B - Building of 5G",
        gdf=data_ep_city_geo,
        col="Site_5G_pct",
        palette_n="Oranges",
    ),
    "C": dict(
        group_name="C - Competitiveness of Network",
        gdf=data_st_city_ter_geo,
        col="I12_Average_Speed",
        palette_n="Reds_r",
    ),
    "D": dict(
        group_name="D - Division of Market",
        gdf=data_value_city_geo,
        col="2c_value",
        palette_n="Purples_r",
    ),
    "E": dict(
        group_name="E - Estimation of Market-Share",
        gdf=data_st_city_per_geo,
        col="device_count_4g_pct",
        palette_n="Blues",
    ),
}

In [70]:
# folium plot
# Build one layer per entry of vis_parameter_dict. Per-panel handling is
# selected by the dict key ("A".."E") rather than by searching for a letter
# inside the display name, so rewording a panel title cannot change which
# filters are applied.
layer_list = []
for vis_key, val in vis_parameter_dict.items():
    # get parameter value from dict
    group_name = val.get("group_name")
    gdf = val.get("gdf")
    col = val.get("col")
    palette_n = val.get("palette_n")

    # case handler
    if vis_key == "C":
        # Rank operators within each city (1 = highest value) and colour by
        # the rank. assign() returns a new frame, so the shared source frame
        # (also used by panel A) is not modified.
        gdf = gdf.assign(
            rank=gdf.groupby(vis_col_group)[col].rank(ascending=False).astype(str)
        )
        col = "rank"
    if vis_key in ("A", "C"):
        # keep rows with enough samples
        gdf = gdf[gdf[vis_col_nr_sample] > vis_filter_nr_sample]
    if vis_key in ("A", "C", "E"):
        # these frames hold one row per operator: keep the selected operator
        gdf = gdf[(gdf[vis_col_operator] == vis_filter_operator)]
    if vis_key == "E":
        # fixed bin edges for the share palette
        palette_n = {palette_n: [0.10, 0.20, 0.25, 0.33, 0.50, 1.00]}
    if vis_key == "B":
        # drop rows whose value is not positive, then use fixed bin edges
        gdf = gdf[(gdf[col] > 0)]
        palette_n = {palette_n: [0.01, 0.05, 0.10, 0.25, 0.50]}
    # add parameter to layer_list
    layer_list.append(
        dict(
            gdf=gdf,
            col=col,
            group_name=group_name,
            palette_n=palette_n,
        )
    )

# folium layout: a single row with one panel per layer
m = folium_layout(
    layer_list=layer_list,
    layout=(1, len(layer_list)),
    weight=1,
    opacity=0.9,
    fill_opacity=1.0,
    color="lightgrey",
    bin_decimal=2,
)

# output
output_file = vis_output_path
m.save(output_file)