In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [None]:
from FoliumPlot import folium_plot, folium_layout
from Functions import *

## ST GRID TERMINAL

### Import Data

In [3]:
# Load the raw ST terminal grid export; values equal to 0 in the CSV are read as NaN.
data_st_grid_ter_path = "data/st/03a_data_st_ter_grid.csv"
data_st_grid_ter_raw = pd.read_csv(
    data_st_grid_ter_path,
    na_values=[0],
)

# Summarise column dtypes and non-null counts of the raw frame.
data_st_grid_ter_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53097 entries, 0 to 53096
Data columns (total 39 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date_year              53097 non-null  int64  
 1   date_month             53097 non-null  int64  
 2   client_region_name     53097 non-null  object 
 3   client_city_name       53097 non-null  object 
 4   Longitude              53097 non-null  float64
 5   Latitude               53097 non-null  float64
 6   operator_name          53097 non-null  object 
 7   sample_origin_4g_ios   22344 non-null  float64
 8   sample_count_4g_ios    22344 non-null  float64
 9   device_count_4g_ios    22344 non-null  float64
 10  avg_dl_4g_ios          22331 non-null  float64
 11  sample_origin_4g_ad    29706 non-null  float64
 12  sample_count_4g_ad     29706 non-null  float64
 13  device_count_4g_ad     29706 non-null  float64
 14  avg_dl_4g_ad           29679 non-null  float64
 15  sa

### Aggregate Data

In [4]:
# Aggregation settings for the ST terminal grid.
# Rows dated from this year/month onwards are kept, for these operators only.
filter_st_grid_ter_year, filter_st_grid_ter_month = 2020, 1
filter_st_grid_ter_operators = ["DNA", "Elisa", "Telia"]

# Names of the columns used for filtering and grouping.
col_st_grid_ter_year = "date_year"
col_st_grid_ter_month = "date_month"
col_st_grid_ter_operator = "operator_name"
col_st_grid_ter_group = ["Longitude", "Latitude"]

# KPI column names share one suffix per segment; both lists below follow this order.
_st_grid_ter_segments = (
    "4g_ios",
    "4g_ad",
    "ip12_5g",
    "ip12_fb",
    "ip12_lk",
    "ad_5g",
    "ad_fb",
    "ad_lk",
)

# Columns aggregated by sum: sample_origin_*, sample_count_*, device_count_* per segment.
col_st_grid_ter_sum = [
    "{}_{}".format(prefix, segment)
    for segment in _st_grid_ter_segments
    for prefix in ("sample_origin", "sample_count", "device_count")
]

# Columns aggregated by mean: avg_dl_* per segment.
col_st_grid_ter_mean = ["avg_dl_{}".format(segment) for segment in _st_grid_ter_segments]

In [5]:
# group calculation
#   1. keep rows dated on or after the filter year/month
#      (year and month are folded into a single month index for the comparison),
#   2. keep only the selected operators,
#   3. aggregate per grid point and operator: columns in col_st_grid_ter_sum are
#      summed, columns in col_st_grid_ter_mean are averaged,
#   4. turn zeros back into NaN (the sum of an all-NaN group is 0).
# NOTE(review): the final replace() runs after reset_index(), so it also applies to
# the grouping columns, not only to the KPI columns.
data_st_grid_ter_group = (
    data_st_grid_ter_raw.query(
        "{} * 12 + {} >= @filter_st_grid_ter_year * 12 + @filter_st_grid_ter_month".format(
            col_st_grid_ter_year, col_st_grid_ter_month
        )
    )
    .query("{} in @filter_st_grid_ter_operators".format(col_st_grid_ter_operator))
    .groupby(col_st_grid_ter_group + [col_st_grid_ter_operator])
    .agg(
        {
            # string names instead of np.sum / np.mean: passing the NumPy callables
            # to groupby.agg is deprecated in recent pandas (FutureWarning); the
            # strings select the same groupby sum / mean implementations
            **{kpi: "sum" for kpi in col_st_grid_ter_sum},
            **{kpi: "mean" for kpi in col_st_grid_ter_mean},
        }
    )
    .reset_index()
    .replace(0, np.nan)
)

data_st_grid_ter_group.info()

### Convert to gdf

In [7]:
# Build a GeoDataFrame with point geometries from the WGS84 (EPSG:4326)
# longitude / latitude columns of the aggregated table.
# The UTM column names are only declared here; they are filled after reprojection.
col_st_grid_ter_coords_wgs = ["Longitude", "Latitude"]
col_st_grid_ter_coords_utm = ["Longitude_utm", "Latitude_utm"]

data_st_grid_ter_geo = gpd.GeoDataFrame(
    data_st_grid_ter_group,
    geometry=gpd.points_from_xy(
        # x = longitude, y = latitude, in the order listed above
        *(data_st_grid_ter_group[axis] for axis in col_st_grid_ter_coords_wgs)
    ),
    crs=4326,
)

In [8]:
# reproject from WGS84 to the UTM zone estimated for the data, then store the
# projected x / y of every point in the *_utm columns
# (the estimated CRS object is passed straight to to_crs(): to_epsg() can return
# None when no EPSG code matches, and to_crs() cannot work with None)
data_st_grid_ter_utm = data_st_grid_ter_geo.to_crs(
    data_st_grid_ter_geo.estimate_utm_crs()
)
data_st_grid_ter_utm[col_st_grid_ter_coords_utm] = np.c_[
    data_st_grid_ter_utm.geometry.x, data_st_grid_ter_utm.geometry.y
]

data_st_grid_ter_utm.info()

### Interpolation Data

In [11]:
# Load the interpolated (predicted) terminal grid; values equal to 0 are read as NaN.
data_st_grid_ter_interpolate_path = "data/st/03b_data_st_ter_grid_pred.csv"
data_st_grid_ter_interpolate_raw = pd.read_csv(
    data_st_grid_ter_interpolate_path,
    na_values=[0],
)

In [12]:
# Wrap the interpolated table in a GeoDataFrame with WGS84 (EPSG:4326) point
# geometries built from its Longitude / Latitude columns.
data_st_grid_ter_interpolate_geo = gpd.GeoDataFrame(
    data_st_grid_ter_interpolate_raw,
    geometry=gpd.points_from_xy(
        data_st_grid_ter_interpolate_raw["Longitude"],
        data_st_grid_ter_interpolate_raw["Latitude"],
    ),
    crs=4326,
)

# Summarise column dtypes and non-null counts of the geo frame.
data_st_grid_ter_interpolate_geo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 22221 entries, 0 to 22220
Data columns (total 40 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   Longitude              22221 non-null  float64 
 1   Latitude               22221 non-null  float64 
 2   operator_name          22221 non-null  object  
 3   sample_origin_4g_ios   10815 non-null  float64 
 4   sample_count_4g_ios    10815 non-null  float64 
 5   device_count_4g_ios    10815 non-null  float64 
 6   sample_origin_4g_ad    12676 non-null  float64 
 7   sample_count_4g_ad     12676 non-null  float64 
 8   device_count_4g_ad     12676 non-null  float64 
 9   sample_origin_ip12_5g  3667 non-null   float64 
 10  sample_count_ip12_5g   3667 non-null   float64 
 11  device_count_ip12_5g   3667 non-null   float64 
 12  sample_origin_ip12_fb  1636 non-null   float64 
 13  sample_count_ip12_fb   1636 non-null   float64 
 14  device_count_ip12_fb   1636 no

### Visualize geo data

In [14]:
# initial parameters for the map export
# Output path prefix; "<column>.html" is appended for every exported map.
# A forward slash (as in the data paths) replaces the raw backslash so the file is
# written into the output folder on every OS, not only on Windows.
vis_output_file = "output/ST_Grid_Terminal_"
vis_dataset = data_st_grid_ter_interpolate_geo
vis_n_col = np.r_[: vis_dataset.shape[1]]  # positions 0..n-1 of all dataset columns
vis_col_group = ["Longitude", "Latitude"]  # columns identifying one grid point
vis_col_operator = "operator_name"
vis_filter_operator = ["DNA", "Elisa", "Telia"]  # one KPI layer per operator
vis_col_list = ["avg_dl_4g_ios_pred", "avg_dl_ip12_5g_pred"]  # one map per column
# colour per operator, handed to the competition layer
vis_color_dict = {"DNA": "deeppink", "Elisa": "darkblue", "Telia": "blueviolet"}
# passed to folium_layout as `layout`; presumably (rows, columns) of the map grid
# - TODO confirm against FoliumPlot
vis_layout = (2, 2)

In [15]:
# build and save one HTML map per KPI column
for vis_col in vis_col_list:
    # palette settings for this column, computed by the project helper
    vis_palette_n = palette_n_dict(vis_dataset, vis_col, 5, 0, "RdYlGn")
    layer_list = []

    # kpi layers: one layer per operator, showing the KPI column
    for vis_operator in vis_filter_operator:
        vis_gdf = vis_dataset[(vis_dataset[vis_col_operator] == vis_operator)]
        layer_list.append(dict(gdf=vis_gdf, col=vis_col, group_name=vis_operator))

    # competition layer: for every grid point keep the row with the highest KPI value.
    # Rows without a value are dropped BEFORE idxmax, so grid points where the KPI is
    # missing for all rows simply do not form a group. This selects the same rows as
    # idxmax().dropna() but does not rely on all-NaN groups yielding NaN labels, which
    # newer pandas versions deprecate.
    # NOTE(review): unlike the kpi layers, this selection is not restricted to
    # vis_filter_operator - confirm the dataset only contains those operators.
    vis_gdf = vis_dataset.loc[
        vis_dataset.dropna(subset=[vis_col]).groupby(vis_col_group)[vis_col].idxmax()
    ]

    layer_list.append(
        dict(
            gdf=vis_gdf,
            col=vis_col_operator,
            group_name=vis_col,
            color_dict=vis_color_dict,
        )
    )

    # folium layout: combine all layers into one map object
    m = folium_layout(
        layer_list=layer_list,
        layout=vis_layout,
        palette_n=vis_palette_n,
        n_col=vis_n_col,
        groups=True,
        legend=True,
    )
    # output: <prefix><column>.html
    output_file = vis_output_file + vis_col + ".html"
    m.save(output_file)