In [74]:
import pandas as pd
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW

In [75]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

In [76]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

---

<h3 style="text-align: center;">Предобработка данных</h3>

---

In [77]:
ds = pd.read_csv('../r_1_SPB_clear_w_time_to_m.csv',index_col=0)

In [78]:
ds = ds.drop(['m_lat','m_long','adress','Стиральная машина','district','underground'],axis = 1)

In [79]:
ds['time_to_metro'] = ds['time_to_metro'].apply(lambda x: round(x))

In [80]:
ds_shuffled = ds.sample(len(ds),random_state= 66).reset_index(drop=True)
ds_train = ds_shuffled[:int(len(ds)*0.85)]
ds_test = ds_shuffled[int(len(ds)*0.85):]

In [81]:
Y_train = ds_train['time_to_rent'].values.reshape((-1,1))
lat_tr = ds_train['a_lat'].values
long_tr = ds_train['a_long'].values
cords_tr = list(zip(lat_tr,long_tr))
X_train = ds_train.drop(['price_per_month','time_to_rent','a_lat','a_long'],axis = 1)

In [82]:
Y_test = ds_test['time_to_rent'].values.reshape((-1,1))
lat_ts = ds_test['a_lat'].values
long_ts = ds_test['a_long'].values
cords_ts = list(zip(lat_ts,long_ts))
X_test = ds_test.drop(['price_per_month','time_to_rent','a_lat','a_long'],axis = 1)

In [83]:
bin_cols = list(X_train.nunique()[X_train.nunique() == 2].index)
bin_cols

['Холодильник',
 'Телевизор',
 'Посудомоечная машина',
 'Кондиционер',
 'Интернет',
 'Санузел']

In [84]:
num_cols = list(X_train.drop(bin_cols,axis =1).select_dtypes(include = 'number').columns)
num_cols

['floor',
 'floors_count',
 'total_meters',
 'Площадь кухни',
 'Высота потолков',
 'Год постройки',
 'time_to_metro']

In [85]:
non_cat_cols = num_cols.copy()
non_cat_cols.extend(bin_cols)
cat_cols = list(X_train.drop(non_cat_cols,axis =1).columns)
cat_cols

['Балкон/лоджия', 'Вид из окон', 'Ремонт', 'Тип дома', 'Парковка']

In [86]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat',OneHotEncoder() ,cat_cols),
        ('num', 'passthrough', num_cols),
        ('bin','passthrough',bin_cols)
    ])

In [87]:
X_train_norm = preprocessor.fit_transform(X_train)
X_test_norm = preprocessor.transform(X_test)
X_train_norm.shape,X_test_norm.shape

((499, 34), (89, 34))

In [88]:
col_names = []
for item in preprocessor.get_feature_names_out():
    col_names.append(item.split('__')[1])

In [89]:
X_train = pd.DataFrame(X_train_norm,columns=col_names)
X_test = pd.DataFrame(X_test_norm,columns=col_names)
display(X_train,X_test)

Unnamed: 0,Балкон/лоджия_1 балкон,Балкон/лоджия_1 лоджия,"Балкон/лоджия_1 лоджия, 1 балкон",Балкон/лоджия_2 балкона,Балкон/лоджия_2 лоджии,Балкон/лоджия_нет балкона,Вид из окон_Во двор,Вид из окон_На улицу,Вид из окон_На улицу и двор,Ремонт_Дизайнерский,...,Площадь кухни,Высота потолков,Год постройки,time_to_metro,Холодильник,Телевизор,Посудомоечная машина,Кондиционер,Интернет,Санузел
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,10.0,2.800000,2008.0,11.0,1.0,1.0,0.0,0.0,1.0,1.0
1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,9.0,2.700000,2019.0,62.0,0.0,0.0,0.0,0.0,1.0,1.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,9.0,2.500000,2019.0,8.0,1.0,0.0,1.0,0.0,1.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,15.0,2.700000,2012.0,14.0,1.0,1.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,5.5,2.500000,1969.0,46.0,1.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,6.0,2.500000,1960.0,11.0,1.0,0.0,0.0,0.0,0.0,1.0
495,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,6.0,2.673842,1975.0,16.0,1.0,0.0,0.0,0.0,1.0,1.0
496,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,12.0,2.673842,2008.0,15.0,1.0,1.0,0.0,0.0,1.0,1.0
497,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,17.0,2.800000,2001.0,11.0,1.0,0.0,0.0,1.0,1.0,1.0


Unnamed: 0,Балкон/лоджия_1 балкон,Балкон/лоджия_1 лоджия,"Балкон/лоджия_1 лоджия, 1 балкон",Балкон/лоджия_2 балкона,Балкон/лоджия_2 лоджии,Балкон/лоджия_нет балкона,Вид из окон_Во двор,Вид из окон_На улицу,Вид из окон_На улицу и двор,Ремонт_Дизайнерский,...,Площадь кухни,Высота потолков,Год постройки,time_to_metro,Холодильник,Телевизор,Посудомоечная машина,Кондиционер,Интернет,Санузел
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,10.0,2.500000,2023.0,88.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,10.0,2.600000,2014.0,72.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,10.0,2.500000,1973.0,11.0,1.0,1.0,0.0,1.0,1.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,12.0,2.700000,2006.0,89.0,1.0,1.0,0.0,0.0,1.0,1.0
4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,17.4,2.600000,2023.0,42.0,1.0,1.0,1.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,17.0,2.673842,2014.0,10.0,1.0,1.0,0.0,0.0,1.0,1.0
85,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,13.0,2.700000,2020.0,84.0,1.0,0.0,0.0,0.0,1.0,1.0
86,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,17.0,3.000000,2022.0,42.0,1.0,1.0,0.0,0.0,1.0,1.0
87,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,6.3,2.600000,1979.0,15.0,1.0,1.0,0.0,0.0,0.0,1.0


In [90]:
import random

In [91]:
for col in X_train.columns:
    for i in range(len(X_train)):
        if X_train[col][i] == 0:
            X_train[col][i] = random.uniform(0, 0.0005)

In [92]:
X_train

Unnamed: 0,Балкон/лоджия_1 балкон,Балкон/лоджия_1 лоджия,"Балкон/лоджия_1 лоджия, 1 балкон",Балкон/лоджия_2 балкона,Балкон/лоджия_2 лоджии,Балкон/лоджия_нет балкона,Вид из окон_Во двор,Вид из окон_На улицу,Вид из окон_На улицу и двор,Ремонт_Дизайнерский,...,Площадь кухни,Высота потолков,Год постройки,time_to_metro,Холодильник,Телевизор,Посудомоечная машина,Кондиционер,Интернет,Санузел
0,1.000000,0.000189,0.000203,0.000126,0.000364,0.000385,1.000000,0.000002,0.000048,0.000195,...,10.0,2.800000,2008.0,11.0,1.000000,1.000000,0.000300,0.000355,1.000000,1.0
1,0.000489,1.000000,0.000436,0.000086,0.000355,0.000328,1.000000,0.000022,0.000206,0.000046,...,9.0,2.700000,2019.0,62.0,0.000318,0.000087,0.000106,0.000211,1.000000,1.0
2,1.000000,0.000491,0.000217,0.000043,0.000204,0.000206,0.000351,1.000000,0.000201,0.000112,...,9.0,2.500000,2019.0,8.0,1.000000,0.000108,1.000000,0.000444,1.000000,1.0
3,1.000000,0.000224,0.000282,0.000058,0.000364,0.000446,1.000000,0.000023,0.000358,1.000000,...,15.0,2.700000,2012.0,14.0,1.000000,1.000000,0.000384,0.000404,0.000414,1.0
4,1.000000,0.000492,0.000039,0.000292,0.000030,0.000260,1.000000,0.000005,0.000086,0.000070,...,5.5,2.500000,1969.0,46.0,1.000000,1.000000,0.000408,0.000178,0.000423,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,0.000349,0.000105,0.000056,0.000027,0.000027,1.000000,1.000000,0.000259,0.000220,0.000206,...,6.0,2.500000,1960.0,11.0,1.000000,0.000056,0.000263,0.000221,0.000428,1.0
495,0.000366,0.000307,0.000358,0.000162,0.000469,1.000000,1.000000,0.000403,0.000292,0.000108,...,6.0,2.673842,1975.0,16.0,1.000000,0.000074,0.000135,0.000374,1.000000,1.0
496,0.000289,1.000000,0.000486,0.000010,0.000293,0.000245,1.000000,0.000200,0.000420,0.000350,...,12.0,2.673842,2008.0,15.0,1.000000,1.000000,0.000457,0.000102,1.000000,1.0
497,1.000000,0.000322,0.000464,0.000500,0.000022,0.000114,1.000000,0.000121,0.000144,0.000490,...,17.0,2.800000,2001.0,11.0,1.000000,0.000321,0.000008,1.000000,1.000000,1.0


In [93]:
X_train_ = X_train.values
X_test_ = X_test.values

In [94]:
Y_train.shape

(499, 1)

In [95]:
X_train_.shape

(499, 34)

---

<h3 style="text-align: center;">Строим ГВР</h3>

---

In [96]:
gwr_selector = Sel_BW(cords_tr, Y_train, X_train_, spherical = True,fixed=False)# fixed = false -> bandwidth это число ближайших соседей
gwr_bw = gwr_selector.search()

In [97]:
print('GWR bandwidth =', gwr_bw)

GWR bandwidth = 498.0


In [98]:
gwr_results = GWR(cords_tr, Y_train, X_train_, gwr_bw,name_x = col_names).fit()
gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                 499
Number of covariates:                                                    35

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                           4311.049
Log-likelihood:                                                   -1246.055
AIC:                                                               2562.110
AICc:                                                              2569.876
BIC:                                                               1428.400
R2:                                                                   0.094
Adj. R2:                                                              0.028

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [99]:
import numpy as np

In [33]:
len(np.unique(gwr_results.params[:,2]))

408

In [100]:
local_R2 = gwr_results.localR2

In [101]:
min_r2=min(local_R2)[0]
min_r2

0.14381202119982478

In [102]:
diff = (max(local_R2)[0] - min(local_R2)[0])/4
diff

0.007188174481351013

In [103]:
intervals_for_r2 = [round(min_r2+diff*i,2) for i in range(0,5)]
intervals_for_r2

[0.14, 0.15, 0.16, 0.17, 0.17]

In [104]:
str_intervals = []
for i in range(1,5):
    str_intervals.append(str(intervals_for_r2[i-1]) + ' - ' + str(intervals_for_r2[i]))


In [39]:
color_palette = ['#BEC7DA','#7184AD','#395DA3','#03256C']

это значания параметров для каждого наблюдения

In [105]:
len(gwr_results.params[0])

35

это значения t-статистик для каждого наблюдения

In [106]:
len(gwr_results.tvalues[0])

35

---

<h3 style="text-align: center;">Строим карту</h3>

---

In [107]:
import geopandas as gpd
import osmnx as ox

In [108]:
gdf = ox.geocode_to_gdf('Санкт-Петербург', which_result=1)
gdf = gdf.to_crs('EPSG:4326')

In [109]:
import folium
style = {'fillColor': '#450B5C', 'color': '#450B5C','fillOpacity' : 0.05}
m = folium.Map([gdf.centroid.y, gdf.centroid.x], tiles='cartodbpositron')
folium.GeoJson(gdf,style_function=lambda x:style).add_to(m)
folium.FitBounds([[gdf.bounds.miny[0], gdf.bounds.minx[0]],[gdf.bounds.maxy[0], gdf.bounds.maxx[0]]]).add_to(m)



  m = folium.Map([gdf.centroid.y, gdf.centroid.x], tiles='cartodbpositron')
  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


<folium.map.FitBounds at 0x26bf9292980>

In [110]:
for i in range(len(cords_tr)):
    loc_r2 = local_R2[i][0]
    if loc_r2 <= intervals_for_r2[1]:
        clr = color_palette[0]
    elif loc_r2 <= intervals_for_r2[2]:
        clr = color_palette[1]
    elif loc_r2 <= intervals_for_r2[3]:
        clr = color_palette[2]
    else:
        clr = color_palette[3]
    folium.CircleMarker(
        location=cords_tr[i],
        radius=4,
        color='black',
        fill_color = clr,
        weight=1,
        fill=True,
        fill_opacity=1,
        opacity=1,
    ).add_to(m)

In [111]:
str_intervals

['0.14 - 0.15', '0.15 - 0.16', '0.16 - 0.17', '0.17 - 0.17']

In [112]:
from branca.element import Template, MacroElement

template = """
{% macro html(this, kwargs) %}

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>jQuery UI Draggable - Default functionality</title>
  <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">

  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
  
  <script>
  $( function() {
    $( "#maplegend" ).draggable({
                    start: function (event, ui) {
                        $(this).css({
                            right: "auto",
                            top: "auto",
                            bottom: "auto"
                        });
                    }
                });
});

  </script>
</head>
<body>

 
<div id='maplegend' class='maplegend' 
    style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
     border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
     
<div class='legend-title'>Local R^2</div>
<div class='legend-scale'>
  <ul class='legend-labels'>
    <li><span style='background:#BEC7DA;opacity:0.7;'></span>0.14 - 0.15</li>
    <li><span style='background:#7184AD;opacity:0.7;'></span>0.15 - 0.16</li>
    <li><span style='background:#395DA3;opacity:0.7;'></span>0.16 - 0.17</li>
    <li><span style='background:#03256C;opacity:0.7;'></span>0.17 - 0.17</li>

  </ul>
</div>
</div>
 
</body>
</html>

<style type='text/css'>
  .maplegend .legend-title {
    text-align: left;
    margin-bottom: 5px;
    font-weight: bold;
    font-size: 90%;
    }
  .maplegend .legend-scale ul {
    margin: 0;
    margin-bottom: 5px;
    padding: 0;
    float: left;
    list-style: none;
    }
  .maplegend .legend-scale ul li {
    font-size: 80%;
    list-style: none;
    margin-left: 0;
    line-height: 18px;
    margin-bottom: 2px;
    }
  .maplegend ul.legend-labels li span {
    display: block;
    float: left;
    height: 17px;
    width: 17px;
    margin-right: 5px;
    margin-left: 0;
    border: 0.2em solid #0F1C3F;
    border-radius: 50%
    }
  .maplegend .legend-source {
    font-size: 80%;
    color: #777;
    clear: both;
    }
  .maplegend a {
    color: #777;
    }
</style>
{% endmacro %}"""

macro = MacroElement()
macro._template = Template(template)
m.get_root().add_child(macro)

---

<h3 style="text-align: center;">Сохроняем карту</h3>

---

In [91]:
import io
from PIL import Image

img_data = m._to_png(10)
img = Image.open(io.BytesIO(img_data))
img.save('image.png')

---

In [93]:
from selenium import webdriver
import os
import time

In [95]:
delay=5
fn='1.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
m.save(fn)

browser = webdriver.Chrome()
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('map.png')
browser.quit()