In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from geopy.geocoders import Nominatim
! pip install folium
import folium
! pip install pygeocoder
from pygeocoder import Geocoder
! pip install shapely
import urllib.request, json 
from shapely.geometry import Point, Polygon

print('Packages installed')

Packages installed


In [2]:
# Real estate data, published as a Google Sheet
sheet_url_RD = 'https://docs.google.com/spreadsheets/d/1CiUj-cWcO1D7KN_qfs6jArjfHf9njpUGlrc3Lk2GxO0/edit#gid=915412023'
# Point at the sheet's CSV export instead of its editor view so pandas can read it directly.
csv_export_url_RD = sheet_url_RD.replace('/edit#gid=', '/export?format=csv&gid=')

# Source column headers -> English names used in the rest of the notebook.
column_names_RD = {
    'Adresas': 'Adress',
    'Rajonas': 'Neighbourhood',
    'Plotas': 'Area',
    'Kaina': 'Price',
    'Metai': 'Year',
    'Šildymas': 'Heating',
    'Kambarių sk': 'Rooms',
    'Pastato tipas': 'Type',
}
aruodas_df = pd.read_csv(csv_export_url_RD).rename(columns=column_names_RD)

# Size of the dataframe before and after dropping rows with missing values
print(aruodas_df.shape)
aruodas_df = aruodas_df.dropna()
print(aruodas_df.shape)

(3684, 11)
(3683, 11)


In [3]:
# DataFrame of housing prices based on current market advertisements on aruodas.lt.
# The data was cleaned after web scraping.
aruodas_df.head()

Unnamed: 0,name,Adress,Neighbourhood,Rooms,Area,Price,Heating,Type,Year,latitude,longitude
0,Elbingo g.Pilaitė1,Elbingo g.,Pilaitė,1,64.0,87000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026
1,Elbingo g.Pilaitė3,Elbingo g.,Pilaitė,3,55.0,76000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026
2,Elbingo g.Pilaitė2,Elbingo g.,Pilaitė,2,48.0,69000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026
3,Elbingo g.Pilaitė2,Elbingo g.,Pilaitė,2,48.0,86000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026
4,Žaliųjų Ežerų g.Santariškės2,Žaliųjų Ežerų g.,Santariškės,2,51.5,88000,Centrinis,Mūrinis,1982statyba.2017renovacija,54.75269,25.27912


In [4]:
# Companies in Vilnius, published as a Google Sheet
sheet_url_ED = 'https://docs.google.com/spreadsheets/d/1j4hwuWKW3pnJfG9qdLoxSo8QU7V07HACyH11Y3SeXGw/edit#gid=550410933'
# Point at the sheet's CSV export instead of its editor view so pandas can read it directly.
csv_export_url_ED = sheet_url_ED.replace('/edit#gid=', '/export?format=csv&gid=')
# Read the export and discard rows with missing values in a single chained step.
enterprise_df = pd.read_csv(csv_export_url_ED).dropna()
enterprise_df.shape

(11341, 8)

In [5]:
# Quick look at the first rows of the company data
enterprise_df.head()

Unnamed: 0,ID,name,avgWage,numInsured,tax,address,latitude,longitude
0,63287,UAB LABBIS,2183.85,53,26969.73,Žalgirio g. 92-301 LT-09303 VILNIUS,54.703743,25.276711
1,56132,UAB KOMPONENTAS,1317.62,5,1488.08,Kapsų g. 19 LT-02166 VILNIUS,54.660313,25.284592
2,57061,UAB ELTEL NETWORKS,1484.78,247,85636.92,Vilkpėdės g. 4 LT-03151 VILNIUS,54.664582,25.247461
3,56147,UAB IDW,1432.5,225,74027.54,Dariaus ir Girėno g. 65 A LT-02189 VILNIUS,54.646482,25.270569
4,59094,UAB KONICA MINOLTA BALTIA,2194.15,54,26969.69,J. Jasinskio g. 16 LT-01112 VILNIUS,54.688118,25.261523


In [6]:
# Data set containing links to the GeoJSON polygons of the municipalities in Vilnius.
# Having all our data points sorted into municipalities will give us a better understanding
# and easier management of the data.
sheet_url_MD = 'https://docs.google.com/spreadsheets/d/1aELRC65_N_WrufdIjQYlQSxN87wnsl8IufOzn76qJGA/edit#gid=1220023145'
csv_export_url_MD = sheet_url_MD.replace('/edit#gid=', '/export?format=csv&gid=')
municipalities_df = pd.read_csv(csv_export_url_MD)
municipalities_df.shape

(21, 2)

In [7]:
# Quick look at the first ten rows of municipalities_df
municipalities_df.head(10)

Unnamed: 0,Municipality,JsonPoly
0,Antakalnio seniūnija,https://global.mapit.mysociety.org/area/112195...
1,Fabijoniškių seniūnija,https://global.mapit.mysociety.org/area/112187...
2,Grigiškių seniūnija,https://global.mapit.mysociety.org/area/112159...
3,Justiniškių seniūnija,https://global.mapit.mysociety.org/area/112186...
4,Karoliniškių seniūnija,https://global.mapit.mysociety.org/area/112177...
5,Lazdynų seniūnija,https://global.mapit.mysociety.org/area/112160...
6,Naujamiesčio seniūnija,https://global.mapit.mysociety.org/area/118468...
7,Naujininkų seniūnija,https://global.mapit.mysociety.org/area/112196...
8,Naujosios Vilnios seniūnija,https://global.mapit.mysociety.org/area/112196...
9,Panerių seniūnija,https://global.mapit.mysociety.org/area/112159...


In [8]:
# Next, build one shapely Polygon per municipality from the JSON document behind each URL.
json_list = municipalities_df['JsonPoly'].to_list()
poly_list = []

# Upper bound (in seconds) on each download, so a stalled request raises instead of
# hanging the notebook indefinitely.
GEOJSON_TIMEOUT_S = 30

for geojson_url in json_list:
    with urllib.request.urlopen(geojson_url, timeout=GEOJSON_TIMEOUT_S) as response:
        data = json.load(response)
    # Only the first entry of 'coordinates' is used, and each vertex is reduced to its
    # first two components before being handed to Polygon.
    # NOTE(review): this assumes every document is a single-ring GeoJSON "Polygon";
    # interior rings and MultiPolygon documents are not handled - confirm for new URLs.
    points = [(vertex[0], vertex[1]) for vertex in data['coordinates'][0]]
    poly_list.append(Polygon(points))


In [9]:
# Attach the newly made shapely polygons to the dataframe as the 'ShapelyPolyObj' column.
# `assign` returns a new frame and overwrites the column when it already exists, so this
# cell can be re-run safely. `insert(..., allow_duplicates=True)` would add another
# 'ShapelyPolyObj' column on every re-run, after which selecting that label returns a
# DataFrame instead of a Series.
municipalities_df = municipalities_df.assign(ShapelyPolyObj=poly_list)
municipalities_df.head(10)

Unnamed: 0,Municipality,JsonPoly,ShapelyPolyObj
0,Antakalnio seniūnija,https://global.mapit.mysociety.org/area/112195...,"POLYGON ((25.4256197 54.7298686, 25.4253218 54..."
1,Fabijoniškių seniūnija,https://global.mapit.mysociety.org/area/112187...,"POLYGON ((25.2143712 54.7502259, 25.2145271 54..."
2,Grigiškių seniūnija,https://global.mapit.mysociety.org/area/112159...,"POLYGON ((25.1098852 54.6638582, 25.1002898 54..."
3,Justiniškių seniūnija,https://global.mapit.mysociety.org/area/112186...,"POLYGON ((25.1990125 54.7130838, 25.199211 54...."
4,Karoliniškių seniūnija,https://global.mapit.mysociety.org/area/112177...,"POLYGON ((25.2257631 54.6995595, 25.2264139 54..."
5,Lazdynų seniūnija,https://global.mapit.mysociety.org/area/112160...,"POLYGON ((25.1918925 54.6830475, 25.1895424 54..."
6,Naujamiesčio seniūnija,https://global.mapit.mysociety.org/area/118468...,"POLYGON ((25.2866943 54.6694587, 25.2861705 54..."
7,Naujininkų seniūnija,https://global.mapit.mysociety.org/area/112196...,"POLYGON ((25.313623 54.5992503, 25.3116733 54...."
8,Naujosios Vilnios seniūnija,https://global.mapit.mysociety.org/area/112196...,"POLYGON ((25.4579355 54.6874558, 25.4588796 54..."
9,Panerių seniūnija,https://global.mapit.mysociety.org/area/112159...,"POLYGON ((25.2028753 54.6356714, 25.2048006 54..."


In [10]:
# Now I need to check whether the coordinate points in enterprise_df and aruodas_df fall within any of these polygons.
# If so, I want a dictionary for each dataframe that maps Name (enterprise or housing) to Municipality.

def check_points(df1, df2):
    """Map each named point in ``df1`` to the municipality whose polygon contains it.

    Parameters
    ----------
    df1 : DataFrame
        Must provide 'longitude', 'latitude' and 'name' columns. A point is built
        from each row as ``Point(longitude, latitude)``.
    df2 : DataFrame
        Must provide 'Municipality' and 'ShapelyPolyObj' columns. Every
        'ShapelyPolyObj' value must offer a ``contains(point)`` method.

    Returns
    -------
    dict
        ``{name: municipality}`` for every point contained in at least one polygon.
        Points contained in no polygon are omitted. Because the result is keyed by
        name, rows of ``df1`` that share a name collapse into a single entry whose
        value comes from the last containing match encountered.
    """
    # The (municipality, polygon) pairs are the same for every point, so materialise
    # them once instead of rebuilding both lists on each pass of the outer loop.
    municipalities = list(zip(df2['Municipality'].to_list(),
                              df2['ShapelyPolyObj'].to_list()))

    name_to_municipality = {}
    rows = zip(df1['longitude'].to_list(),
               df1['latitude'].to_list(),
               df1['name'].to_list())
    for longitude, latitude, name in rows:
        point = Point(longitude, latitude)
        for municipality, polygon in municipalities:
            if polygon.contains(point):
                # Deliberately no `break`: if several polygons contain the point,
                # the last one in df2 order is the one recorded.
                name_to_municipality[name] = municipality
    return name_to_municipality

In [11]:
# Build a {name: municipality} dictionary for the companies and for the housing listings.
enterprise_check = check_points(enterprise_df, municipalities_df)
aruodas_check = check_points(aruodas_df, municipalities_df)

In [12]:
# Turn each {name: municipality} dictionary into a two-column lookup table.
# The plain DataFrame constructor is used because the input is a list of (key, value)
# pairs rather than a dict. Naming the columns here keeps the 'name' key present even
# when a dictionary is empty, so a later merge on 'name' does not fail on a missing column.
lookup_columns = ['name', 'municipality']
temp_aruodas_df = pd.DataFrame(list(aruodas_check.items()), columns=lookup_columns)
temp_enterprise_df = pd.DataFrame(list(enterprise_check.items()), columns=lookup_columns)



In [13]:
# Label the lookup columns: positional label 0 becomes 'name', 1 becomes 'municipality'.
lookup_labels = {0: 'name', 1: 'municipality'}
temp_aruodas_df = temp_aruodas_df.rename(columns=lookup_labels)
temp_enterprise_df = temp_enterprise_df.rename(columns=lookup_labels)

In [14]:
# Preview the first rows of the company name -> municipality lookup table.
temp_enterprise_df.head()

Unnamed: 0,name,municipality
0,UAB LABBIS,Šnipiškių seniūnija
1,UAB KOMPONENTAS,Naujininkų seniūnija
2,UAB ELTEL NETWORKS,Vilkpėdės seniūnija
3,UAB IDW,Naujininkų seniūnija
4,UAB KONICA MINOLTA BALTIA,Naujamiesčio seniūnija


In [15]:
# Attach the municipality looked up for each name to the main dataframes.
# No `how` is given, so pandas' default join type applies.
aruodas_df = aruodas_df.merge(temp_aruodas_df, on='name')
enterprise_df = enterprise_df.merge(temp_enterprise_df, on='name')

In [16]:
# Preview the company data with the added 'municipality' column.
enterprise_df.head()

Unnamed: 0,ID,name,avgWage,numInsured,tax,address,latitude,longitude,municipality
0,63287,UAB LABBIS,2183.85,53,26969.73,Žalgirio g. 92-301 LT-09303 VILNIUS,54.703743,25.276711,Šnipiškių seniūnija
1,56132,UAB KOMPONENTAS,1317.62,5,1488.08,Kapsų g. 19 LT-02166 VILNIUS,54.660313,25.284592,Naujininkų seniūnija
2,57061,UAB ELTEL NETWORKS,1484.78,247,85636.92,Vilkpėdės g. 4 LT-03151 VILNIUS,54.664582,25.247461,Vilkpėdės seniūnija
3,56147,UAB IDW,1432.5,225,74027.54,Dariaus ir Girėno g. 65 A LT-02189 VILNIUS,54.646482,25.270569,Naujininkų seniūnija
4,59094,UAB KONICA MINOLTA BALTIA,2194.15,54,26969.69,J. Jasinskio g. 16 LT-01112 VILNIUS,54.688118,25.261523,Naujamiesčio seniūnija


In [17]:
# Preview the housing data with the added 'municipality' column.
aruodas_df.head()

Unnamed: 0,name,Adress,Neighbourhood,Rooms,Area,Price,Heating,Type,Year,latitude,longitude,municipality
0,Elbingo g.Pilaitė1,Elbingo g.,Pilaitė,1,64.0,87000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026,Pilaitės seniūnija
1,Elbingo g.Pilaitė1,Elbingo g.,Pilaitė,1,64.0,87000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026,Pilaitės seniūnija
2,Elbingo g.Pilaitė3,Elbingo g.,Pilaitė,3,55.0,76000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026,Pilaitės seniūnija
3,Elbingo g.Pilaitė3,Elbingo g.,Pilaitė,3,55.0,76000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026,Pilaitės seniūnija
4,Elbingo g.Pilaitė2,Elbingo g.,Pilaitė,2,48.0,69000,Centriniskolektorinis,Mūrinis,2018,54.70807,25.17026,Pilaitės seniūnija
