# Lab 7 Data Visualisation on Python with Geopandas

## 1. Import data

In [None]:
# To run in colab:
# !git clone https://github.com/sigord/data_visualisation/
# !pip install -r /content/data_visualisation/LR6/requirements.txt

In [24]:
import pandas as pd
import numpy as np
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import shapely as shp
from shapely.wkt import loads
from pprint import pprint
import json

df=pd.read_excel("lab7_geo.xlsx", sheet_name="Sheet1", index_col=0)
drop_list = []
for i in df.index:
    # if last symbol is ")", then it is a polygon 
    if df.loc[i, "geometry"][-1] != ")":
        drop_list.append(i)
df.drop(drop_list, inplace=True)

# conver geometry column to geojson with initial dafaframe index as id
geoDict = {}
for i in df.index:
    try:
        geoDict[i] = loads(df.loc[i, "geometry"])
    except:
        geoDict[i] = np.nan
        print(f"Error at {i}")
geojson = gpd.GeoSeries(geoDict).to_json()
geojson = json.loads(geojson)
df['index'] = df.index
gdf = gpd.GeoDataFrame(df[list(df.columns)[:-2]])
gdf['index'] = gdf.index
gdf['geometry'] = gpd.GeoSeries(geoDict)
geojson = gdf.to_json()
geojson = json.loads(geojson)

#TODO: proper count the mean for task where it is needed with area calculation

# get area from geometry with EPSG:32637
# WGS 84 / UTM zone 37N

gdf.crs = "EPSG:4326"
gdf["geometry_2"] = gdf["geometry"].to_crs("EPSG:32637") # more precise
gdf["geometry_3"] = gdf["geometry"].to_crs({"proj":'cea'})
gdf["area1"] = gdf["geometry_2"].area / 10**6
gdf["area2"] = gdf["geometry_3"].area / 10**6

gdf.drop(["geometry_2", "geometry_3"], axis=1, inplace=True)


You are adding a column named 'geometry' to a GeoDataFrame constructed without an active geometry column. Currently, this automatically sets the active geometry column to 'geometry' but in the future that will no longer happen. Instead, either provide geometry to the GeoDataFrame constructor (GeoDataFrame(... geometry=GeoSeries()) or use `set_geometry('geometry')` to explicitly set the active geometry column.




## 2. Using plotly.express build a choropleth map showing population density inside administrative boundaries of Moscow districts

In [17]:
gdf2 = gdf[['Административный округ', 'Плотность_населения', 'geometry']]
# TODO: fix mean calculation
gdf2 = gdf2.dissolve(by="Административный округ", aggfunc="mean", as_index=False)
geojson = gdf2.to_json()
geojson = json.loads(geojson)
fig = px.choropleth(gdf2, geojson=geojson, 
                    color='Плотность_населения', 
                    locations='Административный округ',
                    featureidkey="properties.Административный округ",)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show();

## 3.	Using plotly.express plot a choropleth map showing the number of small and medium-sized enterprises within the administrative boundaries of Moscow districts

In [18]:
gdf3 = gdf.dissolve(by="Административный округ", aggfunc="sum", as_index=False)
geojson = gdf3.to_json()
geojson = json.loads(geojson)
fig = px.choropleth(gdf3, geojson=geojson, 
                    color='Количество_малых_и_средних_предприятий', 
                    locations='Административный округ',
                    featureidkey="properties.Административный округ",)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show();

## 4.	Using plotly.express plot a choropleth map showing the area of living quarters per 1 resident within the administrative boundaries of Moscow districts

In [21]:
gdf4 = gdf[['Административный округ', 'Площадь_жилых_помещений_на_1_жителя', 'geometry']]
# TODO: fix mean calculation
gdf4 = gdf4.dissolve(by="Административный округ", aggfunc="mean", as_index=False)
fig = px.choropleth(gdf2, geojson=geojson, 
                    color='Площадь_жилых_помещений_на_1_жителя', 
                    locations='Административный округ',
                    featureidkey="properties.Административный округ",)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show();

## 5.	Using plotly.express build a choropleth map showing the percentage of parks and forests from the area within the administrative boundaries of Moscow districts

In [23]:
gdf4 = gdf[['Административный округ', 'Процент_парков_и_лесов_от_площади', 'geometry']]
# TODO: fix mean calculation
gdf4 = gdf4.dissolve(by="Административный округ", aggfunc="mean", as_index=False)
fig = px.choropleth(gdf2, geojson=geojson, 
                    color='Процент_парков_и_лесов_от_площади', 
                    locations='Административный округ',
                    featureidkey="properties.Административный округ",)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show();

## 6.	Replace on the map from step 2 the palette with a divergent palette (blue-red)

## 7.	Display on the map in step 3 in the pop-up information the name of the administrative district.

## 8.	Using geo-spatial methods geopandas glue polygons into administrative districts and calculate in them an indicator - the ratio between population density and the number of small businesses. Using these data, construct a choropleth map.

## 9.	Construct a point geo-map (add markers on the map of Moscow corresponding to the buildings of Moscow State University, Moscow Engineering Physics Institute, Moscow Aviation Institute, Russian Academy of National Economy and Public Administration). Add a caption to each marker.

## 10.	 Build a bubble geo-map (add markers on the map of Moscow corresponding to the centroids of the administrative districts of Moscow). Add a caption to each marker. Determine marker size from population density.
