# Lecture 4
Learning goals
 + pathlib
 + buffers and topology
 + choropleth maps


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

import geopandas as gpd

## Pathlib and managing paths (esp. across operating systems!)
You've probably noticed that Windows uses \ to separate directories, while Mac and Linux file systems
use /.  Also, some directories seem to begin with `\\`.  What a mess!  Enter _pathlib_.

In [None]:
# Show the current working directory; the relative paths used in this notebook are resolved against it
Path.cwd()

In [None]:
# Write the relative path with forward slashes: pathlib translates them to the
# native separator on Windows, whereas a backslash-separated string is treated
# as one single (odd) filename on macOS/Linux and contains invalid escape
# sequences ('\.' and '\d').
datasets_dir = Path('../../datasets')
# ls ../../

In [None]:
# Display the Path object itself; its repr shows the concrete Path class pathlib picked for this OS
datasets_dir

In [None]:
# The "/" operator joins path components without hard-coding a separator.
moscow_vectors_dir = datasets_dir / 'moscow' / 'moscow_vectors'
moscow_gdb_path = moscow_vectors_dir / 'Moscow.gdb'

# Show the assembled path and confirm that it is still a Path object, not a str.
print(moscow_gdb_path, type(moscow_gdb_path), sep='\n')

In [None]:
# True if something exists at this path; a quick check that the relative path above is right
moscow_gdb_path.exists()

In [None]:
# NOTE(review): a file geodatabase (.gdb) is normally a directory (it is globbed like one
# further down), so this is expected to be False; is_dir() is the matching check -- confirm.
moscow_gdb_path.is_file()

An amazing, clear cheatsheet covering much of what pathlib can do:
https://github.com/chris1610/pbpython/blob/master/extras/Pathlib-Cheatsheet.pdf

In [None]:
# Name of a scratch directory, interpreted relative to the current working directory
my_dir_name = 'TCB_work'
# Wrapping the name in Path does not touch the filesystem; nothing is created yet
my_path = Path(my_dir_name)

Use `path.exists()` to check whether a path is already there and `path.mkdir()` to create a directory.  Use `path.rmdir()` to remove a directory (it must be empty).

In [None]:
# exist_ok=True keeps this cell re-runnable: a bare mkdir() raises FileExistsError
# the second time the notebook is executed because the directory is already there.
my_path.mkdir(exist_ok=True)

#### Globbing

In [None]:
# glob() returns a lazy generator, so wrap it in list() to see every entry inside the geodatabase
list( moscow_gdb_path.glob('*') )

In [None]:
# A narrower pattern: only entries whose name contains "gdbtabl" are yielded
for file in moscow_gdb_path.glob('*gdbtabl*'):
    print(file)
    

In [None]:
import fiona
# Keep a plain-string version of the path for the fiona / geopandas calls that follow
moscow_gdb = str(moscow_gdb_path)

# List the names of the layers stored in the geodatabase
fiona.listlayers(moscow_gdb)

In [None]:
# Layers to load from the geodatabase, in the same order as the names they are unpacked into.
layer_names = (
    'Parcels',
    'Centerlines',
    'Moscow_Zoning',
    'Moscow_City_Limit',
    'Moscow_Parks',
)

# Each layer becomes its own GeoDataFrame.
parcels, roads, zoning, city_limits, parks = (
    gpd.read_file(moscow_gdb, layer=layer_name) for layer_name in layer_names
)


In [None]:
# List the attribute columns available on the parcels layer
parcels.columns

In [None]:
# One centroid point per parcel geometry
parcels.centroid

## Property sales over time

In [None]:
import pandas as pd

# Peek at the first four deed dates as strings to check the YYYYMMDD layout.
# (The slice selects the first four rows, not the first four characters.)
print(parcels['PM_DEEDCDT'].astype(str).head(4))

# Parse the deed dates; values that do not match the format become NaT
# instead of raising.
parcels['SaleDate'] = pd.to_datetime(
    parcels['PM_DEEDCDT'], format='%Y%m%d', errors='coerce'
)




In [None]:
# Inspect the parsed dates; entries that failed to parse show up as NaT
parcels['SaleDate']

In [None]:
dates = parcels['PM_DEEDCDT'].to_numpy()

# The sale year is the first four characters of the YYYYMMDD deed date.
# Entries that are not positive (including missing values) have no usable date
# and stay NaN.
yr = np.array(
    [int(str(deed_date)[:4]) if deed_date > 0 else np.nan for deed_date in dates],
    dtype=float,
)

parcels['SaleYear'] = yr
# Alternative kept for reference:
# parcels['SaleYear'] = parcels['SaleDate'].to_numpy().astype('M8[Y]')

In [None]:
fig, ax = plt.subplots()

# One histogram bin per day, from the start of 1970 up to the start of 2022.
daily_bin_edges = np.arange(
    np.datetime64('1970-01-01'),
    np.datetime64('2022-01-01'),
    np.timedelta64(1, 'D'),
)
ax.hist(parcels['SaleDate'], bins=daily_bin_edges)
ax.set_ylabel('Number of property sales in Moscow')

# Zoom in on the last two weeks of 2010.  For a wider window use e.g.
# ax.set_xlim(np.datetime64('2010-01-01'), np.datetime64('2012-01-01'))
ax.set_xlim(np.datetime64('2010-12-15'), np.datetime64('2011-01-01'))



In [None]:
# Scratch cell: what bin width does "one twelfth of a 365-day year" give?
# (Candidate monthly bin edges, left commented out.)
# np.arange(np.datetime64('2015-01-01'), 
#                                             np.datetime64('2022-01-01'), 
#                                             np.timedelta64(365.25, 'D')/12 )
np.timedelta64(365, 'D')/12

In [None]:
# parcels['SaleDate'].to_numpy().astype('M8[Y]')

In [None]:
# [np.datetime64(i,'Y') for i in parcels['SaleYear'][:2]]


In [None]:
# Peek at the first two sale years, cast to float64
parcels['SaleYear'][:2].astype('float64')

In [None]:
# Plain [] indexing on a (Geo)DataFrame selects a *column* by label, so parks[1]
# raises KeyError.  Use .iloc to pull out a row by position instead.
parks.iloc[1]

In [None]:
buffer_radius = 1000  # in the layer's native units (feet)

i = 4

# Slice with i:i+1 rather than indexing with i: the slice keeps a one-row
# GeoDataFrame, which has .buffer(), whereas parks.iloc[i] returns that row as
# a plain pandas Series of attribute values, which does not.
buffered_park = parks.iloc[i:i+1].buffer(buffer_radius)

# Pull the single buffered geometry out so every parcel is tested against it.
park_buffer_geom = buffered_park.geometry.iloc[0]

# NOTE: overlaps() is True only for parcels that straddle the buffer edge;
# parcels lying entirely inside the buffer are not flagged (intersects() would
# include them).
overlapping_parcels = parcels.overlaps(park_buffer_geom)
# buffered_park.plot()



<div class="alert alert-block alert-warning">

## Generate a list of all parcels that are not within 1000 ft of a park
You're a city planner.  You want to reach out to citizens of Moscow who are underserved with access to parks.
But first you need to know who those citizens are.
</div>

<div class="alert alert-block alert-warning">

### How long ago did 50% of the properties change hands?
As in, fill in the blank: "Half of properties in the Moscow area have been sold within _____ years."
</div>

In [None]:
# Display the road centerlines table
roads

In [None]:
# List the attribute columns available on the zoning layer
zoning.columns

In [None]:
from shapely.geometry import Polygon, LineString, Point

# Reference point in the layers' projected coordinates
# (presumably downtown Moscow, going by the column name -- TODO confirm).
downtown_point = Point(2.311e6, 1.85e6)

# Distance from every parcel geometry to the reference point, in layer units.
parcels['Dwntn_dist'] = parcels.distance(downtown_point)

In [None]:
# Bounding box of each city-limit geometry, one row per geometry
city_limits.bounds


In [None]:
# Display the parcels table, now including the columns added above (SaleDate, SaleYear, Dwntn_dist)
parcels

In [None]:
%matplotlib widget
fig, ax = plt.subplots( figsize=(8,8) )
# parcels.plot(column='Val_per_foot', ax=ax, legend=True, vmax=200);
# parcels.plot(column='PM_TOT_VAL', ax=ax, legend=True, vmax=400000);
# parcels.plot(column='PM_IMP_VAL', ax=ax, legend=True, vmin=150000, vmax=600000);

# parcels.plot(column='Dwntn_dist', ax=ax, legend=True, vmax=5280);
zoning.plot(column='LANDUSECODE', ax=ax)

# parcels.plot(column= 'SaleYear', ax=ax, legend=True, vmin=1990)

ax.set_xlim( city_limits.bounds.to_numpy()[0][::2] )
ax.set_ylim( city_limits.bounds.to_numpy()[0][1::2] )

roads.plot(ax=ax, color='gray')

In [None]:
# Scratch cell.  Only the last expression of a cell is displayed, so this first line shows nothing.
city_limits.bounds
# ax.set_xlim( city_limits.bounds.to_numpy()[0][:] )
# First row of the bounds table as a flat array (the trailing [:] just takes every element)
city_limits.bounds.to_numpy()[0][:]


### Roads close to schools and parks?