# Making a map

### Narrow my data, so that I only have the information I want to display
### Merge it with my geojson file
### Do all the Mapbox stuff!

In [None]:
import pandas as pd

df = pd.read_csv("analysis_output/clean_output_20221213.csv")

df

In [2]:
# Drop the stray index column that the other notebook wrote into the CSV
# (that notebook should pass index=False to to_csv so this stops happening).
# errors='ignore' keeps the cell safe to re-run and harmless once the CSV no
# longer contains the column.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [3]:
df.groupby('district')['price_sqm'].mean()

district
Blackrock     6592.547486
Dublin 1      5361.135593
Dublin 10     3516.083333
Dublin 11     3777.193548
Dublin 12     4319.337838
Dublin 13     4948.848214
Dublin 14     5756.484848
Dublin 15     4054.015873
Dublin 16     5043.401961
Dublin 17     3598.933333
Dublin 18     5212.950980
Dublin 2      7228.773585
Dublin 20     4083.133333
Dublin 22     3535.712329
Dublin 24     3508.840336
Dublin 3      5299.738462
Dublin 4      7437.981928
Dublin 5      4640.831683
Dublin 6      7153.440678
Dublin 6W     5894.837209
Dublin 7      5190.763889
Dublin 8      5582.251969
Dublin 9      4672.031447
Glenageary    6427.788889
Name: price_sqm, dtype: float64

## Narrowing down my data into a smaller frame

First make subset dataframes with the information I want to display:

In [4]:
# One small frame per statistic, all keyed by district:
#   df1 -> mean price per square metre (2 decimals)
#   df2 -> mean bedrooms (rounded to a whole number)
#   df3 -> mean bathrooms (rounded to a whole number)
#   df4 -> median sale price (rounded to a whole number)
district_groups = df.groupby('district')

df1 = district_groups['price_sqm'].mean().round(2).reset_index(name='price_sqm')
df2 = district_groups['bedrooms'].mean().round().reset_index(name='bedrooms')
df3 = district_groups['bathrooms'].mean().round().reset_index(name='bathrooms')
df4 = district_groups['price'].median().round().reset_index(name='price')

Put all of that into one readable column: 

In [5]:
# Build the popup text for each district. df1-df4 all come from
# groupby('district') + reset_index, so their rows line up by index.
df1["string"] = (
    "<b>" + df1['district'] + "</b>"
    + "<br>Median sale price: €" + df4['price'].astype(str)
    + "<br>Mean price per square metre: €" + df1['price_sqm'].astype(str)
    # The ": " separator keeps the label from running into the bedroom count.
    + "<br>Mean number of bedrooms / bathrooms per property: "
    + df2['bedrooms'].astype(str) + " / " + df3['bathrooms'].astype(str)
)

df1.tail(40)

Unnamed: 0,district,price_sqm,string
0,Blackrock,6592.55,<b>Blackrock</b><br>Median sale price: €710000...
1,Dublin 1,5361.14,<b>Dublin 1</b><br>Median sale price: €360000<...
2,Dublin 10,3516.08,<b>Dublin 10</b><br>Median sale price: €260000...
3,Dublin 11,3777.19,<b>Dublin 11</b><br>Median sale price: €285000...
4,Dublin 12,4319.34,<b>Dublin 12</b><br>Median sale price: €375000...
5,Dublin 13,4948.85,<b>Dublin 13</b><br>Median sale price: €430000...
6,Dublin 14,5756.48,<b>Dublin 14</b><br>Median sale price: €645000...
7,Dublin 15,4054.02,<b>Dublin 15</b><br>Median sale price: €365000...
8,Dublin 16,5043.4,<b>Dublin 16</b><br>Median sale price: €525000...
9,Dublin 17,3598.93,<b>Dublin 17</b><br>Median sale price: €285000...


I want this but with the correct html formatting: 

In [6]:

def _district_article(snippets):
    """Join a district's text snippets and wrap them in the article HTML shell."""
    body = '</p><p> '.join(snippets)
    return "<div class='district'><h1><b>Property Listings</b></h1><P>{0}</P></div>".format(body)


# One article per district, stored under the 'properties.article' column.
output = (
    df1.groupby('district')['string']
    .apply(_district_article)
    .reset_index(name='properties.article')
)
output


Unnamed: 0,district,properties.article
0,Blackrock,<div class='district'><h1><b>Property Listings...
1,Dublin 1,<div class='district'><h1><b>Property Listings...
2,Dublin 10,<div class='district'><h1><b>Property Listings...
3,Dublin 11,<div class='district'><h1><b>Property Listings...
4,Dublin 12,<div class='district'><h1><b>Property Listings...
5,Dublin 13,<div class='district'><h1><b>Property Listings...
6,Dublin 14,<div class='district'><h1><b>Property Listings...
7,Dublin 15,<div class='district'><h1><b>Property Listings...
8,Dublin 16,<div class='district'><h1><b>Property Listings...
9,Dublin 17,<div class='district'><h1><b>Property Listings...


In [7]:
output.iloc[3]['properties.article']

"<div class='district'><h1><b>Property Listings</b></h1><P><b>Dublin 11</b><br>Median sale price: €285000<br>Mean price per square metre: €3777.19<br>Mean number of bedrooms per property: 3.0<br>Mean number of bathrooms per property: 2.0</P></div>"

In [8]:
# Number of distinct addresses per district, stored as 'properties.headline'.
properties = (
    df.groupby('district')['address']
    .nunique()
    .reset_index(name='properties.headline')
)
properties

Unnamed: 0,district,properties.headline
0,Blackrock,179
1,Dublin 1,59
2,Dublin 10,24
3,Dublin 11,93
4,Dublin 12,148
5,Dublin 13,112
6,Dublin 14,165
7,Dublin 15,188
8,Dublin 16,102
9,Dublin 17,15


In [9]:
output = output.merge(properties, how='left', on='district')

In [10]:
df1 = df1.drop(columns = ["string"])

In [11]:
output = output.merge(df1, how='left', on='district')

In [12]:
# Only 'price_sqm' (merged in from df1) still lacks the "properties." prefix.
# Renaming it by name, rather than reassigning every label by position, cannot
# mislabel columns if their order ever changes.
output = output.rename(columns={"price_sqm": "properties.price_sqm"})

In [13]:
output['properties.headline'] = output['properties.headline'].astype(str) + " properties"

In [14]:
output['properties.name'] = output['district'].astype(str)
# output['properties.group_name'] = output['district'].astype(str)

In [15]:
output['district'] = output['district'].str.lower()

In [16]:
output

Unnamed: 0,district,properties.article,properties.headline,properties.price_sqm,properties.name
0,blackrock,<div class='district'><h1><b>Property Listings...,179 properties,6592.55,Blackrock
1,dublin 1,<div class='district'><h1><b>Property Listings...,59 properties,5361.14,Dublin 1
2,dublin 10,<div class='district'><h1><b>Property Listings...,24 properties,3516.08,Dublin 10
3,dublin 11,<div class='district'><h1><b>Property Listings...,93 properties,3777.19,Dublin 11
4,dublin 12,<div class='district'><h1><b>Property Listings...,148 properties,4319.34,Dublin 12
5,dublin 13,<div class='district'><h1><b>Property Listings...,112 properties,4948.85,Dublin 13
6,dublin 14,<div class='district'><h1><b>Property Listings...,165 properties,5756.48,Dublin 14
7,dublin 15,<div class='district'><h1><b>Property Listings...,188 properties,4054.02,Dublin 15
8,dublin 16,<div class='district'><h1><b>Property Listings...,102 properties,5043.4,Dublin 16
9,dublin 17,<div class='district'><h1><b>Property Listings...,15 properties,3598.93,Dublin 17


In [17]:
#Add some color
output['properties.color'] = "#35476E"
output

Unnamed: 0,district,properties.article,properties.headline,properties.price_sqm,properties.name,properties.color
0,blackrock,<div class='district'><h1><b>Property Listings...,179 properties,6592.55,Blackrock,#35476E
1,dublin 1,<div class='district'><h1><b>Property Listings...,59 properties,5361.14,Dublin 1,#35476E
2,dublin 10,<div class='district'><h1><b>Property Listings...,24 properties,3516.08,Dublin 10,#35476E
3,dublin 11,<div class='district'><h1><b>Property Listings...,93 properties,3777.19,Dublin 11,#35476E
4,dublin 12,<div class='district'><h1><b>Property Listings...,148 properties,4319.34,Dublin 12,#35476E
5,dublin 13,<div class='district'><h1><b>Property Listings...,112 properties,4948.85,Dublin 13,#35476E
6,dublin 14,<div class='district'><h1><b>Property Listings...,165 properties,5756.48,Dublin 14,#35476E
7,dublin 15,<div class='district'><h1><b>Property Listings...,188 properties,4054.02,Dublin 15,#35476E
8,dublin 16,<div class='district'><h1><b>Property Listings...,102 properties,5043.4,Dublin 16,#35476E
9,dublin 17,<div class='district'><h1><b>Property Listings...,15 properties,3598.93,Dublin 17,#35476E


### geojson > pandas > mapbox

In [18]:
#Some nice imports
import requests
import json
import numpy as np
import pandas as pd
from pandas import json_normalize


In [19]:
##Load the geojson file Exported from Mapshaper

with open('./spatial_layers/dublin_districts.geojson') as json_data:
    geometry_data = json.load(json_data)
    

In [20]:
geometry_data

{'type': 'FeatureCollection',
 'name': 'dublin_districts',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'RoutingKey': 'A94', 'Descriptor': 'BLACKROCK'},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-6.195783, 53.27454, 0.0],
       [-6.196721, 53.274051, 0.0],
       [-6.198655, 53.273515, 0.0],
       [-6.198424, 53.273187, 0.0],
       [-6.200951, 53.272999, 0.0],
       [-6.202846, 53.273543, 0.0],
       [-6.202463, 53.273873, 0.0],
       [-6.202863, 53.274344, 0.0],
       [-6.203509, 53.2744, 0.0],
       [-6.203779, 53.274938, 0.0],
       [-6.203866, 53.27497, 0.0],
       [-6.203539, 53.275419, 0.0],
       [-6.203537, 53.276058, 0.0],
       [-6.203238, 53.276099, 0.0],
       [-6.201739, 53.275734, 0.0],
       [-6.200934, 53.276524, 0.0],
       [-6.205683, 53.277997, 0.0],
       [-6.206378, 53.27823, 0.0],
       [-6.210778, 53.279672, 0.0],
       [-6.214161, 53

In [21]:
## Flatten the nested GeoJSON so each feature becomes one simple dataframe row.
## Note that the features live under geometry_data['features'].
## json_normalize already returns a DataFrame, so no extra wrapping is needed.
## NOTE: this rebinds `df`, which held the property listings until now.
df = json_normalize(geometry_data['features'])


In [22]:
df['properties.Descriptor'] = df['properties.Descriptor'].str.lower()

In [23]:
df.head()

Unnamed: 0,type,properties.RoutingKey,properties.Descriptor,geometry.type,geometry.coordinates
0,Feature,A94,blackrock,MultiPolygon,"[[[[-6.195783, 53.27454, 0.0], [-6.196721, 53...."
1,Feature,A96,glenageary,MultiPolygon,"[[[[-6.160525, 53.277182, 0.0], [-6.159042, 53..."
2,Feature,D01,dublin 1,MultiPolygon,"[[[[-6.260391, 53.361446, 0.0], [-6.260147, 53..."
3,Feature,D02,dublin 2,MultiPolygon,"[[[[-6.25347, 53.331912, 0.0], [-6.254928, 53...."
4,Feature,D03,dublin 3,MultiPolygon,"[[[[-6.167247, 53.370866, 0.0], [-6.164621, 53..."


In [24]:
output

Unnamed: 0,district,properties.article,properties.headline,properties.price_sqm,properties.name,properties.color
0,blackrock,<div class='district'><h1><b>Property Listings...,179 properties,6592.55,Blackrock,#35476E
1,dublin 1,<div class='district'><h1><b>Property Listings...,59 properties,5361.14,Dublin 1,#35476E
2,dublin 10,<div class='district'><h1><b>Property Listings...,24 properties,3516.08,Dublin 10,#35476E
3,dublin 11,<div class='district'><h1><b>Property Listings...,93 properties,3777.19,Dublin 11,#35476E
4,dublin 12,<div class='district'><h1><b>Property Listings...,148 properties,4319.34,Dublin 12,#35476E
5,dublin 13,<div class='district'><h1><b>Property Listings...,112 properties,4948.85,Dublin 13,#35476E
6,dublin 14,<div class='district'><h1><b>Property Listings...,165 properties,5756.48,Dublin 14,#35476E
7,dublin 15,<div class='district'><h1><b>Property Listings...,188 properties,4054.02,Dublin 15,#35476E
8,dublin 16,<div class='district'><h1><b>Property Listings...,102 properties,5043.4,Dublin 16,#35476E
9,dublin 17,<div class='district'><h1><b>Property Listings...,15 properties,3598.93,Dublin 17,#35476E


In [25]:
# Attach the per-district properties to the matching geometry rows.
# Both join keys were lower-cased earlier; districts present on only one
# side are dropped (inner join).
merged_df = df.merge(
    output,
    how='inner',
    left_on='properties.Descriptor',
    right_on='district',
)

In [26]:
merged_df

Unnamed: 0,type,properties.RoutingKey,properties.Descriptor,geometry.type,geometry.coordinates,district,properties.article,properties.headline,properties.price_sqm,properties.name,properties.color
0,Feature,A94,blackrock,MultiPolygon,"[[[[-6.195783, 53.27454, 0.0], [-6.196721, 53....",blackrock,<div class='district'><h1><b>Property Listings...,179 properties,6592.55,Blackrock,#35476E
1,Feature,A96,glenageary,MultiPolygon,"[[[[-6.160525, 53.277182, 0.0], [-6.159042, 53...",glenageary,<div class='district'><h1><b>Property Listings...,180 properties,6427.79,Glenageary,#35476E
2,Feature,D01,dublin 1,MultiPolygon,"[[[[-6.260391, 53.361446, 0.0], [-6.260147, 53...",dublin 1,<div class='district'><h1><b>Property Listings...,59 properties,5361.14,Dublin 1,#35476E
3,Feature,D02,dublin 2,MultiPolygon,"[[[[-6.25347, 53.331912, 0.0], [-6.254928, 53....",dublin 2,<div class='district'><h1><b>Property Listings...,53 properties,7228.77,Dublin 2,#35476E
4,Feature,D03,dublin 3,MultiPolygon,"[[[[-6.167247, 53.370866, 0.0], [-6.164621, 53...",dublin 3,<div class='district'><h1><b>Property Listings...,130 properties,5299.74,Dublin 3,#35476E
5,Feature,D04,dublin 4,MultiPolygon,"[[[[-6.229622, 53.309004, 0.0], [-6.234355, 53...",dublin 4,<div class='district'><h1><b>Property Listings...,166 properties,7437.98,Dublin 4,#35476E
6,Feature,D05,dublin 5,MultiPolygon,"[[[[-6.193686, 53.390431, 0.0], [-6.191689, 53...",dublin 5,<div class='district'><h1><b>Property Listings...,101 properties,4640.83,Dublin 5,#35476E
7,Feature,D06,dublin 6,MultiPolygon,"[[[[-6.247765, 53.327407, 0.0], [-6.247899, 53...",dublin 6,<div class='district'><h1><b>Property Listings...,177 properties,7153.44,Dublin 6,#35476E
8,Feature,D07,dublin 7,MultiPolygon,"[[[[-6.261806, 53.35985, 0.0], [-6.261706, 53....",dublin 7,<div class='district'><h1><b>Property Listings...,143 properties,5190.76,Dublin 7,#35476E
9,Feature,D09,dublin 9,MultiPolygon,"[[[[-6.231598, 53.369927, 0.0], [-6.231923, 53...",dublin 9,<div class='district'><h1><b>Property Listings...,159 properties,4672.03,Dublin 9,#35476E


Great! Now we have built out all of our special properties for the template.

It's time to turn this back into **json format**. We orient by records because that gives us an array of dictionaries.


In [27]:
merged_df = merged_df.drop(columns=['district'])

In [28]:
# Save a backup of the final merged frame (geometry plus the generated
# properties). `df` only holds the normalized geometry rows at this point,
# so it is not the frame that should land in backup_final_df.csv.

merged_df.to_csv(r'./backups/backup_final_df.csv', index = False)

In [29]:
merged_df

Unnamed: 0,type,properties.RoutingKey,properties.Descriptor,geometry.type,geometry.coordinates,properties.article,properties.headline,properties.price_sqm,properties.name,properties.color
0,Feature,A94,blackrock,MultiPolygon,"[[[[-6.195783, 53.27454, 0.0], [-6.196721, 53....",<div class='district'><h1><b>Property Listings...,179 properties,6592.55,Blackrock,#35476E
1,Feature,A96,glenageary,MultiPolygon,"[[[[-6.160525, 53.277182, 0.0], [-6.159042, 53...",<div class='district'><h1><b>Property Listings...,180 properties,6427.79,Glenageary,#35476E
2,Feature,D01,dublin 1,MultiPolygon,"[[[[-6.260391, 53.361446, 0.0], [-6.260147, 53...",<div class='district'><h1><b>Property Listings...,59 properties,5361.14,Dublin 1,#35476E
3,Feature,D02,dublin 2,MultiPolygon,"[[[[-6.25347, 53.331912, 0.0], [-6.254928, 53....",<div class='district'><h1><b>Property Listings...,53 properties,7228.77,Dublin 2,#35476E
4,Feature,D03,dublin 3,MultiPolygon,"[[[[-6.167247, 53.370866, 0.0], [-6.164621, 53...",<div class='district'><h1><b>Property Listings...,130 properties,5299.74,Dublin 3,#35476E
5,Feature,D04,dublin 4,MultiPolygon,"[[[[-6.229622, 53.309004, 0.0], [-6.234355, 53...",<div class='district'><h1><b>Property Listings...,166 properties,7437.98,Dublin 4,#35476E
6,Feature,D05,dublin 5,MultiPolygon,"[[[[-6.193686, 53.390431, 0.0], [-6.191689, 53...",<div class='district'><h1><b>Property Listings...,101 properties,4640.83,Dublin 5,#35476E
7,Feature,D06,dublin 6,MultiPolygon,"[[[[-6.247765, 53.327407, 0.0], [-6.247899, 53...",<div class='district'><h1><b>Property Listings...,177 properties,7153.44,Dublin 6,#35476E
8,Feature,D07,dublin 7,MultiPolygon,"[[[[-6.261806, 53.35985, 0.0], [-6.261706, 53....",<div class='district'><h1><b>Property Listings...,143 properties,5190.76,Dublin 7,#35476E
9,Feature,D09,dublin 9,MultiPolygon,"[[[[-6.231598, 53.369927, 0.0], [-6.231923, 53...",<div class='district'><h1><b>Property Listings...,159 properties,4672.03,Dublin 9,#35476E


In [30]:
ok_json = json.loads(merged_df.to_json(orient='records'))

In [31]:
ok_json

[{'type': 'Feature',
  'properties.RoutingKey': 'A94',
  'properties.Descriptor': 'blackrock',
  'geometry.type': 'MultiPolygon',
  'geometry.coordinates': [[[[-6.195783, 53.27454, 0.0],
     [-6.196721, 53.274051, 0.0],
     [-6.198655, 53.273515, 0.0],
     [-6.198424, 53.273187, 0.0],
     [-6.200951, 53.272999, 0.0],
     [-6.202846, 53.273543, 0.0],
     [-6.202463, 53.273873, 0.0],
     [-6.202863, 53.274344, 0.0],
     [-6.203509, 53.2744, 0.0],
     [-6.203779, 53.274938, 0.0],
     [-6.203866, 53.27497, 0.0],
     [-6.203539, 53.275419, 0.0],
     [-6.203537, 53.276058, 0.0],
     [-6.203238, 53.276099, 0.0],
     [-6.201739, 53.275734, 0.0],
     [-6.200934, 53.276524, 0.0],
     [-6.205683, 53.277997, 0.0],
     [-6.206378, 53.27823, 0.0],
     [-6.210778, 53.279672, 0.0],
     [-6.214161, 53.28078, 0.0],
     [-6.21642, 53.279015, 0.0],
     [-6.216321, 53.278978, 0.0],
     [-6.217974, 53.278061, 0.0],
     [-6.218084, 53.277804, 0.0],
     [-6.218131, 53.277821, 0.0],
   

But because we had to normalize the hierarchy of the geojson document, we now have to rebuild that hierarchy so this json document becomes geojson again. The function below does just that:


In [32]:

def process_to_geojson(file):
    """Rebuild a GeoJSON FeatureCollection from flattened feature records.

    Parameters
    ----------
    file : iterable of dict
        One dict per feature, with dotted keys such as 'properties.name' or
        'geometry.coordinates' (the shape produced by json_normalize followed
        by to_json(orient='records')).

    Returns
    -------
    dict
        {"type": "FeatureCollection", "features": [...]}, where every feature
        has "type": "Feature" plus "properties" and "geometry" dicts.

    Notes
    -----
    Keys are split at the first dot only. Keys outside the 'properties' and
    'geometry' sections (for example 'type') are ignored, and so is a bare
    section key with nothing after the dot, which would otherwise raise
    IndexError. Deeper keys such as 'properties.a.b' keep the full remainder
    ('a.b') as their name, so sibling keys no longer overwrite each other.
    """
    geo_data = {"type": "FeatureCollection", "features": []}
    for row in file:
        feature = {"type": "Feature", "properties": {}, "geometry": {}}
        for key, value in row.items():
            section, _, name = key.partition('.')
            if section in ('properties', 'geometry') and name:
                feature[section][name] = value
        geo_data['features'].append(feature)
    return geo_data


In [33]:
geo_format = process_to_geojson(ok_json)

In [34]:
geo_format

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'RoutingKey': 'A94',
    'Descriptor': 'blackrock',
    'article': "<div class='district'><h1><b>Property Listings</b></h1><P><b>Blackrock</b><br>Median sale price: €710000<br>Mean price per square metre: €6592.55<br>Mean number of bedrooms per property: 3.0<br>Mean number of bathrooms per property: 2.0</P></div>",
    'headline': '179 properties',
    'price_sqm': 6592.55,
    'name': 'Blackrock',
    'color': '#35476E'},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-6.195783, 53.27454, 0.0],
       [-6.196721, 53.274051, 0.0],
       [-6.198655, 53.273515, 0.0],
       [-6.198424, 53.273187, 0.0],
       [-6.200951, 53.272999, 0.0],
       [-6.202846, 53.273543, 0.0],
       [-6.202463, 53.273873, 0.0],
       [-6.202863, 53.274344, 0.0],
       [-6.203509, 53.2744, 0.0],
       [-6.203779, 53.274938, 0.0],
       [-6.203866, 53.27497, 0.0],
       [-6.203539, 53.275419, 0.0],
       [

Now we can export this to a file!

In [35]:
# Write `var infoData = <geojson>` to geo-data.js in a single pass:
# first the JavaScript variable assignment, then the GeoJSON payload.
with open('geo-data.js', 'w', encoding='utf-8') as outfile:
    outfile.write("var infoData = ")
    json.dump(geo_format, outfile)


In [36]:
!ls

Analysis.ipynb                       df_to_maps.ipynb
Final_steps-dataframes-to-maps.ipynb geo-data.js
Scrape.ipynb                         map.html
[34manalysis_output[m[m                      [34mscrape_output[m[m
[34mbackups[m[m                              [34mspatial_layers[m[m
