In [1]:
import geojson
import pandas
import json
from shapely.geometry import shape, Point
from collections import OrderedDict
import csv
import numpy as np 

In [2]:
path = 'TrentoTreeMap/data/geo_data_trees.geojson'

In [3]:
with open(path, 'r') as data:
    loaded_json = geojson.load(data)
    features = loaded_json['features']
    tree_names = set()
    for feature in features:
        properties = feature['properties']
        tree_names.add(properties['Name'])
    
    result = {}
    for tree in tree_names:
        result[tree] = {'count': 0, 'total_canopy_cover': 0}
        
    for feature in features: 
        properties = feature['properties']
        result[properties['Name']]['count'] += 1
        result[properties['Name']]['total_canopy_cover'] += float(properties['Canopy Cover (m2)'])
    
    sorted_dictionary = OrderedDict(sorted(result.items(), key=lambda item: item[1]['count'], reverse=True))
    
    result = []
    for key, value in sorted_dictionary.items():
        elem = sorted_dictionary[key]
        elem['mean_canopy_cover'] = elem['total_canopy_cover'] / elem['count']
        #elem.pop('total_canopy_cover')
        result += [[key, elem['count'], elem['mean_canopy_cover']]]
        
    with open('data1.csv', 'w+') as dump:
        writer = csv.writer(dump)
        writer.writerow(['name', 'count', 'mean_canopy_cover'])
        for line in result:
            writer.writerow(line)

In [4]:
circoscrizioni_path = 'TrentoTreeMap/data/circoscrizioni.json'

In [9]:
df = pandas.read_csv('data1.csv')
top_n_tree_names = list(df['name'][:5])

with open(circoscrizioni_path, 'r') as c:
    with open(path, 'r') as data:
        loaded_json = geojson.load(data)
        circoscrizioni = geojson.load(c)
        result = {'SARDAGNA': {'Other': 1}, 'MEANO': {}, 'ARGENTARIO': {}, 'BONDONE': {}, 'MATTARELLO': {}, 'OLTREFERSINA': {}, 'POVO': {}, 
                  'RAVINA-ROMAGNANO': {}, 'S.GIUSEPPE-S.CHIARA': {}, 'VILLAZZANO': {}, 'GARDOLO': {}, 
                  'CENTRO STORICO PIEDICASTELLO': {}}
        for f in loaded_json['features']:
            try:
                point = Point(f['geometry']['coordinates'])        
                for feature in circoscrizioni['features']:
                    polygon = shape(feature['geometry'])
                    if polygon.contains(point):
                        tree_name = f['properties']['Name']
                        circoscrizione = feature['properties']['nome']
                        try:
                            if tree_name in top_n_tree_names:
                                curr_count = result[circoscrizione][tree_name]
                                result[circoscrizione][tree_name] += 1
                            else:
                                curr_count = result[circoscrizione]['Other']
                                result[circoscrizione]['Other'] += 1
                        except KeyError:
                            if tree_name in top_n_tree_names:
                                result[circoscrizione][tree_name] = 1
                            else:
                                result[circoscrizione]['Other'] = 1
            except TypeError:
                continue
            
        with open('data2.csv', 'w+') as dump:
            tree_names = set()
            for feature in loaded_json['features']:
                properties = feature['properties']
                tree_names.add(properties['Name'])
                
            writer = csv.writer(dump)
            headers = ['circoscrizione'] + top_n_tree_names + ['Other']
            writer.writerow(headers)
            
            for k, v in result.items():
                to_write = [k]
                for nome_albero in headers[1:]:
                    try:
                        to_write += [result[k][nome_albero]]
                    except KeyError:
                        to_write += [0]
                
                writer.writerow(to_write)

In [10]:
df = pandas.read_csv('data2.csv')
df.loc[:, df.iloc[0] != 0].iloc[0]
df.sum(axis=1)

  df.sum(axis=1)


0       39
1      206
2      454
3      292
4      264
5     2695
6      189
7      319
8     3024
9      416
10    1651
11    2964
dtype: int64

In [11]:
df

Unnamed: 0,circoscrizione,Celtis australis,Aesculus hippocastanum,Carpinus betulus,Tilia cordata,Platanus x hispanica,Other
0,SARDAGNA,0,0,0,0,0,39
1,MEANO,9,0,2,1,9,185
2,ARGENTARIO,61,9,18,5,12,349
3,BONDONE,1,21,3,9,2,256
4,MATTARELLO,25,1,13,6,6,213
5,OLTREFERSINA,341,34,126,116,31,2047
6,POVO,21,13,1,1,2,151
7,RAVINA-ROMAGNANO,15,3,5,34,23,239
8,S.GIUSEPPE-S.CHIARA,83,405,123,31,53,2329
9,VILLAZZANO,19,0,33,35,3,326


In [13]:
sum(list(df.sum(axis=1)))

  sum(list(df.sum(axis=1)))


12513