In [17]:
import json
import pandas as pd

In [18]:
with open("../data/places.json") as places_file:
    places_dict = json.load(places_file)

In [19]:
df_places = pd.json_normalize(places_dict, record_path="features")
df_places

Unnamed: 0,type,properties.identifier,properties.name,properties.information,properties.categories,properties.campus,properties.faculties,properties.floors,geometry.type,geometry.coordinates
0,Feature,auditorio_ccl,Auditorio construcción civil,,[other],SJ,CCL,[2],Point,"[-70.61364284624105, -33.499000010604604]"
1,Feature,C504-SJ,C504,,[classroom],SJ,,[5],Point,"[-70.61410111982357, -33.49887488255148]"
2,Feature,C502-SJ,C502,,[classroom],SJ,,[5],Point,"[-70.61409212054254, -33.4989157013247]"
3,Feature,C505-SJ,C505,,[classroom],SJ,,[5],Point,"[-70.61410111982357, -33.49887488255148]"
4,Feature,C503-SJ,C503,,[classroom],SJ,,[5],Point,"[-70.61409212054254, -33.4989157013247]"
...,...,...,...,...,...,...,...,...,...,...
782,Feature,METROLOGIA,DICTUC - Metrología,,,SJ,,[2],Point,"[-70.61318693749489, -33.500789937775814]"
783,Feature,Laboratorio ingenieria de materiales,Laboratorio ingeniería de materiales,,[laboratory],SJ,ING,[5],Point,"[-70.612017, -33.5000722]"
784,Feature,Instituto biologia de ingenieria biologica y m...,Instituto biología de ingeniería biológica y m...,,[laboratory],SJ,ING,[7],Point,"[-70.61202318, -33.4999792]"
785,Feature,Laboratorio Electronica y Robotica,Laboratorio Electrónica y Robótica,,[laboratory],SJ,ING,[6],Point,"[-70.61193384, -33.5001874]"


In [20]:
df_places.loc[
    (df_places["properties.categories"] == "") & (df_places["properties.category"] != ""),
    "properties.categories"
] = df_places.loc[
    (df_places["properties.categories"] == "") & (df_places["properties.category"] != ""),
    "properties.category"
]


KeyError: 'properties.category'

In [101]:
## df_places[(df_places["properties.categories"] != "") & (df_places["properties.category"] != "")]

# Revisando estos datos, esta claro que categories siempre tendra prioridad sobre category, sobre todo porque el Rocka actualiza manualmente los datos y estos se actualizan solo en category.

In [21]:
df_places.columns

Index(['type', 'properties.identifier', 'properties.name',
       'properties.information', 'properties.categories', 'properties.campus',
       'properties.faculties', 'properties.floors', 'geometry.type',
       'geometry.coordinates'],
      dtype='object')

In [22]:
df_places.drop(columns=["properties.category"], inplace=True)

KeyError: "['properties.category'] not found in axis"

In [23]:
df_places["properties.categories"]

0           [other]
1       [classroom]
2       [classroom]
3       [classroom]
4       [classroom]
           ...     
782                
783    [laboratory]
784    [laboratory]
785    [laboratory]
786    [laboratory]
Name: properties.categories, Length: 787, dtype: object

In [24]:
[~ df_places['properties.categories'].apply(lambda x: isinstance(x, (str)))]

[0       True
 1       True
 2       True
 3       True
 4       True
        ...  
 782    False
 783     True
 784     True
 785     True
 786     True
 Name: properties.categories, Length: 787, dtype: bool]

In [25]:
df_places[~df_places['properties.categories'].apply(lambda x: isinstance(x, (list)))]

Unnamed: 0,type,properties.identifier,properties.name,properties.information,properties.categories,properties.campus,properties.faculties,properties.floors,geometry.type,geometry.coordinates
781,Feature,Auditorio edificio pataguas,Auditorio edificio pataguas,Facultad de Economía,,SJ,,[-1],Point,"[-70.61025614045515, -33.49666302388468]"
782,Feature,METROLOGIA,DICTUC - Metrología,,,SJ,,[2],Point,"[-70.61318693749489, -33.500789937775814]"


In [26]:
def string_to_array(x):
    return [x] if x else x

df_places.loc[
    (df_places['properties.categories'].apply(lambda x: isinstance(x, (str)))),
    "properties.categories"
] = df_places.loc[
    (df_places['properties.categories'].apply(lambda x: isinstance(x, (str)))),
    "properties.categories"
].apply(string_to_array)

In [27]:
df_places['properties'] = df_places.apply(lambda row: {
    'floors': row['properties.floors'],
    'categories': row['properties.categories']
}, axis=1)

df_places['geometry'] = df_places.apply(lambda row: {
    'type': row['geometry.type']
}, axis=1)

In [40]:
df_places["properties.categories"].explode().unique()

array(['other', 'classroom', 'bath', 'laboratory', 'studyroom',
       'food_lunch', 'water', 'financial', 'sports_place', 'auditorium',
       'trash', 'computers', 'photocopy', 'park_bicycle', '', 'printer'],
      dtype=object)

In [35]:
df_places["properties.floors"] = df_places["properties.floors"].apply(lambda x: [1 if i == 0 else i for i in x])

In [36]:
def build_feature(row):
    return {
        "type": row['type'],
        "properties": {
            "identifier": row['properties.identifier'],
            "name": row['properties.name'],
            "information": row['properties.information'],
            "categories": row['properties.categories'],
            "campus": row['properties.campus'],
            "faculties": row['properties.faculties'],
            "floors": row['properties.floors'],
        },
        "geometry": {
            "type": row['geometry.type'],
            "coordinates": row['geometry.coordinates']
        }
    }

In [37]:
geojson = {
    "type": "FeatureCollection",
    "features": df_places.apply(build_feature, axis=1).tolist()
}


In [38]:
geojson_str = json.dumps(geojson, indent=4)


In [39]:
with open("./places.json", "w") as f:
    f.write(geojson_str)