## Extract landmarks from JSON

In [1]:
import os
import json
import pandas as pd
from pandas import json_normalize

data_file_path = '../data/landmarks'

# Collect one flattened data frame per JSON file that has an 'elements' key
dfs = []

# Sort the listing: os.listdir returns entries in arbitrary order, which would
# make the row order of the output CSV differ between runs or machines.
for filename in sorted(os.listdir(data_file_path)):
    # Only JSON files are of interest
    if not filename.endswith('.json'):
        continue

    # Read explicitly as UTF-8 instead of relying on the platform default
    # encoding (assumes the files are UTF-8 encoded, as is standard for JSON).
    with open(os.path.join(data_file_path, filename), 'r', encoding='utf-8') as f:
        content = json.load(f)

    # Files without an 'elements' key contribute nothing
    if 'elements' in content:
        # Flatten nested dictionaries (e.g. 'tags') into columns such as 'tags_name'
        dfs.append(json_normalize(content['elements'], sep='_'))

# pd.concat would fail with an opaque "No objects to concatenate" on an empty
# list; raise the same exception type with a message that names the directory.
if not dfs:
    raise ValueError(
        f"No JSON files with an 'elements' key found in {data_file_path!r}"
    )

# Concatenate all data frames in the list into a single data frame
final_df = pd.concat(dfs, ignore_index=True)

# Save the final data frame
final_df.to_csv('./landmarks.csv', index=False)



## Clean up landmarks: drop rows with null lat, lon or tags_name, then deduplicate

In [12]:
# Keep only rows where tags_name, lat and lon are all present (not null/NaN),
# then drop repeated (tags_name, lat, lon) combinations, keeping the first
# occurrence, and renumber the index from 0.
# Written as one chain of non-mutating calls rather than inplace edits on a
# filtered frame, which can trigger pandas' SettingWithCopyWarning.
key_columns = ['tags_name', 'lat', 'lon']
final_df = (
    final_df
    .dropna(subset=key_columns)
    .drop_duplicates(subset=key_columns)
    .reset_index(drop=True)
)


In [13]:
# Write the cleaned landmarks to landmarks_clean.csv in the current working
# directory; index=False leaves the row index out of the file.
final_df.to_csv('./landmarks_clean.csv', index=False)

In [14]:
# Display the cleaned data frame as this cell's output
final_df

Unnamed: 0,type,id,lat,lon,tags_amenity,tags_cuisine,tags_name,tags_source,tags_addr:city,tags_addr:street,...,tags_capacity,tags_access,tags_parking,tags_covered,tags_fee,tags_supervised,tags_surface,tags_description,tags_delivery,tags_drive_through
0,node,1398437051,27.668738,85.538153,cafe,regional;noodles;snacks;teahouse,Teahouse,Garmin Orgeon 450 GPS,,,...,,,,,,,,,,
1,node,1769321635,27.630180,85.524165,cafe,,New Bhomi Cafe,,,,...,,,,,,,,,,
2,node,1819665900,27.618443,85.538343,cafe,,KU Fast Food Cafe,,,,...,,,,,,,,,,
3,node,3986718694,27.632663,85.517836,cafe,ice_cream,Everest ice cream,,Banepa Municipality,Aananda M.,...,,,,,,,,,,
4,node,4327779391,27.621901,85.538583,cafe,,Durga cafe,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489,node,10889516105,27.641784,85.477396,restaurant,,The Burger House & Crunchy Fried Chicken,,,,...,,,,,,,,,,
490,node,10889516205,27.620168,85.553375,restaurant,,Fire n Wood Pizza,,,,...,,,,,,,,,,
491,node,10889581105,27.642719,85.469714,restaurant,,Daura Chulo,,,,...,,,,,,,,,,
492,node,10943772558,27.617516,85.538309,restaurant,,Anisha bhansa ghar,,,,...,,,,,,,,,,
