# Plotting locations in Nordisk Familjebok

In [None]:
import os
# Move the working directory two levels up before the project imports below
# (presumably so that `utils` resolves from the repository root — confirm).
# NOTE(review): the path is relative, so re-running this cell moves up again.
os.chdir('../../')
# Print the resulting working directory as a sanity check.
print(os.getcwd())

import matplotlib.pyplot as plt
# from mpl_toolkits.basemap import Basemap
#import scripts.coordinates_retreival as gs
# import json
# import time
from utils import json_helpers as jh
from utils.paths import *
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import random

# Per-edition JSON locations, built from ENCYCLOPEDIAS_JSON_FOLDER (not defined
# in this file; presumably provided by the star import from utils.paths).
e1 = f'{ENCYCLOPEDIAS_JSON_FOLDER}/e1'
e2 = f'{ENCYCLOPEDIAS_JSON_FOLDER}/e2'

### Functions helpful for comparison of editions

In [None]:
def get_all_coords(edition: list[dict]):
    """Collect the (latitude, longitude) pair of every entry in an edition.

    The result has exactly one tuple per entry, in input order. A coordinate
    key that is absent from an entry shows up as ``None`` in that position.
    """
    pairs = []
    for article in edition:
        pairs.append((article.get('latitude'), article.get('longitude')))
    return pairs

def coords_union(edition1: list[dict], edition2: list[dict]) -> list[dict]:
    """Return the located entries of both editions, at most one per qid.

    Entries are visited in order (all of ``edition1``, then ``edition2``) and
    the first entry seen for each qid is kept. An entry counts as located
    unless both its latitude and its longitude are missing/``None``.

    Args:
        edition1: Entries of the first edition.
        edition2: Entries of the second edition.

    Returns:
        The kept entry dicts (the same objects, not copies).

    Raises:
        KeyError: If any entry has no ``'qid'`` key.
    """
    seen_qids = set()
    entry_union = []
    for entry in edition1 + edition2:
        qid = entry['qid']
        coords = (entry.get('latitude'), entry.get('longitude'))
        # Every coordinate pair belongs to the union by construction, so the
        # only pairs to reject are the fully missing ones.
        if coords == (None, None) or qid in seen_qids:
            continue
        # Record the qid so a later duplicate (typically the same place in
        # the other edition) is not returned twice.
        seen_qids.add(qid)
        entry_union.append(entry)

    return entry_union

def coords_diff(edition1: list[dict], edition2: list[dict]) -> list[dict]:
    """Return the entries of ``edition1`` whose coordinates are not in ``edition2``.

    At most one entry per qid is returned (the first one encountered), which
    matches how ``coords_intersec`` removes duplicates. Entries whose latitude
    and longitude are both missing/``None`` are never returned.

    Args:
        edition1: Entries to filter.
        edition2: Entries whose coordinate pairs are excluded.

    Returns:
        The kept entry dicts from ``edition1`` (the same objects, not copies).

    Raises:
        KeyError: If an entry of ``edition1`` has no ``'qid'`` key.
    """
    excluded_coords = {
        (other.get('latitude'), other.get('longitude')) for other in edition2
    }

    seen_qids = set()
    entry_diff = []
    for entry in edition1:
        qid = entry['qid']
        coords = (entry.get('latitude'), entry.get('longitude'))
        if coords == (None, None) or coords in excluded_coords:
            continue
        # De-duplicate on qid rather than on the coordinate pair, so distinct
        # places that happen to share coordinates are all reported.
        if qid in seen_qids:
            continue
        seen_qids.add(qid)
        entry_diff.append(entry)

    return entry_diff

def coords_intersec(edition1: list[dict], edition2: list[dict]) -> list[dict]:
    """Return entries whose coordinate pair occurs in both editions.

    Both editions are scanned (``edition1`` first) and the first entry seen
    for each qid is kept. The pair ``(None, None)`` never counts as shared.
    Raises ``KeyError`` if an entry has no ``'qid'`` key.
    """
    shared_coords = set(get_all_coords(edition1)) & set(get_all_coords(edition2))
    shared_coords -= {(None, None)}

    seen_qids = set()
    kept = []
    for article in edition1 + edition2:
        position = (article.get('latitude'), article.get('longitude'))
        identifier = article['qid']
        if position not in shared_coords or identifier in seen_qids:
            continue
        seen_qids.add(identifier)
        kept.append(article)

    return kept

### Visualization functions

In [None]:
def twod_map_coords(title: str = "", outname: str = "",
                    edition1: "list[dict] | None" = None,
                    edition2: "list[dict] | None" = None,
                    samples: int = 0):
    """Plot the entries of up to two editions on a 2D world map.

    The figure is written to ``{LOCATION_PLOTS_FOLDER}/2d_plot_{outname}.html``
    and then shown. Entries of ``edition1`` are drawn in blue and entries of
    ``edition2`` in green; if there is nothing to plot an empty map is drawn.

    Args:
        title: Figure title.
        outname: Suffix used in the name of the HTML file.
        edition1: Entries for the first (blue) series; ``None`` means none.
        edition2: Entries for the second (green) series; ``None`` means none.
        samples: If non-zero, plot a random sample of at most this many
            entries per edition instead of all of them.

    Raises:
        ValueError: If ``samples`` is negative (raised by ``random.sample``).
    """
    edition1 = [] if edition1 is None else edition1
    edition2 = [] if edition2 is None else edition2

    # Decide what datapoints to plot. The sample size is capped at the size of
    # each edition because random.sample raises when asked for more items than
    # exist, which would make `samples` unusable with a single edition.
    if samples != 0:
        edition1 = random.sample(edition1, min(samples, len(edition1)))
        edition2 = random.sample(edition2, min(samples, len(edition2)))

    # Legend label -> marker colour; dict order matches (edition1, edition2).
    legend_colors = {'Edition 1 (blue)': 'blue', 'Edition 2 (green)': 'green'}

    # One labelled DataFrame per non-empty edition; empty ones are left out so
    # that nothing empty is handed to pd.concat.
    frames = []
    for entries, label in zip((edition1, edition2), legend_colors):
        frame = pd.DataFrame(entries)
        if not frame.empty:
            frame['description'] = label  # Drives both legend and colour
            frames.append(frame)

    if not frames:
        # Create an empty plot
        fig = px.scatter_geo(projection="natural earth", opacity=0.3)
    else:
        combined_data = pd.concat(frames, axis=0, ignore_index=True)
        fig = px.scatter_geo(combined_data, lat="latitude",
                             lon="longitude",
                             hover_name='qid',
                             hover_data=['text'],
                             color='description',
                             projection="natural earth",
                             opacity=0.15,
                             color_discrete_map=legend_colors,
                             )

    fig.update_layout(title=title, hoverlabel=dict(
        bgcolor="white",
        font_size=12,
        font_family="Rockwell",
    ))

    fig.write_html(f"{LOCATION_PLOTS_FOLDER}/2d_plot_{outname}.html")
    fig.show()



def threed_map_coords(config):
    """Plot coordinates from a CSV file on a 3D (orthographic) globe.

    The CSV path is read from ``config["coords_fetch"]["output_csv_file"]``.
    Each non-blank line must hold exactly two comma-separated numbers, read as
    longitude then latitude. The figure is written to ``3d_plot.html`` in the
    current working directory and then shown.

    Args:
        config: Nested mapping providing the CSV path as described above.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            comma-separated values that parse as floats.
    """
    csv_path = config["coords_fetch"]["output_csv_file"]

    lons, lats = [], []

    with open(csv_path, encoding="utf-8") as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no coordinate and would otherwise break the unpacking.
            if not line.strip():
                continue
            lon, lat = line.split(',')
            lons.append(float(lon))
            lats.append(float(lat))

    print(f"Successful coords: {len(lons)}")

    # Scattergeo expects sequences; a single point must still be wrapped in a list.
    fig = go.Figure(go.Scattergeo(lat=lats, lon=lons))
    fig.update_traces(marker={"opacity": 0.4, 'size': 5, "color": "blue"})
    # The orthographic projection is what renders the map as a 3D globe.
    fig.update_geos(projection_type="orthographic")
    fig.update_layout(width=800, height=800, margin={
                      "r": 0, "t": 0, "l": 0, "b": 0})
    fig.write_html("3d_plot.html")
    fig.show()

### Plotting the locations in the editions

In [None]:
# Load the entries of both editions from the per-edition paths defined in the
# first cell.
e1_entries = jh.read_items(e1)
e2_entries = jh.read_items(e2)

# Keep only entries that have both coordinates. `.get` treats a missing key
# like None, the same way the comparison helpers above read coordinates.
e1_locations = [
    entry for entry in e1_entries
    if entry.get('latitude') is not None and entry.get('longitude') is not None
]
e2_locations = [
    entry for entry in e2_entries
    if entry.get('latitude') is not None and entry.get('longitude') is not None
]

# editions_union = coords_union(e1_locations, e2_locations) # Already plotted in normal plot
e1_diff = coords_diff(e1_locations, e2_locations)
e2_diff = coords_diff(e2_locations, e1_locations)
editions_intersec = coords_intersec(e1_locations, e2_locations)

# The single-series plots pass their data as `edition1`, so they are drawn in
# the first series' colour.
twod_map_coords(title="Locations in Edition 1 and 2", outname="e1_and_e2", edition1=e1_locations, edition2=e2_locations)
# twod_map_coords(title="Locations in both editions", outname="e1_union_e2", edition1=editions_union) # Already plotted in normal plot
twod_map_coords(title="Locations only in Edition 1", outname="e1_diff_e2", edition1=e1_diff)
twod_map_coords(title="Locations only in Edition 2", outname="e2_diff_e1", edition1=e2_diff)
twod_map_coords(title="Locations present in both Edition 1 and 2", outname="e1_inter_e2", edition1=editions_intersec)