## Setup

In [7]:
# Installations
# !pip install plotly
# !pip install kaleido==0.1.0
# !pip install py-markdown-table
# !pip install openpyxl
# !pip install mdutils


In [8]:
# Import dependencies
import pandas as pd
import regex as re
import numpy as np
import subprocess
from collections import defaultdict
import openpyxl
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.io as pio
import kaleido
from markdownTable import markdownTable
from mdutils.mdutils import MdUtils
from mdutils import Html
from datetime import date

from IPython.display import display

print(plotly.__version__, kaleido.__version__)

# if using Google Colab:
# from google.colab import files
# %load_ext google.colab.data_table

print("Dependencies loaded with no problems.")

5.9.0 0.2.1
Dependencies loaded with no problems.


## Functions

In [9]:
#Roman numerals
def roman(num: int) -> str:

    chlist = "VXLCDM"
    rev = [int(ch) for ch in reversed(str(num))]
    chlist = ["I"] + [chlist[i % len(chlist)] + "\u0304" * (i // len(chlist))
                    for i in range(0, len(rev) * 2)]

    def period(p: int, ten: str, five: str, one: str) -> str:
        if p == 9:
            return one + ten
        elif p >= 5:
            return five + one * (p - 5)
        elif p == 4:
            return one + five
        else:
            return one * p

    return "".join(reversed([period(rev[i], chlist[i * 2 + 2], chlist[i * 2 + 1], chlist[i * 2])
                            for i in range(0, len(rev))]))

def century(year):
    return (year) // 100 + 1 

# Variables
# https://en.wikibooks.org/wiki/LaTeX/Colors
PolyU='#8f1329'
MidnightBlue='#006795'

#Prism colors:
ppurple = '#5f4690' #'rgb(95, 70, 144)'
pblue = '#1d6996' #'rgb(29, 105, 150)'
pturquiose = '#38a6a5' #'rgb(56, 166, 165)'
pgreen = '#0f8554' #'rgb(15, 133, 84)'
plime = '#73af48' #'rgb(115, 175, 72)'
pyellow = '#edad08' #'rgb(237, 173, 8)'
porange = '#e17c05' #'rgb(225, 124, 5)'
pred = '#cc503e' #'rgb(204, 80, 62)'
pmagenta = '#94346e' #'rgb(148, 52, 110)'
pfuchsia = '#6f4070' #'rgb(111, 64, 112)'
pgray = '#808080' #'rgb(128,128,128)' #'rgb(102, 102, 102)'

p1=ppurple
p2=pblue
p3=pturquiose
p4=pgreen
p5=plime
p6=pyellow
p7=porange
p8=pred 
p9=pmagenta
p10=pfuchsia
p11=pgray
p12='black'

half_transparent = 'rgba(255,255,255,0.5)'
transparent = 'rgba(255,255,255,0)'
PRISM = [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12]

antique = px.colors.qualitative.Antique
bold = px.colors.qualitative.Bold
pastel = px.colors.qualitative.Pastel
prism = px.colors.qualitative.Prism
safe = px.colors.qualitative.Safe
vivid = px.colors.qualitative.Vivid

pio.templates["prism"] = go.layout.Template(layout=go.Layout(colorway=prism))
pio.templates["family"] = go.layout.Template(layout=go.Layout(colorway=[p1,p3,p5,p7,p9]))
pio.templates["trilingual"] = go.layout.Template(layout=go.Layout(colorway=[p2,p4,p6]))
pio.templates["yesmaybeno"] = go.layout.Template(layout=go.Layout(colorway=[p2,p11,p8]))
pio.templates["yesnomaybe"] = go.layout.Template(layout=go.Layout(colorway=[p2,p6,p11]))
pio.templates["top5"] = go.layout.Template(layout=go.Layout(colorway=[p1,p2,p3,p4,p5,p11,p6,p7,p8,p9,p10]))

draft_template = go.layout.Template()
draft_template.layout.annotations = [
    dict(
        name="draft watermark",
        text="DRAFT",
        textangle=-30,
        opacity=0.1,
        font=dict(color="black", size=120),
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,)]
    
# fig.update_layout(template=draft_template)

# pio.templates.default = 'prism'


# Plotly code dump

# Draft template
# draft_template = go.layout.Template()
# draft_template.layout.annotations = [
#     dict(
#         name="draft watermark",
#         text="DRAFT",
#         textangle=-30,
#         opacity=0.1,
#         font=dict(color="black", size=120),
#         xref="paper",
#         yref="paper",
#         x=0.5,
#         y=0.5,
#         showarrow=False,)]
    
# fig.update_layout(template=draft_template)



# fig.add_annotation(
#     xref = "paper", yref = "paper",
#     x=1, y=0, #x=1
#     xanchor="left", yanchor="top", align="center",
#     text="© Parti Gábor, 2022",
#     font={"color": "gainsboro", "size": 8, "family": font_family},
#     showarrow=False)

# # add images
# fig.add_layout_image(
#     source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
#     sizex=0.1, sizey=0.1,
#     # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
#     # sizex=0.15, sizey=0.15,
#     x=1, y=0, 
#     xanchor="left", yanchor="bottom", 
# )


# for template in ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]:
#     fig = px.scatter(df, x="year", y="id", color="class",
#                     #  log_x=True, size_max=60,
#                      template=template)

## Data

In [10]:
# Path
root = ""
path = "data/"
output = "output/"

In [11]:
# result = pd.merge(df_spices, df_tcm, on=["id"], how="outer")
# result.to_csv (path+"result.csv", index = None, header=True)

## Distribution Maps

### Spices

In [13]:
# Read and store content of an excel file 
read_file = pd.read_excel(path + "spices.xlsx")

# Write the dataframe object into csv file
read_file.to_csv (path + "spices.csv", index = None, header=True)

# Load in dataset
df_spices=pd.read_csv(path + 'spices.csv', header =[0], delimiter=',', encoding="utf-8")

# df_spices = df_spices.loc[df_spices['include'] == 'in'] # include ones to include

df_spices = df_spices.loc[df_spices['sym'] == 'yes'] # include ones to include

print(df_spices.shape)

df = df_spices.copy()

(29, 92)


In [15]:
# Variables
marker_symbol= 'circle'
marker_size = 18
size_max = 24
edge_size = 1
edge_color = 'white'
opacity = 0.75
line_width = 4
font_size = 18
font_color = 'black'
font_family = 'Times New Roman'
lines = 'gainsboro'
land = 'gainsboro'
water = 'white' # 'azure'
half_transparent = 'rgba(255,255,255,0.5)'

# add a value to make small points bigger
# df['size'] = df['spreadability']+2 
df['size'] = size_max


#Plot scatter data
fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='id',
    color='family',
    color_discrete_sequence=PRISM,
    size_max = size_max,
    # color_discrete_sequence=[p1,p2,p4,p6,p11],
    # color_discrete_sequence = [PolyU],
    size = 'size',
    opacity = opacity,
    hover_name='id',
    hover_data={'species':True, 'family':True, 'region of origin':True, 'Arabic':True, 'Chinese':True, 'lon':False, 'lat':False},
    # labels={"group": "category"}
    )

fig.update_traces(mode = "markers+text",
                  textposition='middle right',
                  textfont={"size": font_size, "color": font_color, "family": font_family},
                  marker=dict(symbol=marker_symbol, 
                              size=marker_size, 
                              line=dict(color=edge_color, width=edge_size)
                              ),
                  )

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        scope='world',
        projection_type = 'orthographic',
        # projection_type = 'mercator',
        # projection_scale = 1.2,
        projection_rotation = {'lat': 20, 'lon': 80, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=True, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
    showlegend = True,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor=half_transparent,  
                font={"color": font_color, "size": font_size+2, "family": font_family}, traceorder = 'normal', orientation="v"),
    # title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
    #            font={"color": "black", "size": 20, "family": font_family}),
    margin={"r":0,"t":0,"l":0,"b":0},
    hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    )

# # # write and save
filename = "spices_map"

fig.update_layout(
    width = 1000, height=1000,
)

fig.show()

fig.write_image(output + filename + ".pdf", engine="kaleido")
fig.write_image(output + filename + ".png", engine="kaleido")
# files.download(filename + ".pdf")

####HTML

In [None]:
# add a value to make small points bigger
df['size'] = df['spreadability']+2 

#Plot scatter data
fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='id',
    color='family',
    color_discrete_sequence=PRISM,
    size_max = size_max,
    # color_discrete_sequence=[p1,p2,p4,p6,p11],
    # color_discrete_sequence = [PolyU],
    size = 'size',
    
    opacity = opacity,
    hover_name='id',
    hover_data={'species':True, 'family':True, 'spreadability':True, 'size':False, 'region of origin':True, 'Arabic':True, 'Chinese':True, 'lon':False, 'lat':False},
    # labels={"group": "category"}
    )

fig.update_traces(mode = "markers+text",
                  textposition='middle right',
                  textfont={"size": font_size, "color": font_color, "family": font_family},
                  marker=dict(symbol=marker_symbol, 
                              # size=marker_size, 
                              line=dict(color=edge_color, width=edge_size)
                              ),
                  )

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        scope='world',
        projection_type = 'orthographic',
        # projection_type = 'natural earth',
        # projection_scale = 1.2,
        projection_rotation = {'lat': 20, 'lon': 80, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=True, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
    showlegend = True,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor=half_transparent,  
                font={"color": font_color, "size": font_size, "family": font_family}, traceorder = 'normal', orientation="v"),
    # title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
    #            font={"color": "black", "size": 20, "family": font_family}),
    margin={"r":0,"t":0,"l":0,"b":0},
    hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    )

fig.add_annotation(x=0.5, y=0, #x=1
                   xanchor="right", yanchor="bottom", align="center",
                   text="© Parti Gábor, 2022",
                   font={"color": "lightgray", "size": 8, "family": font_family},
                   showarrow=False)

# add images
fig.add_layout_image(
    source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
    sizex=0.15, sizey=0.15,
    # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
    # sizex=0.15, sizey=0.15,
    x=1, y=0, 
    xanchor="right", yanchor="bottom", 
)

fig.show()

# # write and download
filename = "spices_map"
fig.write_html(filename + ".html")
files.download(filename + ".html")

ValueError: 
    Invalid element(s) received for the 'size' property of scattergeo.marker
        Invalid elements include: [nan]

    The 'size' property is a number and may be specified as:
      - An int or float in the interval [0, inf]
      - A tuple, list, or one-dimensional numpy array of the above

#### Natural Earth

In [None]:
#Plot scatter data
fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='id',
    color='family',
    color_discrete_sequence=PRISM,
    # color_discrete_sequence=[p1,p2,p4,p6,p11],
    # color_discrete_sequence = [PolyU],
    opacity = opacity,
    hover_name='id',
    hover_data={'species':True, 'family':True, 'region of origin':True, 'Arabic':True, 'Chinese':True, 'lon':False, 'lat':False},
    # labels={"group": "category"}
    )

fig.update_traces(mode = "markers+text",
                  textposition='middle right',
                  textfont={"size": font_size, "color": font_color, "family": font_family},
                  marker=dict(symbol=marker_symbol, size=marker_size, 
                              line=dict(color=edge_color, width=edge_size)
                              ),
                  )

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        # lataxis_range=[-90,90], 
        # lonaxis_range=[-150, 150],
        # scope='world',
        # projection_type = 'orthographic',
        # projection_type = 'natural earth',
        # projection_scale = 1,
        # projection_rotation = {'lat': 20, 'lon': 80, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=False, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        # lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        # lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)
        ),
    showlegend = True,
    # legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor=half_transparent,  
    #             font={"color": font_color, "size": font_size, "family": font_family}, 
    #             traceorder = 'normal', orientation="v"),
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor=half_transparent,  
                font={"color": font_color, "size": font_size, "family": font_family}, 
                traceorder = 'normal', orientation="h"),
    # title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
    #            font={"color": "black", "size": 20, "family": font_family}),
    # hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    margin={"r":0,"t":0,"l":0,"b":0},
    )

fig.show()

# # write and download
filename = "spices_map_ne"

fig.update_layout(
    width = 1000, height=500,
)

fig.write_image(filename + ".pdf", engine="kaleido")
files.download(filename + ".pdf")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Spice words

####Preprocessing

In [None]:
key = "pepper"

# load in datasets
path="/content/drive/MyDrive/Thesis/Data/Wiktionary/"

# Read and store content of an excel file 
read_file = pd.read_excel(path + key +".xlsx")

# Write the dataframe object into csv file
read_file.to_csv (path + key + ".csv", index = None, header=True)

# Load in dataset
df=pd.read_csv(path + key + ".csv", header =[0], delimiter=',', encoding="utf-8")

# extract only desired columns
selectlist = ['skip', 'language', 'term', 'transliteration', 'item', 'group']
df = df[selectlist]

print("Before skipping: ", df.shape)
df=df[df['skip'] != "yes"]
print("After skipping: ", df.shape)

# # drop columns manually
# df.drop(columns=['skip', 'literal', 'explanation', 'IPA', 'source', 'notes', 'type', 'katzer', 'katzer tr', 'checked', 'reference', 'link'], inplace=True)

# Change languages to glottolog name
df['language'] = [re.sub(r"^Arabic$", "Standard Arabic", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Mandarin$", "Mandarin Chinese", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Old Armenian$", "Classical-Middle Armenian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Croatian$", "Croatian Standard", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Serbian$", "Serbian Standard", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Bosnian$", "Bosnian Standard", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Serbo-Croatian$", "Serbian-Croatian-Bosnian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Gaelic$", "Scottish Gaelic", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Greek$", "Modern Greek", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Punjabi$", "Eastern Panjabi", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Norman$", "Anglo-Norman", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Bikol Central$", "Coastal-Naga Bikol", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Armenian (Eastern)$", "Eastern Armenian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Armenian (Western)$", "Western Armenian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Manipuri (Meitei-Lon)$", "Manipuri", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Sumi)$", "Sumi Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Tangkhul)$", "North-Central Tangkhul Naga", str(x)) for x in df['language']] # A hypoglot
df['language'] = [re.sub(r"^Naga (Rengma)$", "Northern Rengma Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Lotha)$", "Lotha Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Konyak)$", "Konyak Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Chakhesang-Chokri)$", "Chokri Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Ao)$", "Ao Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Naga (Angami)$", "Angami Naga", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Kyrgyz$", "Kirghiz", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Hebrew$", "Modern Hebrew", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^North Sami$", "North Saami", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Greenlandic$", "Kalaallisut", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^West Frisian$", "Western Frisian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Old Javanese$", "Kawi", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Old Church Slavonic$", "Church Slavic", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^North Frisian$", "Northern Frisian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Luxembourgish$", "Luxemburgish", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Armenian$", "Eastern Armenian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Old East Slavic$", "Old Russian", str(x)) for x in df['language']]
df['language'] = [re.sub(r"^Ossetian$", "Modern Ossetic", str(x)) for x in df['language']]

input = df

# load in datasets
path="/content/drive/MyDrive/Thesis/Data/Languages/"
languages=pd.read_csv(path+'languages.csv', header =[0], delimiter=',', encoding="utf-8", index_col=[0])

# merge input and languages
df = pd.merge(input, languages, on=['language'])
print(df.shape)

#drop duplicates
df.drop_duplicates(subset=['language', 'term'], keep='first', inplace=True, ignore_index=True)
print(df.shape)

multilingual = df
multilingual

#check missing ones
temp = pd.merge(input, multilingual, how='outer', suffixes=('','_y'), indicator=True)
missing = temp[temp['_merge']=='left_only'][input.columns]
print("The following terms and languages have failed to load:")
print(missing)

# df = df.dropna() #OPERATIVE ONLY
# df = df.fillna('x')

# #sort by categories, cinnamon
# df['group'] = pd.Categorical(df['group'], ["canela", "kinnamon", "korica", "qirfa", "darchin", "gui", "other"]) # add categorical order here
# df.sort_values("group", inplace = True) # sort according to the categories

#sort by categories, pepper
df['group'] = pd.Categorical(df['group'], ["pippali", "pigment", "marica", "hujiao", "other"]) # add categorical order here
df.sort_values("group", inplace = True) # sort according to the categories

# create text for annotation label
df['text'] = df['term'] + '<br>' + df['transliteration'].astype(str) + '<br>Language: ' + df['language'] + '<br>Family: ' + df['family']
df['text'] = [re.sub(r"<br>nan<br>", "<br>", str(x)) for x in df['text']]

# save
path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
df.to_csv(path + key+'.csv')

Before skipping:  (207, 6)
After skipping:  (196, 6)
(197, 16)
(189, 16)
The following terms and languages have failed to load:
    skip            language              term transliteration  \
113  NaN              Newari               मले            male   
123  NaN     Old High German  pheffur, pheffar             NaN   
135  NaN            Q'eqchi'      kaxlan q’een             NaN   
144  NaN       Selice Romani             pepšo             NaN   
164  NaN     Tarifiyt Berber           řfəřfəř             NaN   
189  NaN         White Hmong          hwj txob             NaN   
194  NaN  Zinacantán Tzotzil           pimenta             NaN   

                 item    group  
113              male    other  
123  pheffur, pheffar  pippali  
135      kaxlan q’een    other  
144             pepšo  pippali  
164           řfəřfəř  pippali  
189          hwj txob   hujiao  
194           pimenta  pigment  


###Plot map

####Plotly Express tries

In [None]:
# #Plot scatter data, easy for grouping!

# font_family = "Raleway" 
# # font_family = "Times New Roman"

# lines = 'gainsboro'
# land = 'gainsboro'
# water = 'white'

# fig = px.scatter_geo(df,
#     lat='lat', 
#     lon='lon',
#     text='item',
#     # symbol='group',
#     # symbol_sequence = ['diamond-open', 'triangle-up', 'triangle-down', 'triangle-left', 'triangle-right', 'triangle-ne', 'triangle-se'],
#     # size='pop' # Can set size by some value
#     # size_max = 20,
#     # animation_frame = 'date', # !TIMELAPSE!
#     # animation_group = '', # ?
#     color='group',
#     color_discrete_sequence=px.colors.qualitative.Prism, #Viridis #Bold
#     # color_discrete_sequence=px.colors.sequential.Viridis,
#     # color_discrete_sequence=[p1,p2,p3,p4,p11],
#     opacity = 0.75, #???
#     hover_name='item',
#     hover_data={'term':True, 'language':True, 'family':True, 'item':False, 'lon':False, 'lat':False, 'group':False},
#     # labels={"group": "category"}
#     )

# # print(px.colors.qualitative.Prism) #to see the color codes
# # https://plotly.com/python/discrete-color/ # See the colormaps here, or construct a sequence like:
# # color_discrete_sequence=["red", "green", "blue", "goldenrod", "magenta"]
# # Using Sequential Scales as Discrete Sequences: color_discrete_sequence= px.colors.sequential.Plasma_r,

# fig.update_traces(mode = "markers",
#                   textposition='top right',
#                   textfont={"color": "black", "size": 12, "family": font_family},
#                   marker=dict(symbol='diamond', size=12, opacity=0.75, line=dict(color='white', width=1)),
#                   # customdata=df.text, # WRONG!
#                   # hovertemplate='<b>%{text}</b><br>' + '%{customdata}', #<extra></extra>',
#                   # hovertemplate=None
#                   )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 1280, height=720,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0.9, x=0.12, xanchor="left", yanchor="top", bgcolor='rgba(0,0,0,0)',   
#                 font={"color": "black", "size": 16, "family": font_family}, 
#                 traceorder = 'normal' #or reversed
#                 , orientation="v",
#                 # bgcolor="white",
#                 # bordercolor="gainsboro", 
#                 # borderwidth=1
#                 ),
#     title=dict(y=0.99, x=0.12, xanchor='left', yanchor='top', text='Words for ' + key,#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(bgcolor="white", font_size=12, font_family=font_family),
#     )

# fig.add_annotation(y=0, x=1, xanchor="right", yanchor="bottom", text="Parti Gábor, 2022",
#                    font={"color": "lightgray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.show()
# fig.write_html(key + ".html")

In [None]:
# # Without text
# fig.update_traces(mode = "markers+text")

# fig.show()
# fig.write_html(key + "_annotated.html")

####Plotly Go tries


In [None]:
# #Different plotting
# # https://plotly.com/python/builtin-colorscales/
# prism = [p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11]
# # colors = ['rgb(95, 70, 144)', 'rgb(29, 105, 150)', 'rgb(56, 166, 165)', 'rgb(15, 133, 84)', 'rgb(115, 175, 72)', 'rgb(237, 173, 8)', 'rgb(225, 124, 5)']

# path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
# df=pd.read_csv(path+'cinnamon.csv', header =[0], delimiter=',', encoding="utf-8")

# # This is for coloring by group or family:
# groups = np.unique(df['group'].values)  #set the array of unique groups in your column, df['group']
# d = dict(zip(groups, np.arange(len(groups)))) # a dict that associates a numerical value to each group
# d = {'canela': 0,
#      'kinnamon': 1,
#      'korica': 2,
#      'qirfa': 3,
#      'darchin': 4,
#      'gui': 5,
#      'other': 6}

# fig = go.Figure()

# fig.add_traces(data=go.Scattergeo(
#     name='cinnamon',
#     lon = df['lon'],
#     lat = df['lat'],
#     text = df['item'],
#     textposition = 'top right',
#     textfont={"color": "black", "size": 12, "family": font_family},
#     mode = 'markers',
#     # marker_color = [d[s] for s in df['group']], #coloring markers by group (not interactive, no legend)
#     marker_color = 'darkorange',
#     marker=dict(symbol='diamond', color=PolyU, colorscale=colors, size=12, opacity=0.75, line=dict(color='white', width=1)),
#     opacity=1, #for marker & text
#     ))

# # fig.update_traces(customdata=df.text) # ONLY WORKS HERE IN PLOTLY GO HAVE TO USE THIS BUT DISTUINGUISH BETWEEN THE TWO DF
# # fig.update_traces(hovertemplate='%{customdata}') #<extra></extra>')

# # Pepper -----------------------------------------------------------------------
# df=pd.read_csv(path+'pepper.csv', header =[0], delimiter=',', encoding="utf-8")

# groups = np.unique(df['group'].values)  #set the array of unique groups in your column, df['group']
# d = dict(zip(groups, np.arange(len(groups)))) # a dict that associates a numerical value to each group
# d = {'pippali': 0,
#      'pigment': 1,
#      'marica': 2,
#      'hujiao': 3,
#      'other': 4}

# fig.add_traces(data=go.Scattergeo(
#     name='pepper',
#     lon = df['lon'],
#     lat = df['lat'],
#     text = df['item'],
#     textposition = 'top right',
#     textfont={"color": "black", "size": 12, "family": font_family},
#     mode = 'markers',
#     # marker_color = [d[s] for s in df['group']], #coloring markers by group (not interactive, no legend)
#     marker_color = 'lightblue',
#     marker=dict(symbol='diamond', color=PolyU, colorscale=colors, size=12, opacity=0.75, line=dict(color='white', width=1)),
#     opacity=1, #for marker & text
#     ))

# # fig.update_traces(customdata=df.text) # ONLY WORKS HERE IN PLOTLY GO
# # fig.update_traces(hovertemplate='%{customdata}') #<extra></extra>')

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 0.75,
#         projection_rotation = {'lat': 12, 'lon': 60, 'roll': 0},
#         center = {'lat':12,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 1280, height=720,  
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0.9, x=0.05, xanchor="left", yanchor="top",   
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v",),
#     title=dict(y=0.98, x=0.05, xanchor='left', yanchor='top', text='Cinnamon',#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(
#         # bgcolor="white", 
#         font_size=12, 
#         font_family=font_family),
#     )

# fig.show()
# fig.write_html("cinnamon_go.html")

In [None]:
# import plotly.graph_objects as go
# import pandas as pd

# fig = go.Figure(data=go.Scattergeo(
#     lat = df['lat'],
#     lon = df['lon'],
#     text = df['term'].astype(str),
#     marker = dict(
#         color = [d[s] for s in df['group']],
#         colorscale = "Viridis",
#         reversescale = True,
#         opacity = 0.75,
#         size = 12,
#         colorbar = dict(
#             titleside = "right",
#             outlinecolor = "rgba(68, 68, 68, 0)",
#             ticks = "outside",
#             showticksuffix = "last",
#             dtick = 1
#         )
#     )
# ))

# fig.update_layout(
#         title = 'Title',
#         geo = dict(
#             scope='world',
#             projection_type='orthographic',
#             showland = True,
#             landcolor = "gainsboro",
#             subunitcolor = "gainsboro",
#             countrycolor = "white",
#             countrywidth = 0.5,
#             subunitwidth = 0.5,
#             showcountries = True,
#         ),
#     )
# fig.show()

### One by one

###Tea

In [None]:
key = 'tea'

# load in datasets
path = "/content/drive/MyDrive/Thesis/Data/Languages/"

# Load in dataset
df = pd.read_csv(path + "wals.csv", header =[0], delimiter=',', encoding="utf-8")
print(df.shape)

# df.drop(columns=['link'], inplace=True)

df = df[df['tea'].notna()]
print(df.shape)

df['tea'] = [re.sub('2 Words derived from Min Nan Chinese te', "te", str(x)) for x in df['tea']]
df['tea'] = [re.sub('1 Words derived from Sinitic cha', "cha", str(x)) for x in df['tea']]
df['tea'] = [re.sub('3 Others', "other", str(x)) for x in df['tea']]

# save
path = "/content/drive/MyDrive/Thesis/Data/Multilingual/"
df.to_csv(path + key + '.csv')

#-------------------------------------------------------------------------------

#Plot scatter data, easy for grouping!

# Variables
opacity = 0.75
marker_size = 12
marker_symbol = 'circle'
edge_size = 1
edge_color = "white"
line_width = 4
font_size = 12
font_color = 'black' 
font_family = 'Raleway'
lines = 'gainsboro'
land = 'gainsboro'
water = 'white'

fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='name',
    color='tea',
    color_discrete_sequence=[MidnightBlue, PolyU, 'gray'], #https://www.colorhexa.com/1034a6
    opacity = opacity, #???
    hover_name='name',
    # hover_data={'term':True, 'language':True, 'family':True, 'name':False, 'lon':False, 'lat':False, 'group':False}
    labels={"tea": "group"},
    )

fig.update_traces(mode = "markers",
                  textposition='middle right',
                  textfont={"color": "black", "size": 12, "family": font_family},
                  marker=dict(symbol=marker_symbol, size=12, opacity=0.75, line=dict(color='white', width=1)),
                  # customdata=df.text, # WRONG!
                  # hovertemplate='<b>%{text}</b><br>' + '%{customdata}', #<extra></extra>',
                  hovertemplate=None
                  )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 600, height=600,  
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(0,0,0,0)',  
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
#     title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for tea',#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(#bgcolor="white", 
#                     font_size=12, 
#                     font_family=font_family),
#     )

# fig.show()
# filename = "distribution_"

# # write-------------------------------------------------------------------------
# fig.write_image(filename+key+".png", engine="kaleido")
# fig.write_image(filename+key+".pdf", engine="kaleido")
# # download
# files.download(filename+key+".pdf")
# files.download(filename+key+".png")

# # full size for html----------------------
# fig.update_layout(width = 1200, height=600,
#                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top"),
#                   template=draft_template)

# fig.add_annotation(x=1, y=0, xanchor="right", yanchor="bottom", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.write_html(filename+key+".html")
# files.download(filename+key+".html")

# # With text---------------------------------------------------------------------
# fig.update_traces(mode = "markers+text")

# fig.write_html(filename+key+"_annotated.html")
# files.download(filename+key+"_annotated.html")

################################################################################

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        scope='world',
        projection_type = 'orthographic',
        # projection_type = 'natural earth',
        projection_scale = 1,
        projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=True, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
    showlegend = True,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
                font={"color": "black", "size": 14, "family": font_family}, traceorder = 'normal', orientation="v"),
    title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
               font={"color": "black", "size": 20, "family": font_family}),
    margin={"r":0,"t":0,"l":0,"b":0},
    # width = 600, height=600,
    # hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    )

fig.update_layout(
    # width = 1200, height=560,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

# fig.add_annotation(x=0.5, y=0, #x=1
#                    xanchor="right", yanchor="bottom", align="center",
#                    text="© Parti Gábor, 2022",
#                    font={"color": "lightgray", "size": 8, "family": font_family},
#                    showarrow=False)

# fig.add_annotation(x=0, y=0.8, #x=1
#                    xanchor="left", yanchor="bottom", align="left",
#                    text="Data:<br>Östen Dahl. 2013. Tea.<br>In: Dryer, Matthew S. & Haspelmath, Martin (eds.)<br>The World Atlas of Language Structures Online.<br>Leipzig: Max Planck Institute for Evolutionary Anthropology.<br>(Available online at http://wals.info/chapter/138,<br>Accessed on 2022-07-28.)",
#                    font={"color": "black", "size": 8, "family": font_family},
#                    showarrow=False)



# # add images
# fig.add_layout_image(
#     source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
#     sizex=0.15, sizey=0.15,
#     # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
#     # sizex=0.15, sizey=0.15,
#     x=1, y=0, 
#     xanchor="right", yanchor="bottom", 
# )

fig.show()

# write and save ---------------------------------------------------------------
filename = "distribution_"

# write and download
fig.write_html(filename + key + ".html")
files.download(filename + key + ".html")

# smaller for thesis document --------------------------------------------------

# full size for html
fig.update_layout(
    width = 600, height=600,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

# fig.write_image(filename + key + ".png", engine="kaleido")
fig.write_image(filename + key + ".pdf", engine="kaleido")

# download
# files.download(filename + key + ".png")
files.download(filename + key + ".pdf")

(2712, 13)
(230, 13)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

###Cinnamon

In [None]:
# #Plot cinnamon
# key = 'cinnamon'

# path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
# df=pd.read_csv(path+key+'.csv', header =[0], delimiter=',', encoding="utf-8")

# fig = px.scatter_geo(df,
#     lat='lat', 
#     lon='lon',
#     text='item',
#     color='group',
#     # color_discrete_sequence=px.colors.qualitative.Prism,
#     color_discrete_sequence=[p1,p2,p3,p4,p5,p6,p11],
#     opacity = 0.75,
#     hover_name='item',
#     hover_data={'term':True, 'language':True, 'family':True, 'item':False, 'lon':False, 'lat':False, 'group':False},
#     # labels={"group": "category"}
#     )

# fig.update_traces(mode = "markers",
#                   textposition='middle right',
#                   textfont={"color": "black", "size": 12, "family": font_family},
#                   marker=dict(symbol=marker_symbol, size=12, opacity=0.75, line=dict(color='white', width=1)),
#                   )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 600, height=600,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
#     title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for '+key,#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(#bgcolor="white", 
#                     font_size=12, 
#                     font_family=font_family),
#     )

# fig.show()
# filename = "distribution_"

# # write-------------------------------------------------------------------------
# fig.write_image(filename+key+".png", engine="kaleido")
# fig.write_image(filename+key+".pdf", engine="kaleido")
# # download
# files.download(filename+key+".pdf")
# files.download(filename+key+".png")

# # full size for html----------------------
# fig.update_layout(width = 1200, height=600,
#                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top"),
#                   template=draft_template)

# fig.add_annotation(x=1, y=0, xanchor="right", yanchor="bottom", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.write_html(filename+key+".html")
# files.download(filename+key+".html")

# # With text---------------------------------------------------------------------
# fig.update_traces(mode = "markers+text")

# fig.write_html(filename+key+"_annotated.html")
# files.download(filename+key+"_annotated.html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#Plot cinnamon
key = 'cinnamon'

path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
df=pd.read_csv(path+key+'.csv', header =[0], delimiter=',', encoding="utf-8")

# Variables
opacity = 0.75
marker_size = 12
marker_symbol = 'circle'
edge_size = 1
edge_color = "white"
line_width = 4
font_size = 12
font_color = 'black' 
font_family = 'Raleway'
lines = 'gainsboro'
land = 'gainsboro'
water = 'white'

fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='item',
    color='group',
    # color_discrete_sequence=px.colors.qualitative.Prism,
    color_discrete_sequence=[p1,p2,p3,p4,p5,p6,p11],
    opacity = opacity,
    hover_name='item',
    hover_data={'term':True, 'language':True, 'family':True, 'item':False, 'lon':False, 'lat':False, 'group':False},
    # labels={"group": "category"}
    )

fig.update_traces(mode = "markers",
                  textposition='middle right',
                  textfont={"color": font_color, "size": font_size, "family": font_family},
                  marker=dict(symbol=marker_symbol, size=marker_size, opacity=opacity, line=dict(color='white', width=edge_size)),
                  )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 600, height=600,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
#     title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for '+key,#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(#bgcolor="white", 
#                     font_size=12, 
#                     font_family=font_family),
#     )

# fig.show()
# filename = "distribution_"

# # write-------------------------------------------------------------------------
# fig.write_image(filename+key+".png", engine="kaleido")
# fig.write_image(filename+key+".pdf", engine="kaleido")
# # download
# files.download(filename+key+".pdf")
# files.download(filename+key+".png")

# # full size for html----------------------
# fig.update_layout(width = 1200, height=600,
#                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top"),
#                   template=draft_template)

# fig.add_annotation(x=1, y=0, xanchor="right", yanchor="bottom", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.write_html(filename+key+".html")
# files.download(filename+key+".html")

# # With text---------------------------------------------------------------------
# fig.update_traces(mode = "markers+text")

# fig.write_html(filename+key+"_annotated.html")
# files.download(filename+key+"_annotated.html")

# #===============================================================================

# # Compact version of plotting a map.

# # Variables
# PolyU='#8f1329'
# marker_symbol= 'circle'
# marker_size = 12
# edge_size = 1
# line_width = 4
# font_size = 12
# opacity = 0.75
# font_family = "Raleway"
# lines = 'gainsboro'
# land = 'gainsboro'
# water = 'white' # 'azure'

# fig = go.Figure()

# for i,j in names.items():
#   fig.add_trace(go.Scattergeo(name = j['id'].iloc[0], lon = j['lon'], lat = j['lat'],
#       hoverinfo = 'text', hovertext = j['text'], text = j['text'],
#       textfont={"color": "black", "family": font_family, "size": font_size}, 
#       textposition="middle right", mode = 'markers+lines',  
#       line_color = str(j['plot color'].iloc[0]), line_width=line_width, line_dash='solid', 
#       opacity=opacity,
#       marker = dict(symbol=marker_symbol, size = marker_size, opacity=0.75, color = str(j['plot color'].iloc[0]), line = dict(width = edge_size, color = 'white'))))

# LAYOUT

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        scope='world',
        projection_type = 'orthographic',
        # projection_type = 'natural earth',
        projection_scale = 1,
        projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=True, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
    showlegend = True,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
                font={"color": "black", "size": 14, "family": font_family}, traceorder = 'normal', orientation="v"),
    title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
               font={"color": "black", "size": 20, "family": font_family}),
    margin={"r":0,"t":0,"l":0,"b":0},
    # width = 600, height=600,
    # hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    )

fig.update_layout(
    # width = 1200, height=560,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

fig.add_annotation(x=0.5, y=0, #x=1
                   xanchor="right", yanchor="bottom", align="center",
                   text="© Parti Gábor, 2022",
                   font={"color": "lightgray", "size": 8, "family": font_family},
                   showarrow=False)

# add images
fig.add_layout_image(
    source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
    sizex=0.15, sizey=0.15,
    # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
    # sizex=0.15, sizey=0.15,
    x=1, y=0, 
    xanchor="right", yanchor="bottom", 
)

fig.show()

# write and save ---------------------------------------------------------------
filename = "distribution_"

# write and download
fig.write_html(filename + key + ".html")
files.download(filename + key + ".html")

# smaller for thesis document --------------------------------------------------

# full size for html
fig.update_layout(
    width = 600, height=600,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

# fig.write_image(filename + key + ".png", engine="kaleido")
fig.write_image(filename + key + ".pdf", engine="kaleido")

# download
# files.download(filename + key + ".png")
files.download(filename + key + ".pdf")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

###Pepper

In [None]:
# key = 'pepper'

# path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
# df=pd.read_csv(path+key+'.csv', header =[0], delimiter=',', encoding="utf-8")

# # df = df.fillna('') # Operative only

# marker_size = 12
# marker_symbol = 'diamond'
# lines = 'gainsboro'
# land = 'gainsboro'
# water = 'white'
# font_size = 12
# font_color = 'black' 
# font_family = 'Raleway'
# opacity = 0.75

# #Plot scatter data
# fig = px.scatter_geo(df,
#     lat='lat', 
#     lon='lon',
#     text='item',
#     color='group',
#     # color_discrete_sequence=px.colors.qualitative.Prism,
#     color_discrete_sequence=[p1,p2,p4,p6,p11],
#     opacity = opacity,
#     hover_name='item',
#     hover_data={'term':True, 'language':True, 'family':True, 'item':False, 'lon':False, 'lat':False, 'group':False},
#     # labels={"group": "category"}
#     )

# fig.update_traces(mode = "markers",
#                   textposition='middle right',
#                   textfont={"size": font_size, "color": font_color, "family": font_family},
#                   marker=dict(symbol=marker_symbol, size=marker_size, opacity=opacity, line=dict(color='white', width=1)),
#                   )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 600, height=600,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
#     title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for '+key,#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(#bgcolor="white", 
#                     font_size=12, 
#                     font_family=font_family),
#     )

# fig.show()
# filename = "distribution_"

# # write-------------------------------------------------------------------------
# fig.write_image(filename+key+".png", engine="kaleido")
# fig.write_image(filename+key+".pdf", engine="kaleido")
# # download
# files.download(filename+key+".pdf")
# files.download(filename+key+".png")

# # full size for html------------------------------------------------------------
# fig.update_layout(width = 1200, height=600,
#                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top"),
#                   template=draft_template)

# fig.add_annotation(x=1, y=0, xanchor="right", yanchor="bottom", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.write_html(filename+key+".html")
# files.download(filename+key+".html")

# # With text---------------------------------------------------------------------
# fig.update_traces(mode = "markers+text")

# fig.write_html(filename+key+"_annotated.html")
# files.download(filename+key+"_annotated.html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
key = 'pepper'

path="/content/drive/MyDrive/Thesis/Data/Multilingual/"
df=pd.read_csv(path+key+'.csv', header =[0], delimiter=',', encoding="utf-8")

# df = df.fillna('') # Operative only

# Variables
opacity = 0.75
marker_size = 12
marker_symbol = 'circle'
edge_size = 1
edge_color = "white"
line_width = 4
font_size = 12
font_color = 'black' 
font_family = 'Raleway'
lines = 'gainsboro'
land = 'gainsboro'
water = 'white'

#Plot scatter data
fig = px.scatter_geo(df,
    lat='lat', 
    lon='lon',
    text='item',
    color='group',
    # color_discrete_sequence=px.colors.qualitative.Prism,
    color_discrete_sequence=[p1,p2,p4,p6,p11],
    opacity = opacity,
    hover_name='item',
    hover_data={'term':True, 'language':True, 'family':True, 'item':False, 'lon':False, 'lat':False, 'group':False},
    # labels={"group": "category"}
    )

fig.update_traces(mode = "markers",
                  textposition='middle right',
                  textfont={"size": font_size, "color": font_color, "family": font_family},
                  marker=dict(symbol=marker_symbol, size=marker_size, opacity=opacity, 
                              line=dict(color=edge_color, width=edge_size)),
                  )

# fig.update_layout(
#     geo = dict(
#         resolution=110, #50 is large or 110 small
#         scope='world',
#         projection_type = 'orthographic',
#         # projection_type = 'natural earth',
#         projection_scale = 1,
#         projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
#         center = {'lat':20,'lon':60},
#         bgcolor='white',
#         showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
#         showcountries=False, countrywidth = 1, countrycolor = lines, 
#         showframe=True, framewidth = 1, framecolor = lines, 
#         showlakes=True, lakecolor = water,
#         showland=True, landcolor = land, 
#         showocean=True, oceancolor = water,
#         showrivers=True, riverwidth = 1, rivercolor = water,
#         showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
#         lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
#         lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
#     width = 600, height=600,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
#                 font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
#     title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for '+key,#"Various groups for the names of cinnamon in different languages",   
#                font={"color": "black", "size": 20, "family": font_family}),
#     hovermode="closest", #default
#     hoverlabel=dict(#bgcolor="white", 
#                     font_size=12, 
#                     font_family=font_family),
#     )

# fig.show()
# filename = "distribution_"

# # write-------------------------------------------------------------------------
# fig.write_image(filename+key+".png", engine="kaleido")
# fig.write_image(filename+key+".pdf", engine="kaleido")
# # download
# files.download(filename+key+".pdf")
# files.download(filename+key+".png")

# # full size for html----------------------
# fig.update_layout(width = 1200, height=600,
#                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top"),
#                   template=draft_template)

# fig.add_annotation(x=1, y=0, xanchor="right", yanchor="bottom", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.write_html(filename+key+".html")
# files.download(filename+key+".html")

# # With text---------------------------------------------------------------------
# fig.update_traces(mode = "markers+text")

# fig.write_html(filename+key+"_annotated.html")
# files.download(filename+key+"_annotated.html")

# #===============================================================================

# # Compact version of plotting a map.

# # Variables
# PolyU='#8f1329'
# marker_symbol= 'circle'
# marker_size = 12
# edge_size = 1
# line_width = 4
# font_size = 12
# opacity = 0.75
# font_family = "Raleway"
# lines = 'gainsboro'
# land = 'gainsboro'
# water = 'white' # 'azure'

# fig = go.Figure()

# for i,j in names.items():
#   fig.add_trace(go.Scattergeo(name = j['id'].iloc[0], lon = j['lon'], lat = j['lat'],
#       hoverinfo = 'text', hovertext = j['text'], text = j['text'],
#       textfont={"color": "black", "family": font_family, "size": font_size}, 
#       textposition="middle right", mode = 'markers+lines',  
#       line_color = str(j['plot color'].iloc[0]), line_width=line_width, line_dash='solid', 
#       opacity=opacity,
#       marker = dict(symbol=marker_symbol, size = marker_size, opacity=0.75, color = str(j['plot color'].iloc[0]), line = dict(width = edge_size, color = 'white'))))

# LAYOUT

fig.update_layout(
    geo = dict(
        resolution=110, #50 is large or 110 small
        scope='world',
        projection_type = 'orthographic',
        # projection_type = 'natural earth',
        projection_scale = 1,
        projection_rotation = {'lat': 20, 'lon': 60, 'roll': 0},
        # center = {'lat':20,'lon':20},
        bgcolor='white',
        showcoastlines=True, coastlinewidth = 1, coastlinecolor = lines,
        showcountries=False, countrywidth = 1, countrycolor = lines, 
        showframe=True, framewidth = 1, framecolor = lines, 
        showlakes=True, lakecolor = water,
        showland=True, landcolor = land, 
        showocean=True, oceancolor = water,
        showrivers=True, riverwidth = 1, rivercolor = water,
        showsubunits=False, subunitwidth = 1, subunitcolor = lines, 
        lonaxis = dict(showgrid = True, gridwidth = 0.5, dtick = 10),
        lataxis = dict (showgrid = True, gridwidth = 0.5, dtick = 10)),
    showlegend = True,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
                font={"color": "black", "size": 14, "family": font_family}, traceorder = 'normal', orientation="v"),
    title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Words for ' + key + " in various languages, grouped by their origins.",
               font={"color": "black", "size": 20, "family": font_family}),
    margin={"r":0,"t":0,"l":0,"b":0},
    # width = 600, height=600,
    # hovermode="closest", #default
    # hoverlabel=dict(#bgcolor="white", 
    #                 font_size=12, 
    #                 font_family=font_family),
    )

fig.update_layout(
    # width = 1200, height=560,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

fig.add_annotation(x=0.5, y=0, #x=1
                   xanchor="right", yanchor="bottom", align="center",
                   text="© Parti Gábor, 2022",
                   font={"color": "lightgray", "size": 8, "family": font_family},
                   showarrow=False)

# add images
fig.add_layout_image(
    source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
    sizex=0.15, sizey=0.15,
    # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
    # sizex=0.15, sizey=0.15,
    x=1, y=0, 
    xanchor="right", yanchor="bottom", 
)

fig.show()

# write and save ---------------------------------------------------------------
filename = "distribution_"

# write and download
fig.write_html(filename + key + ".html")
files.download(filename + key + ".html")

# smaller for thesis document --------------------------------------------------

# full size for html
fig.update_layout(
    width = 600, height=600,
    legend=dict(y=0, x=0, xanchor="left", yanchor="bottom"),
    )

fig.write_image(filename + key + ".png", engine="kaleido")
fig.write_image(filename + key + ".pdf", engine="kaleido")

# download
files.download(filename + key + ".png")
files.download(filename + key + ".pdf")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Timelines

In [None]:
path="/content/drive/MyDrive/Thesis/Data/"
df=pd.read_csv(path+'oed.csv', header =[0], delimiter=',', encoding="utf-8")
df = df.loc[df['year'] != '?'] # include ones to include
df.to_csv(path+"oed_working.csv", index = None, header=True)
df=pd.read_csv(path+'oed_working.csv', header =[0], delimiter=',', encoding="utf-8")

# df = df.loc[df['level'] == 'main'] # include ones to include
# df = df.loc[df['id'] == 'pepper'] # include ones to include
# df = df.loc[(df['id'] == 'saffron') | (df['id'] == 'pepper')] # include ones to include
print(df['id'].value_counts())
df

# Add dummy column for size
df['size'] = 1

# # # Sorting
# df.sort_values(['level'], inplace = True, key=lambda col: col.str.lower()) # sort by order of columns, ignoring casing
df['class'] = pd.Categorical(df['class'], ["n.", "v.", "adj.", "adv.", "phrase"]) # add categorical order here
df.sort_values(["class", "level", "year"], inplace = True) # sort according to the categories
df

pepper      132
ginger       87
saffron      59
cinnamon     39
Name: id, dtype: int64


Unnamed: 0,id,tier,level,entry,class,year,size
39,ginger,2,main,"ginger, n. and adj.",n.,925,1
126,pepper,3,main,"pepper, n.",n.,925,1
129,pepper,3,main,"pepper-quern, n.",n.,940,1
130,pepper,3,main,"peppercorn, n. and adj.",n.,945,1
247,saffron,4,main,"saffron, n. and adj.",n.,1200,1
...,...,...,...,...,...,...,...
310,pepper,3,sub,†to pepper a person's box (also pans),phrase,1608,1
199,pepper,3,sub,to snuff pepper,phrase,1624,1
221,pepper,3,sub,to pay a visit to Pepper Alley,phrase,1821,1
123,ginger,2,sub,to put ginger,phrase,1919,1


In [None]:
# # https://plotly.com/python/templates/
# for template in ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]:
#     fig = px.scatter(df, x="year", y="id", color="class",
#                     #  log_x=True, size_max=60,
#                      template=template)

#     fig.show()

##OED entries

###Document

In [None]:
# Variables
PolyU='#8f1329'
marker_size = 20
font_size = 20
edge_size = 2
line_width = 4
opacity = 0.5
font_color = "black"
font_family = "Raleway"

fig = px.scatter(df, x="year", y="id", 
                 size="size", size_max=marker_size,
                 color="class", color_discrete_sequence=[p2,p7,p4,p5,p1],
                 symbol="level", symbol_sequence = ['circle', 'circle-open'], opacity = opacity, #'circle', 'circle-open'
                 hover_name='entry',
                 marginal_x="histogram",
                 template = "plotly_white"
                 )

fig.update_traces(
    marker_line_width=edge_size,
    marker_line_color="rgba(0,0,0,0)",
    # selector=dict(mode='markers')
    )

fig.update_layout(
    font_family=font_family,
    font_color=font_color,
    font_size=font_size)

fig.update_xaxes(visible=True)
fig.update_yaxes(visible=True, title=None, showticklabels=True)
fig.update_xaxes(ticklabelposition="inside")
fig.update_yaxes(ticklabelposition="inside") # tickangle = 0
# fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8') #f0f0f0
# fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8')
# fig.update_layout(paper_bgcolor="white") # transparent background rgb(0,0,0,0)
# fig.update_layout(plot_bgcolor="white")#f6f6f6


fig.update_layout(width = 1000, height=600,
                  margin={"r":0,"t":0,"l":0,"b":0}, showlegend = True,
                  legend=dict(xanchor="left", yanchor="top", #title="class",
                              title=dict(side='top'), bgcolor=transparent,
                              # y=-0.15, x=0, 
                              font={"color": font_color, "size": font_size-1, "family": font_family}, 
                              orientation="h"))

fig.show()

# writing and saving ---------------------------------------------------------
filename = "oed" 
fig.write_image(filename+".pdf", engine="kaleido")
# download
files.download(filename+".pdf")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

###HTML

In [None]:
# Variables
PolyU='#8f1329'
marker_size = 20
edge_size = 2
line_width = 4
font_size = 20
opacity = 0.5
font_color = "black"
font_family = "Raleway"

fig = px.scatter(df, x="year", y="id", 
                 size="size", size_max=marker_size,
                 color="class", color_discrete_sequence=[p2,p7,p4,p5,p1],
                 symbol="level", symbol_sequence = ['circle', 'circle-open'], opacity = opacity, #'circle', 'circle-open'
                 hover_name='entry',
                 marginal_x="histogram",
                 template = "plotly_white"
                 )

fig.update_traces(
    marker_line_width=edge_size,
    marker_line_color="rgba(0,0,0,0)",
    # selector=dict(mode='markers')
    )

fig.update_layout(
    font_family=font_family,
    font_color=font_color,
    font_size=font_size)

fig.update_xaxes(visible=True)
fig.update_yaxes(visible=True, title=None, showticklabels=True)
fig.update_xaxes(ticklabelposition="inside")
fig.update_yaxes(ticklabelposition="inside") # tickangle = 0
# fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8') #f0f0f0
# fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8')
# fig.update_layout(paper_bgcolor="white") # transparent background rgb(0,0,0,0)
# fig.update_layout(plot_bgcolor="white")#f6f6f6


fig.update_layout(
    # width = 1000, height=400,
    margin={"r":0,"t":20,"l":0,"b":0}, showlegend = True,
    title=dict(y=1, x=0, xanchor='left', yanchor='top', 
               text='A timeline of words and phrases derived from spice names, based on main- and sub-level entries in the OED',   
               font={"color": "black", "size": font_size, "family": font_family}),
    legend=dict(xanchor="left", yanchor="top", #title="class",
                title=dict(side='top'),
                              # y=-0.15, x=0, 
                              font={"color": font_color, "size": font_size-1, "family": font_family}, 
                              bgcolor='rgba(0,0,0,0)', 
                              orientation="h"))


fig.add_annotation(
    xref = "paper", yref = "paper",
    x=0, y=0, #x=1
    xanchor="left", yanchor="top", align="center",
    text="© Parti Gábor, 2022",
    font={"color": "gainsboro", "size": 8, "family": font_family},
    showarrow=False)

# add images
fig.add_layout_image(
    source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
    sizex=0.1, sizey=0.1,
    # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
    # sizex=0.15, sizey=0.15,
    x=1, y=0, 
    xanchor="right", yanchor="top", 
)

fig.show()

# writing and saving ---------------------------------------------------------
filename = "oed" 
fig.write_html(filename+".html")
files.download(filename+".html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## OED: pepper

In [None]:
path="/content/drive/MyDrive/Thesis/Data/"
df=pd.read_csv(path+'oed.csv', header =[0], delimiter=',', encoding="utf-8")
df = df.loc[df['year'] != '?'] # include ones to include
df.to_csv(path+"oed_working.csv", index = None, header=True)
df=pd.read_csv(path+'oed_working.csv', header =[0], delimiter=',', encoding="utf-8")

# Filter
# df = df.loc[df['level'] == 'main'] # include ones to include
df = df.loc[df['id'] == 'pepper'] # include ones to include
# Add dummy column for size
df['size'] = 1
# # # Sorting
df['class'] = pd.Categorical(df['class'], ["n.", "v.", "adj.", "adv.", "phrase"]) 
df.sort_values(["class", "level", "year"], inplace = True)
df

Unnamed: 0,id,tier,level,entry,class,year,size
126,pepper,3,main,"pepper, n.",n.,925,1
129,pepper,3,main,"pepper-quern, n.",n.,940,1
130,pepper,3,main,"peppercorn, n. and adj.",n.,945,1
132,pepper,3,main,"pepperer, n.1",n.,1309,1
134,pepper,3,main,"peppergrass, n.",n.,1500,1
...,...,...,...,...,...,...,...
238,pepper,3,sub,pepperily,adv.,1898,1
133,pepper,3,main,to have pepper in the nose,phrase,1400,1
310,pepper,3,sub,†to pepper a person's box (also pans),phrase,1608,1
199,pepper,3,sub,to snuff pepper,phrase,1624,1


###Document

In [None]:
# Variables
PolyU='#8f1329'
marker_size = 20
edge_size = 2
line_width = 4
font_size = 20
opacity = 0.5
font_color = "black"
font_family = "Raleway"

fig = px.scatter(df, x="year", y="class", 
                 size="size", size_max=marker_size,
                 color="class", color_discrete_sequence=[p2,p7,p4,p5,p1],
                 symbol="level", symbol_sequence = ['circle', 'circle-open'], opacity = opacity,
                 hover_name='entry',
                 hover_data={'class':True, 'level':True, 'year':True, 'id':True, 'size':False},
                 marginal_x="histogram",
                 template='plotly_white')

fig.update_traces(
    marker_line_width=edge_size,
    marker_line_color="rgba(0,0,0,0)",
    # selector=dict(mode='markers')
    )

fig.update_layout(
    font_family=font_family,
    font_color=font_color,
    font_size=font_size)

fig.update_xaxes(visible=True)
fig.update_yaxes(visible=True, title=None, showticklabels=True)
fig.update_xaxes(ticklabelposition="inside")
fig.update_yaxes(ticklabelposition="inside") # tickangle = 0

fig.update_layout(width = 1000, height=500,
                  margin={"r":0,"t":0,"l":0,"b":0}, showlegend = True,
                  legend=dict(xanchor="left", yanchor="top", #title="class",
                              title=dict(side='top'),
                              y=0, x=0, 
                              font={"color": font_color, "size": font_size-2, "family": font_family}, 
                              bgcolor='rgba(0,0,0,0)', 
                              orientation="h"))

fig.show()

# writing and saving ---------------------------------------------------------
filename = "oed_pepper" 
fig.write_image(filename+".pdf", engine="kaleido")
files.download(filename+".pdf")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

###HTML

In [None]:
# Variables #-------------------------------------------------------------------
PolyU='#8f1329'
marker_size = 20
edge_size = 2
line_width = 4
font_size = 20
opacity = 0.5
font_color = "black"
font_family = "Raleway"

fig = px.scatter(df, x="year", y="class", 
                 size="size", size_max=marker_size,
                 color="class", color_discrete_sequence=[p2,p7,p4,p5,p1],
                 symbol="level", symbol_sequence = ['circle', 'circle-open'], opacity = opacity,
                 hover_name='entry',
                 hover_data={'class':True, 'level':True, 'year':True, 'id':True, 'size':False},
                 marginal_x="histogram",
                 template='plotly_white')

fig.update_traces(
    marker_line_width=edge_size,
    marker_line_color="rgba(0,0,0,0)",
    # selector=dict(mode='markers')
    )

fig.update_layout(
    font_family=font_family,
    font_color=font_color,
    font_size=font_size)

fig.update_xaxes(visible=True)
fig.update_yaxes(visible=True, title=None, showticklabels=True)
fig.update_xaxes(ticklabelposition="inside")
fig.update_yaxes(ticklabelposition="inside") # tickangle = 0

fig.update_layout(
    # width = 1000, height=400,
                  margin={"r":0,"t":20,"l":0,"b":0}, showlegend = True,
                  title=dict(y=1, x=0, xanchor='left', yanchor='top', 
                  text='A timeline of words and phrases derived from pepper, based on main- and sub-level entries in the OED',   
                  font={"color": "black", "size": font_size, "family": font_family}),
                  legend=dict(xanchor="left", yanchor="top", #title="class",
                              title=dict(side='top'),
                              # y=-0.15, x=0, 
                              font={"color": font_color, "size": font_size-2, "family": font_family}, 
                              bgcolor='rgba(0,0,0,0)', 
                              orientation="h"))
                  # legend=dict(y=1, x=1, 
                  # xanchor="left", yanchor="top", 
                  # orientation="v"))


fig.add_annotation(
    xref = "paper", yref = "paper",
    x=0, y=0, #x=1
    xanchor="left", yanchor="top", align="center",
    text="© Parti Gábor, 2022",
    font={"color": "gainsboro", "size": 8, "family": font_family},
    showarrow=False)

# add images
fig.add_layout_image(
    source="https://upload.wikimedia.org/wikipedia/en/thumb/9/9e/PolyU_Logo_with_wordmark.svg/1024px-PolyU_Logo_with_wordmark.svg.png",
    sizex=0.1, sizey=0.1,
    # source="https://upload.wikimedia.org/wikipedia/en/thumb/5/52/PolyU.svg/759px-PolyU.svg.png",
    # sizex=0.15, sizey=0.15,
    x=1, y=0, 
    xanchor="right", yanchor="top", 
)

# # Annotations manually
# m = df.loc[df["level"] == "main", ["id","level","entry","class","year"]]
# for i, j, k in zip(df['year'], df['id'], df['entry']):
#     fig.add_annotation(go.layout.Annotation(
#     x = i, y = j, xref="x", yref="y",
#     text = k, 
#     xanchor='auto',
#     yanchor='bottom',
#     align='center',  
#     showarrow=False,
#     textangle=-45,
#     font=dict(size=12, color=font_color, family=font_family)))

# Annotations with a button
layoutAnnotationList = []
for i, j, k in zip(df['year'], df['class'], df['entry']):
    x = i
    y = j
    text = k
    xanchor='center'
    yanchor='bottom'
    layoutAnnotationList.append( { 'x':x, 'y':y, 'xanchor':xanchor, 'yanchor':yanchor, 
                                  'text':text, 'textangle':-45 , 'showarrow':False, 
                                  "font":dict(size=12, color=font_color, family=font_family) } )
# layout = { 'annotations': layoutAnnotationList } 

layoutButtons = list([
                dict(type="buttons", active=0, showactive=True,
                     x=1, xanchor="right",
                     y=-0.1, yanchor="top",
                     buttons=list([   
                        dict(label = 'Annotations: On',
                             method = 'update',
                             args = [{'visible': [True, True, True, True]},{'annotations':layoutAnnotationList}]
                             ),
                        dict(label = 'Annotations: Off',
                             method = 'update',
                             args = [{'visible':[True, True, True, True]},{'annotations':[]}]
                             ),]))])

layout = {#'annotations': layoutAnnotationList, 
          'updatemenus':layoutButtons }

# https://stackoverflow.com/questions/54222205/hidding-annotations-in-plotly-python-using-a-button

fig.update_layout(layout)
        
fig.show()

filename = "oed_pepper" 
fig.write_html(filename+".html")
files.download(filename+".html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:

# Variables #-------------------------------------------------------------------
PolyU='#8f1329'
marker_size = 20
edge_size = 2
line_width = 4
font_size = 20
opacity = 0.5
font_color = "black"
font_family = "Raleway"

fig = px.scatter(df, x="year", y="class", 
                 size="size", size_max=marker_size,
                 color="class", color_discrete_sequence=[p2,p7,p4,p5,p1],
                 symbol="level", symbol_sequence = ['circle', 'circle-open'], opacity = opacity,
                 hover_name='entry',
                 hover_data={'class':True, 'level':True, 'year':True, 'id':True, 'size':False},
                 marginal_x="histogram",
                 template='plotly_white')

fig.update_traces(
    marker_line_width=edge_size,
    marker_line_color="rgba(0,0,0,0)",
    # selector=dict(mode='markers')
    )

fig.update_layout(
    font_family=font_family,
    font_color=font_color,
    font_size=font_size)

fig.update_xaxes(visible=True)
fig.update_yaxes(visible=True, title=None, showticklabels=True)
fig.update_xaxes(ticklabelposition="inside")
fig.update_yaxes(ticklabelposition="inside") # tickangle = 0
# fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8') #f0f0f0
# fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#f8f8f8')
# fig.update_layout(paper_bgcolor="white") # transparent background rgb(0,0,0,0)
# fig.update_layout(plot_bgcolor="white")#f6f6f6

fig.update_layout(width = 1000, height=400,
                  margin={"r":0,"t":0,"l":0,"b":0}, showlegend = True,
                  legend=dict(xanchor="left", yanchor="top", #title="class",
                              # y=-0.15, x=0, 
                              font={"color": font_color, "size": font_size-1, "family": font_family}, 
                              bgcolor='rgba(0,0,0,0)', 
                              orientation="h"))

fig.show()

# writing and saving ---------------------------------------------------------
filename = "oed_pepper" 
fig.write_image(filename+".pdf", engine="kaleido")
# download
files.download(filename+".pdf")

# full size for html ---------------------------------------------------------
fig.update_layout(
    font_size=12,
    autosize=True,
    margin={"r":0,"t":60,"l":0,"b":30},
    title=dict(y=0.9, x=0, xanchor='left', yanchor='top', 
               text='A timeline of words and phrases derived from spice names, based on main- and sub-level entries in the OED',   
               font={"color": "black", "size": font_size, "family": font_family}),
    legend=dict(y=1, x=1, 
                xanchor="left", yanchor="top", 
                orientation="v")
    )

fig.add_annotation(xref="paper", yref="paper", x=1, y=0, xanchor="right", yanchor="top", align="right",
                   text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
                   font={"color": "gray", "size": 10, "family": font_family},
                   showarrow=False)

# # Annotations manually
# m = df.loc[df["level"] == "main", ["id","level","entry","class","year"]]
# for i, j, k in zip(df['year'], df['id'], df['entry']):
#     fig.add_annotation(go.layout.Annotation(
#     x = i, y = j, xref="x", yref="y",
#     text = k, 
#     xanchor='auto',
#     yanchor='bottom',
#     align='center',  
#     showarrow=False,
#     textangle=-45,
#     font=dict(size=12, color=font_color, family=font_family)))

# Annotations with a button
layoutAnnotationList = []
for i, j, k in zip(df['year'], df['class'], df['entry']):
    x = i
    y = j
    text = k
    xanchor='center'
    yanchor='bottom'
    layoutAnnotationList.append( { 'x':x, 'y':y, 'xanchor':xanchor, 'yanchor':yanchor, 
                                  'text':text, 'textangle':-45 , 'showarrow':False, 
                                  "font":dict(size=12, color=font_color, family=font_family) } )
# layout = { 'annotations': layoutAnnotationList } 

layoutButtons = list([
                dict(type="buttons", active=0, showactive=True,
                     x=1, xanchor="left",
                     y=0, yanchor="bottom",
                     buttons=list([   
                        dict(label = 'Annotations: On',
                             method = 'update',
                             args = [{'visible': [True, True, True, True]},{'annotations':layoutAnnotationList}]
                             ),
                        dict(label = 'Annotations: Off',
                             method = 'update',
                             args = [{'visible':[True, True, True, True]},{'annotations':[]}]
                             ),]))])

layout = {#'annotations': layoutAnnotationList, 
          'updatemenus':layoutButtons }

# https://stackoverflow.com/questions/54222205/hidding-annotations-in-plotly-python-using-a-button

fig.update_layout(layout)
        
fig.show()

fig.write_html(filename+".html")
files.download(filename+".html")

In [None]:
# import plotly.express as px

# # Variables
# PolyU='#8f1329'
# marker_size = 24
# edge_size = 3
# line_width = 5
# font_size = 24
# font_color = "black"
# opacity = 0.5
# font_family = "Raleway"

# fig = px.scatter(
#     df, x="year", y="id", 
#     color="class",
#     symbol="level",
#     symbol_sequence = ['circle', 'circle-open'],
#     # color_discrete_sequence=px.colors.qualitative.Prism, #Viridis #Bold
#     # color_discrete_sequence=px.colors.sequential.Viridis,
#     color_discrete_sequence=[p2,p7,p4,p5,p1],
#     opacity = 0.5,
#     hover_name='entry',
#     hover_data={'entry':True, 'level':True, 'class':True, 'year':True, 'id':False, 'entry':False},
#     labels={"id": "", "year":"year", "class":"class", "level":"level"},
#     # title="A timeline of words and phrases derived from spice names,<br>based on main- and sub-level entries in the OED",
#     marginal_x="histogram", #'rug', 'box', 'violin', or 'histogram'
#     # marginal_y="rug"
#     )

# # fig = px.line(df, x='year', y='value', color='class')

# fig.update_traces(
#     # marker_color="lightskyblue",
#     marker_line_color="rgba(0,0,0,0)",
#     marker_line_width=edge_size,
#     marker_size=marker_size,
#     selector=dict(mode='markers')
#     )

# # # a good way to id subsets
# # sub = df.loc[df["level"] == "sub", ["id","level","entry","class","year"]]
# # print(sub)

# fig.update_xaxes(visible=True, showticklabels=False)
# fig.update_yaxes(visible=False, showticklabels=False)
# fig.update_layout(paper_bgcolor="white") # transparent background rgb(0,0,0,0)
# fig.update_layout(plot_bgcolor="#f8f8f8")#f6f6f6
# fig.update_layout(xaxis = go.layout.XAxis(title='year', showticklabels=True))
# fig.update_layout(yaxis = go.layout.YAxis(title=None, visible = True, showticklabels=False, showgrid=True))
# fig.update_xaxes(showgrid=True, gridwidth=2, gridcolor='white') #f0f0f0
# fig.update_yaxes(showgrid=True, gridwidth=2, gridcolor='white')
# fig.update_xaxes(ticklabelposition="inside")
# fig.update_yaxes(ticklabelposition="inside", tickangle = 270, showticklabels=True)


# fig.update_layout(
#     width = 1200, height=400,
#     margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(#xref="plot", yref="plot"
#                 title="class",
#                 y=-0.15, x=0, xanchor="left", yanchor="top", 
#                 bgcolor='rgba(0,0,0,0)',   
#                 font={"color": "black", "size": 24, "family": font_family}, 
#                 orientation="h",
#                 # traceorder = 'normal', #or reversed
#                 # bgcolor="white",
#                 # bordercolor="gainsboro", 
#                 # borderwidth=1
#                 )
#     )

# fig.update_layout(
#     font_family=font_family,
#     font_color=font_color,
#     font_size=font_size)

# # fig.add_annotation(y=0.1, x=950, xanchor="center", yanchor="middle", text="Old English",
# #                    font={"color": "darkgray", "size": 10, "family": font_family},
# #                    showarrow=False)

# fig.show()

# # # write
# # filename = "pepper_oed" 
# # fig.write_image(filename+".png", engine="kaleido")
# # fig.write_image(filename+".pdf", engine="kaleido")
# # # download
# # files.download(filename+".pdf")
# # files.download(filename+".png")

# # # full size for html-----------------------
# # fig.update_layout(
# #     width = 1200, height=400,
# #     margin={"r":0,"t":60,"l":0,"b":0},
# #     title=dict(y=0.9, x=0, xanchor='left', yanchor='top', 
# #                text='A timeline of words and phrases derived from spice names, based on main- and sub-level entries in the OED',   
# #                font={"color": "black", "size": font_size, "family": font_family}),
# #     legend=dict(y=1, x=1, 
# #                 xanchor="left", yanchor="top", 
# #                 orientation="v")
# #     )

# # fig.add_annotation(xref="paper", yref="paper", y=0, x=1, xanchor="right", yanchor="top", align="right",
# #                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
# #                    font={"color": "gray", "size": 10, "family": font_family},
# #                    showarrow=False)

# # fig.show()

# # fig.write_html(filename+".html")
# # files.download(filename+".html")

In [None]:
# # Alltogether
# import plotly.express as px

# # Variables
# PolyU='#8f1329'
# marker_size = 24
# edge_size = 3
# line_width = 5
# font_size = 16
# font_color = "black"
# opacity = 0.5
# font_family = "Raleway"

# fig = px.scatter(
#     df, x="year", y="id", 
#     color="class",
#     symbol="level",
#     symbol_sequence = ['circle', 'circle-open'],
#     color_discrete_sequence=[p2,p7,p4,p5,p1],
#     opacity = 0.5,
#     # hover_name='entry',
#     # hover_data={'entry':True, 'level':True, 'class':True, 'year':True, 'id':False, 'entry':False},
#     labels={"id": "id", "year":"year", "class":"class", "level":"level"},
#     # marginal_x="histogram", #'rug', 'box', 'violin', or 'histogram'
#     # marginal_y="rug"
#     )

# fig.update_traces(
#     # marker_color="lightskyblue",
#     marker_line_color="rgba(0,0,0,0)",
#     marker_line_width=edge_size,
#     marker_size=marker_size,
#     selector=dict(mode='markers')
#     )

# fig.update_xaxes(visible=True, showticklabels=False)
# fig.update_yaxes(visible=False, showticklabels=False)
# fig.update_layout(paper_bgcolor="white") # transparent background rgb(0,0,0,0)
# fig.update_layout(plot_bgcolor="#f8f8f8")#f6f6f6
# fig.update_layout(xaxis = go.layout.XAxis(title='year', showticklabels=True))
# fig.update_layout(yaxis = go.layout.YAxis(title=None, visible = True, showticklabels=False, showgrid=True))
# fig.update_xaxes(showgrid=True, gridwidth=2, gridcolor='white') #f0f0f0
# fig.update_yaxes(showgrid=True, gridwidth=2, gridcolor='white')
# fig.update_xaxes(ticklabelposition="inside")
# fig.update_yaxes(ticklabelposition="inside", tickangle = 0, showticklabels=True)


# fig.update_layout(
#     width = 1000, height=400,
#     # margin={"r":0,"t":0,"l":0,"b":0},
#     showlegend = True,
#     legend=dict(#xref="plot", yref="plot"
#                 title="class",
#                 y=-0.15, x=0, xanchor="left", yanchor="top", 
#                 bgcolor='rgba(0,0,0,0)',   
#                 font={"color": "black", "size": font_size, "family": font_family}, 
#                 orientation="h",
#                 # traceorder = 'normal', #or reversed
#                 # bgcolor="white",
#                 # bordercolor="gainsboro", 
#                 # borderwidth=1
#                 )
#     )

# fig.update_layout(
#     font_family=font_family,
#     font_color=font_color,
#     font_size=font_size)

# # fig.add_annotation(y=0.1, x=950, xanchor="center", yanchor="middle", text="Old English",
# #                    font={"color": "darkgray", "size": 10, "family": font_family},
# #                    showarrow=False)



# fig.show()

In [None]:
# # write
# filename = "pepper_oed" 
# fig.write_image(filename+".png", engine="kaleido")
# fig.write_image(filename+".pdf", engine="kaleido")
# # download
# files.download(filename+".pdf")
# files.download(filename+".png")

# # full size for html-----------------------
# fig.update_layout(
#     width = 1200, height=400,
#     margin={"r":0,"t":60,"l":0,"b":0},
#     title=dict(y=0.9, x=0, xanchor='left', yanchor='top', 
#                text='A timeline of words and phrases derived from spice names, based on main- and sub-level entries in the OED',   
#                font={"color": "black", "size": font_size, "family": font_family}),
#     legend=dict(y=1, x=1, 
#                 xanchor="left", yanchor="top", 
#                 orientation="v")
#     )

# fig.add_annotation(xref="paper", yref="paper", y=0, x=1, xanchor="right", yanchor="top", align="right",
#                    text="Parti Gábor, 2022<br>The Hong Kong Polytechnic University",
#                    font={"color": "gray", "size": 10, "family": font_family},
#                    showarrow=False)

# fig.show()

# fig.write_html(filename+".html")
# files.download(filename+".html")

##Timeline with binning

In [None]:
df['century'] = century(df['year'])
df['value'] = 1
d = df['century'].value_counts().to_dict()

for index, row in df.iterrows():
  for key, value in d.items():
    if row['century'] == key:
      row['value'] = value
      df.loc[index, 'value'] = value

df

Unnamed: 0,id,tier,level,entry,class,year,century,value
126,pepper,3,main,"pepper, n.",n.,925,10,7
129,pepper,3,main,"pepper-quern, n.",n.,940,10,7
130,pepper,3,main,"peppercorn, n. and adj.",n.,945,10,7
132,pepper,3,main,"pepperer, n.1",n.,1309,14,2
134,pepper,3,main,"peppergrass, n.",n.,1500,16,15
...,...,...,...,...,...,...,...,...
235,pepper,3,sub,pepper-salt,adj.,1882,19,26
238,pepper,3,sub,pepperily,adv.,1898,19,26
133,pepper,3,main,to have pepper in the nose,phrase,1400,15,1
199,pepper,3,sub,to snuff pepper,phrase,1624,17,27


Plotly Go

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = px.line(df, x='year', y='value', color='class')

# fig = make_subplots(rows=2, cols=1)

# Go ===========================================================================

# fig = go.Figure()

# fig.add_trace(go.Scatter(x=df['year'], y=df['tier'],
#                     mode='markers',
#                     name='oed',
#                     text=df['entry'],
#                     textposition = 'top right',
#                     textfont={"color": font_color, "size": font_size, "family": font_family},
#                     # marker_color = 'darkorange',
#                     marker=dict(symbol='circle', color=PolyU, size=marker_size, opacity=opacity, line=dict(color='white', width=edge_size)), 
#                     opacity=1,
#                     )) 


# x = df['year']
# y = df['value']

# fig.add_trace(go.Scatter(x=df['year'], y=df['value']/10,
#                          mode="lines",
#                          name="spline",
#                          line_shape='spline',
#                          line_color = 'black', line_width=line_width, line_dash='solid', 
#                          opacity=opacity,
#                          hoverinfo='text+name',
#                          text=["tweak line smoothness"],
#                          ))

# # Go =========================================================================


fig.update_traces(
    # mode='markers+lines',
    marker=dict(#symbol='0', #'diamond',
                size=marker_size,
                # line=dict(width=4, 
                           # color='white'
                          # )
                  ),
                  selector=dict(mode='markers')
                  )

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#f0f0f0')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#f0f0f0')
fig.update_layout(paper_bgcolor="rgb(0,0,0,0)") # transparent
fig.update_layout(paper_bgcolor="white",
                  plot_bgcolor="white")

fig.update_layout(
    width = 1200, height=400,
    # margin={"r":0,"t":0,"l":0,"b":0},
    showlegend = True,
    legend=dict(
                # y=0.9, x=0.12, xanchor="left", yanchor="top", 
                bgcolor='rgba(0,0,0,0)',   
                font={"color": "black", "size": 12, "family": font_family}, 
                # traceorder = 'normal', #or reversed
                orientation="h",
                # bgcolor="white",
                # bordercolor="gainsboro", 
                # borderwidth=1
                ),
    # title=dict(
               #y=0.99, x=0.12, xanchor='left', yanchor='top', 
              #  text='Title',   
              #  font={"color": "black", "size": 20, "family": font_family}),
    # hovermode="closest", #default
    # hoverlabel=dict(bgcolor="white", font_size=12, font_family=font_family),
    )

fig.update_layout(
    font_family="Raleway",
    font_color="black",
    font_size=font_size
    # title_font_family="Times New Roman",
    # title_font_color="red",
    # legend_title_font_color="green"
)

# fig.add_annotation(y=0, x=1, xanchor="right", yanchor="bottom", text="Parti Gábor, 2022",
#                    font={"color": "lightgray", "size": 10, "family": font_family},
#                    showarrow=False)

fig.add_annotation(y=2.1, x=950, xanchor="center", yanchor="middle", text="Old English",
                   font={"color": "darkgray", "size": 10, "family": font_family},
                   showarrow=False)

fig.show()

# # write
# fig.write_image("oed.png", engine="kaleido")
# fig.write_image("oed.pdf", engine="kaleido")
# # download
# files.download("oed.pdf")
# files.download("oed.png")

# # full size for html-----------------------
# # fig.update_layout(width = 1200, height=600,
# #                   legend=dict(y=0.9, x=0, xanchor="left", yanchor="top")
# #                   )

# fig.write_html("oed.html")
# files.download("oed.html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import plotly.graph_objects as go
from plotly.validators.scatter.marker import SymbolValidator
# import plotly

raw_symbols = SymbolValidator().values
namestems = []
namevariants = []
symbols = []
for i in range(0,len(raw_symbols),3):
    name = raw_symbols[i+2]
    symbols.append(raw_symbols[i])
    namestems.append(name.replace("-open", "").replace("-dot", ""))
    namevariants.append(name[len(namestems[-1]):])

fig = go.Figure(go.Scatter(mode="markers", x=namevariants, y=namestems, marker_symbol=symbols,
                           marker_line_color="midnightblue", marker_color="lightskyblue",
                           marker_line_width=2, marker_size=15,
                           hovertemplate="name: %{y}%{x}<br>number: %{marker.symbol}<extra></extra>"))
fig.update_layout(title="Mouse over symbols for name & number!",
                  xaxis_range=[-1,4], yaxis_range=[len(set(namestems)),-1],
                  margin=dict(b=0,r=0), xaxis_side="top", height=1400, width=400)
# plotly.offline.plot(fig, filename='C:/plotlyplots/lifeExp.html')
fig.show()

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Variables
PolyU='#8f1329'
marker_size = 12
edge_size = 1
line_width = 5
font_size = 12
font_color = "black"
opacity = 0.75
font_family = "Raleway"
# colors = ['rgb(95, 70, 144)', 'rgb(29, 105, 150)', 'rgb(56, 166, 165)', 'rgb(15, 133, 84)', 'rgb(115, 175, 72)', 'rgb(237, 173, 8)', 'rgb(225, 124, 5)']

data = pd.read_csv(path+'oed.csv')

fig = go.Figure(data=go.Scatter(name='ginger',
                                x=data['year'],
                                y=data['id'],
                                text=data['entry'],
                                textposition = 'top right',
                                textfont={"color": font_color, "size": font_size, "family": font_family},
                                mode='markers',
                                # marker_color = 'darkorange',
                                marker=dict(symbol='diamond', color=PolyU, size=marker_size, opacity=opacity, line=dict(color='white', width=edge_size)), 
                                opacity=1, #for marker & text
                                )) 

for i, j, k in zip(data['year'], data['id'], data['entry']):
    fig.add_annotation(go.layout.Annotation(
    x = i,
    y = j,
    xref="x",
    yref="y",
    text = k,
    font=dict(
            family=font_family,
            size=font_size,
            color=font_color),
    align='center',
    showarrow=False,
    yanchor='bottom',
    textangle=-90))

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='gainsboro')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='gainsboro')
fig.update_layout(paper_bgcolor="rgb(0,0,0,0)") # transparent
fig.update_layout(paper_bgcolor="white",
                  plot_bgcolor="white")

# fig.update_layout(title='Propagation of terms')

# fig.update_layout(
    # width = 1200, height=600,
    # margin={"r":0,"t":0,"l":0,"b":0},
    # showlegend = True,
    # legend=dict(y=0, x=0, xanchor="left", yanchor="bottom", bgcolor='rgba(255,255,255,0.5)',  
                # font={"color": "black", "size": 16, "family": font_family}, traceorder = 'normal', orientation="v"),
    # title=dict(y=0.99, x=0, xanchor='left', yanchor='top', text='Title',   
              #  font={"color": "black", "size": 20, "family": font_family}),
    # hoverlabel=dict(#bgcolor="white", 
                    # font_size=12, 
                    # font_family=font_family),
    # )

fig.show()

