2021-10-19	<br/>
Big Pumpkins <br/>
https://github.com/rfordatascience/tidytuesday/blob/master/data/2021/2021-10-19/readme.md

Line Charts with Advanced Annotation and Labeling - Complete 

https://towardsdatascience.com/highlighted-line-chart-with-plotly-express-e69e2a27fea8

In [58]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
pio.renderers.default='notebook'
import os

In [59]:
# Load the pumpkin weigh-off results from the local CSV
# (presumably the Tidy Tuesday 2021-10-19 "Big Pumpkins" file linked above).
df = pd.read_csv("pumpkins.csv")

In [60]:
# Preview the first five raw rows.
df.head(n=5)

Unnamed: 0,id,place,weight_lbs,grower_name,city,state_prov,country,gpc_site,seed_mother,pollinator_father,ott,est_weight,pct_chart,variety
0,2013-F,1,154.5,"Ellenbecker, Todd & Sequoia",Gleason,Wisconsin,United States,Nekoosa Giant Pumpkin Fest,209 Werner,Self,184.0,129.0,20.0,
1,2013-F,2,146.5,"Razo, Steve",New Middletown,Ohio,United States,Ohio Valley Giant Pumpkin Growers Weigh-off,150.5 Snyder,,194.0,151.0,-3.0,
2,2013-F,3,145.0,"Ellenbecker, Todd & Sequoia",Glenson,Wisconsin,United States,Mishicot Pumpkin Fest,209 Werner,103 Mackinnon,177.0,115.0,26.0,
3,2013-F,4,140.8,"Martin, Margaret & Glen",Combined Locks,Wisconsin,United States,Cedarburg Wine and Harvest Festival,109 Martin '12,209 Werner '12,194.0,151.0,-7.0,
4,2013-F,5,139.0,"Barlow, John",,Wisconsin,United States,Stillwater Harvestfest,130.5 McKinnon,open,0.0,0.0,0.0,


In [61]:
# (rows, columns) of the table as loaded, before any filtering.
df.shape

(28065, 14)

In [62]:
# Keep only the 1st-place winners. `place` is compared against the string "1",
# so the column is treated as text here.
# The explicit .copy() makes the result an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df = df.loc[df["place"] == "1"].copy()

In [63]:
# ids look like "2013-F": the first four characters are the year (kept as a string).
# The vectorised .str slice replaces the row-wise apply/lambda.
df["year"] = df["id"].str[:4]

In [64]:
# ids look like "2013-F": the last character is the one-letter type code.
# The vectorised .str index replaces the row-wise apply/lambda.
df["type"] = df["id"].str[-1]

In [65]:
# est_weight is read as text; strip the commas (presumably thousands
# separators) and convert the column to float in one chained step.
df["est_weight"] = (
    df["est_weight"]
    .str.replace(",", "")
    .astype(float)
)

In [66]:
# Preview the first five rows of the filtered frame, including the derived columns.
df.head(n=5)

Unnamed: 0,id,place,weight_lbs,grower_name,city,state_prov,country,gpc_site,seed_mother,pollinator_father,ott,est_weight,pct_chart,variety,year,type
0,2013-F,1,154.5,"Ellenbecker, Todd & Sequoia",Gleason,Wisconsin,United States,Nekoosa Giant Pumpkin Fest,209 Werner,Self,184.0,129.0,20.0,,2013,F
292,2013-L,1,139.25,"Ansems, Fred",Steam Mill,Nova Scotia,Canada,AVGVG Glad Gardens,120 Kline,123 Rumancik,0.0,0.0,0.0,,2013,L
496,2013-P,1,2032.0,"Mathison, Tim",Napa,California,United States,Uesugi Farms Weigh-off,2009 Wallace,1554 Mathison,475.0,2000.0,2.0,,2013,P
2178,2013-S,1,1264.0,"Pierpont, Edwin",Jefferson,Maine,United States,Damariscotta Pumpkinfest and Regatta Weigh-off,996 Haist,1109 Pierpont,355.0,983.0,29.0,,2013,S
2330,2013-T,1,6.83,"Boudyo, Fabrice",Carsac De Gurson,Other,France,Early Tomatoes,5.07 Boudyo,,,,,,2013,T


In [67]:
# Build the colour lookup: the two highlighted types get their own colour
# ("W" red, "F" orange); every other type falls back to light grey.
colors = pd.DataFrame(df["type"].unique(), columns=["type"])
colors["color"] = colors["type"].map({"W": "red", "F": "orange"}).fillna("lightgrey")

# color_map is a plain dict keyed by type code, e.g. {"W": "red", "F": "orange", <other>: "lightgrey", ...}
color_map = dict(zip(colors["type"], colors["color"]))

# show only the highlighted entries of the dictionary
{k: color_map[k] for k in color_map if k in ["W", "F"]}


{'F': 'orange', 'W': 'red'}

In [68]:
# Rank the highlighted types first (W = 1, F = 2, every other type = 3) and
# order the rows by that rank, then by year.
# assign(...).sort_values(...) returns a new frame instead of mutating in
# place, so re-running the cell is safe and no SettingWithCopyWarning is raised.
df = (
    df.assign(order=df["type"].map({"W": 1, "F": 2}).fillna(3))
      .sort_values(by=["order", "year"], ascending=True)
)
df.head(3)

Unnamed: 0,id,place,weight_lbs,grower_name,city,state_prov,country,gpc_site,seed_mother,pollinator_father,ott,est_weight,pct_chart,variety,year,type,order
2620,2013-W,1,350.5,"Kent, Chris",Seuterville,Tennessee,United States,Operation Pumpkin,291 kent,274 kent,234.0,348.0,1.0,,2013,W,1.0
5800,2014-W,1,297.6,Gabriele Bartoli,Novellara,Other,Italy,Festa della zucca di sale,233.5 bright,260 bright,218.0,283.0,5.0,,2014,W,1.0
9116,2015-W,1,302.0,"Kent, Christopher",sevierville,Tennessee,United States,Great Pumpkin and Watermelon Weigh-Off,251 Kent,s1BB,221.0,294.0,3.0,,2015,W,1.0


In [77]:
def _edge_label(text, y, color, side):
    """Build a text annotation pinned to the left or right border of the plot area.

    Coordinates are in "paper" units (fractions of the plot area).
    side="left" anchors the label's right edge at x=0, so the text sits just
    left of the plot; side="right" anchors its left edge at x=1, so the text
    sits just right of the plot.
    """
    on_left = side == "left"
    return {
        "xref": "paper", "yref": "paper",
        "x": 0 if on_left else 1, "y": y,
        "xanchor": "right" if on_left else "left", "yanchor": "top",
        "text": text,
        "font": dict(family="Arial", size=12, color=color),
        "showarrow": False,
    }


# Highlighted line chart: one line per type, coloured through color_map.
fig = px.line(
    df.sort_values(by=["order", "year"], ascending=True),
    x="year",
    y="est_weight",
    color="type",
    line_group="type",
    color_discrete_map=color_map,
)

# NOTE(review): the numeric labels (348, 129, 358, 125), the annotation
# coordinates and the paper y-positions below are hard-coded; re-check them
# against the data if the input file changes.
fig.update_layout(
    title="Weight of Watermelon and Pumpkin over time",
    # remove the legend; the end-of-line labels below name the highlighted lines
    showlegend=False,
    # make the y-axis invisible
    yaxis={"visible": False},
    # `year` holds strings; force a numeric (linear) x-axis
    xaxis={"type": "linear"},
    # create the annotations
    annotations=[
        # point annotation (with arrow)
        {"x": 2014, "y": 226, "ay": -40,
         "text": "<b>Closest pumpkin<br> weighs to watermelon</b>",
         "arrowhead": 3, "showarrow": True,
         "font": {"size": 15}},
        # area annotation (rotated text on a coloured background)
        {"x": 2019, "y": 900,
         "text": "<b>Both are much lower compared to the other fruits</b>",
         "textangle": -25,
         "showarrow": False,
         "bgcolor": "lightblue",
         "font": {"size": 10}},
        # start-of-line labels, placed just left of the plot area
        _edge_label("348", y=0.20, color="red", side="left"),
        _edge_label("129", y=0.1, color="orange", side="left"),
        # end-of-line labels (these act as the legend), placed just right of the plot area
        _edge_label("Watermelon (358)", y=0.20, color="red", side="right"),
        _edge_label("Pumpkin (125)", y=0.15, color="orange", side="right"),
    ],
)
fig.show()