<a href="https://colab.research.google.com/github/lucasreis95/world-surf-league-data/blob/main/notebooks/06_silver_events_dimensions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import libs
import pandas as pd
import pandas_gbq
import numpy as np

In [None]:
# Load the bronze-layer table `01_bronze.wsl_events_scrap` from BigQuery
# into a DataFrame; this frame is kept unmodified as the raw input.
df_raw = pandas_gbq.read_gbq(
    query_or_table='wsl-data-397017.01_bronze.wsl_events_scrap',
)

Downloading: 100%|[32m██████████[0m|


In [None]:
# add manual dimensions and corrections

In [None]:
# Work on a copy so the raw frame stays untouched and this cell can be re-run.
df = df_raw.copy()

# The event changed from Margaret River to Uluwatu, so the combined name
# "Uluwatu CT/ Margaret River Pro" is reduced to "Uluwatu CT".
# https://www.worldsurfleague.com/posts/331022/margaret-river-to-uluwatu-where-did-we-leave-off
event_names = df['event_name']
df['event_name'] = event_names.str.replace(
    'Uluwatu CT\\/ Margaret River Pro',
    'Uluwatu CT',
    regex = True,
)

# Append the location to the 2010 and 2011 "Rip Curl Search" events so that
# the country can later be derived from the event name.
# https://www.worldsurfleague.com/posts/458627/2010-rip-curl-pro-search-puerto-rico
# https://www.worldsurfleague.com/posts/27/final-day-highlights-2011-rip-curl-pro-search-san-francisco
rip_curl_search_names = {
    2010: 'Rip Curl Search Puerto Rico',
    2011: 'Rip Curl Search San Francisco',
}
is_rip_curl_search = df['event_name'] == 'Rip Curl Search'
for search_year, search_name in rip_curl_search_names.items():
    df.loc[is_rip_curl_search & (df['season_year'] == search_year), 'event_name'] = search_name

# Convert to lowercase so the lowercase dictionary keys can be matched.
df['event_name'] = df['event_name'].str.lower()

In [None]:
# Manual dictionary mapping a substring of the lowercased event name to the
# country hosting the event.
# Keys are looked up with a substring test (`key in event_name`) and the first
# matching key in insertion order is used, so keys must be lowercase and must
# not carry leading/trailing whitespace.
events_country_dict = {
    # australia
    'gold coast': 'australia',
    'bells beach': 'australia',
    'margaret river': 'australia',
    'newcastle': 'australia',
    'narrabeen': 'australia',  # no trailing space: also matches names ending in "narrabeen"
    'rottnest': 'australia',
    # brazil
    'santa catarina': 'brazil',
    'rio': 'brazil',
    # el salvador
    'el salvador': 'el salvador',
    # fiji
    'fiji': 'fiji',
    # france
    'france': 'france',
    # french polynesia
    'teahupoo': 'french polynesia',
    'tahiti': 'french polynesia',
    # hawaii
    'pipe masters': 'hawaii',
    'pipeline': 'hawaii',
    'sunset beach': 'hawaii',
    # indonesia
    'bali': 'indonesia',
    'uluwatu ct': 'indonesia',
    'g-land': 'indonesia',
    # mexico
    'mexico': 'mexico',
    # portugal
    'portugal': 'portugal',
    # puerto rico (spelled with a space, like the other multi-word countries)
    'puerto rico': 'puerto rico',
    # south africa
    'j-bay': 'south africa',
    # united states
    'trestles': 'united states',
    'new york': 'united states',
    'santa cruz': 'united states',
    'surf ranch': 'united states',
    'freshwater pro': 'united states',
    'san francisco': 'united states',
}

In [None]:
# Manual dictionary mapping a substring of the lowercased event name to the
# wave side. Allowed values: left, right, both, both-left, both-right.
# Keys are looked up with a substring test (`key in event_name`), so they must
# be lowercase and must not carry leading/trailing whitespace.
events_wave_side_dict = {
    # australia
    'gold coast': 'right',
    'bells beach': 'right',
    'margaret river': 'right',
    'newcastle': 'both-right',
    'narrabeen': 'both',  # no trailing space: also matches names ending in "narrabeen"
    'rottnest': 'left',
    # brazil
    'santa catarina': 'both',
    'rio': 'both-right',  # was 'right-both', which is not one of the allowed values
    # el salvador
    'el salvador': 'right',
    # fiji
    'fiji': 'left',
    # france
    'france': 'both',
    # french polynesia
    'teahupoo': 'left',
    'tahiti': 'left',
    # hawaii
    'pipe masters': 'both-left',
    'pipeline': 'both-left',
    'sunset beach': 'right',
    # indonesia
    'bali': 'right',
    'uluwatu ct': 'left',
    'g-land': 'left',
    # mexico
    'mexico': 'right',
    # portugal
    'portugal': 'both',
    # puerto rico
    'puerto rico': 'right',
    # south africa
    'j-bay': 'right',
    # united states
    'trestles': 'both',
    'new york': 'both',
    'santa cruz': 'right',
    'surf ranch': 'both',
    'freshwater pro': 'both',
    'san francisco': 'both',
}

In [None]:
# Manual dictionary mapping a substring of the lowercased event name to the
# wave type. Allowed values: barrel, point, beach.
# Keys are looked up with a substring test (`key in event_name`), so they must
# be lowercase and must not carry leading/trailing whitespace.
events_wave_type_dict = {
    # australia
    'gold coast': 'point',
    'bells beach': 'point',
    'margaret river': 'beach',
    'newcastle': 'beach',
    'narrabeen': 'beach',  # no trailing space: also matches names ending in "narrabeen"
    'rottnest': 'beach',
    # brazil
    'santa catarina': 'beach',
    'rio': 'beach',
    # el salvador
    'el salvador': 'point',
    # fiji
    'fiji': 'barrel',
    # france
    'france': 'beach',
    # french polynesia
    'teahupoo': 'barrel',
    'tahiti': 'barrel',
    # hawaii
    'pipe masters': 'barrel',
    'pipeline': 'barrel',
    'sunset beach': 'point',
    # indonesia
    'bali': 'beach',
    'uluwatu ct': 'point',
    'g-land': 'point',
    # mexico
    'mexico': 'point',
    # portugal
    'portugal': 'beach',
    # puerto rico
    'puerto rico': 'beach',
    # south africa
    'j-bay': 'point',
    # united states
    'trestles': 'beach',
    'new york': 'beach',
    'santa cruz': 'point',
    'surf ranch': 'beach',
    'freshwater pro': 'beach',
    'san francisco': 'beach',
}

In [None]:
# Manual dictionary mapping a substring of the lowercased event name to the
# average wave size (in feet).
# Keys are looked up with a substring test (`key in event_name`), so they must
# be lowercase and must not carry leading/trailing whitespace.
events_wave_size_dict = {
    # australia
    'gold coast': 5,
    'bells beach': 9,
    'margaret river': 10,
    'newcastle': 5,
    'narrabeen': 4,  # no trailing space: also matches names ending in "narrabeen"
    'rottnest': 6,
    # brazil
    'santa catarina': 3,
    'rio': 6,
    # el salvador
    'el salvador': 5,
    # fiji
    'fiji': 8,
    # france
    'france': 5,
    # french polynesia
    'teahupoo': 9,
    'tahiti': 9,
    # hawaii
    'pipe masters': 9,
    'pipeline': 9,
    'sunset beach': 10,
    # indonesia
    'bali': 5,
    'uluwatu ct': 5,
    'g-land': 6,
    # mexico
    'mexico': 5,
    # portugal
    'portugal': 5,
    # puerto rico
    'puerto rico': 3,
    # south africa
    'j-bay': 7,
    # united states
    'trestles': 4,
    'new york': 3,
    'santa cruz': 4,
    'surf ranch': 4,
    'freshwater pro': 4,
    'san francisco': 4,
}

In [None]:
def lookup_event_dimension(event_name, dimension_dict):
    """Return the value of the first key in ``dimension_dict`` contained in ``event_name``.

    Keys are tried in the dictionary's insertion order using a plain substring
    test, so the first matching key wins when several keys match.
    https://stackoverflow.com/questions/65957275/python-get-dictionary-value-if-key-is-contained-in-a-string

    Args:
        event_name: event name to inspect (a string).
        dimension_dict: mapping of event-name substring -> dimension value.

    Returns:
        The value stored under the first key that is a substring of ``event_name``.

    Raises:
        IndexError: if no key is contained in ``event_name``. The message names
            the unmatched event so the manual dictionaries can be extended.
    """
    for key, value in dimension_dict.items():
        if key in event_name:
            return value
    raise IndexError(
        f'no dictionary key is contained in event name {event_name!r}; '
        'add the event to the manual dimension dictionaries'
    )


# Create the event_* dimension columns from event_name and the manual dicts.
# An event that matches no key fails fast instead of silently producing nulls.
event_dimension_dicts = {
    'event_country': events_country_dict,
    'event_wave_side': events_wave_side_dict,
    'event_wave_type': events_wave_type_dict,
    'event_wave_size': events_wave_size_dict,
}
for dimension_column, dimension_dict in event_dimension_dicts.items():
    df[dimension_column] = df['event_name'].apply(
        lookup_event_dimension, args=(dimension_dict,)
    )

In [None]:
# Preview the first rows to check the derived event_* dimension columns.
df.head()

Unnamed: 0,season_year,event_order,event_name,event_country,event_wave_side,event_wave_type,event_wave_size
0,2010,1,quiksilver pro gold coast,australia,right,point,5
1,2011,1,quiksilver pro gold coast,australia,right,point,5
2,2012,1,quiksilver pro gold coast,australia,right,point,5
3,2013,1,quiksilver pro gold coast,australia,right,point,5
4,2014,1,quiksilver pro gold coast,australia,right,point,5


In [None]:
# Write the events table with its dimension columns to the silver dataset in
# BigQuery (`02_silver.wsl_events_dimensions`); `if_exists='replace'` overwrites
# the table when it already exists.
pandas_gbq.to_gbq(
    dataframe=df,
    destination_table='wsl-data-397017.02_silver.wsl_events_dimensions',
    project_id='wsl-data-397017',
    if_exists='replace',
)

100%|██████████| 1/1 [00:00<00:00, 1632.02it/s]
