In [43]:
# Artificial Intelligence HW1
# Jan 20, 2023
# Booth 32200
# Nic Marlton

### Pseudo Code
# dependencies
# dictionary of spot ids (pulled manually from surfline.com)
# use spot ids list to query surfline api
    # note: either the conditions url or the wave url could be used
    # `conditions` returns ratings 'POOR' to 'EPIC', whereas `wave`
    # returns an optimal rating 0, 1, 2. I chose the optimal rating
    # given its relative simplicity
    # helpful overview of api below:
    # https://pkg.go.dev/github.com/mhelmetag/surflinef/v2#section-readme
# assign surfline ratings to photos
    # in a future iteration of this tool, I would want to pull photos
    # and ratings in real time. that would make this step unnecessary.
    # for the purposes of this proof of concept, I am manually assigning
    # ratings.
# sort 90% of photos into a 2 X 2 grid (4 classes) - good v bad X crowded v quiet
# store remaining 10% for test data

### Process after this point occurs in Google's Teachable Machine 

In [44]:
# Dependencies
import pandas as pd
import numpy as np
import requests

In [45]:
# Surfline spotIds keyed by spot name (key order is the query order).
# NOTE(review): 'Lowers' carries the same spotId as 'LowerTrestles', so that
# spot is listed twice - confirm whether both entries are intended.
spot_id_dict = dict(
    PlesurePoint='5842041f4e65fad6a7708807',
    UpperTrestles='5842041f4e65fad6a7708887',
    SanClementeStateBeach='5842041f4e65fad6a77088cf',
    LowerTrestles='5842041f4e65fad6a770888a',
    SteamerLane='5842041f4e65fad6a7708805',
    TStreetOverview='5842041f4e65fad6a7708830',
    Church='5842041f4e65fad6a770888b',
    Strands='5842041f4e65fad6a77088d5',
    Jacks='5842041f4e65fad6a770880b',
    SaltCreekOverview='5842041f4e65fad6a770882e',
    # Hightower='584204214e65fad6a7709cbd',  # excluded: Hightower is in FL
    Lowers='5842041f4e65fad6a770888a',
    # VenicePierSouthside='5842041f4e65fad6a7708b17',  # excluded: Venice Pier is in FL
)

In [46]:
# Concatenate data to create list of files w & w/o surfers 
# I did not include these csv files in the git repo because of their size.
# they can be downloaded with the photo data here:
# https://universe.roboflow.com/surfline/surfer-spotting/dataset/1
def create_file_name_df(csv_paths=('test_annotations.csv',
                                   'train_annotations.csv',
                                   'valid_annotations.csv')):
    """
    Builds the list of distinct photo file names found in the Roboflow
    annotation csv files.

    Input:
        - csv_paths: iterable of csv paths to combine. Defaults to the
        Roboflow test, train and validation annotation files in the current
        working directory. Every file must contain a `filename` column.

    Output:
        - pandas Series named `filename` holding each non-null file name
        once, in first-seen order, indexed 0..n-1.
    """
    annotation_frames = [pd.read_csv(path) for path in csv_paths]
    annotations_df = pd.concat(annotation_frames, ignore_index=True)

    # dropna -> drop_duplicates -> fresh 0..n-1 index, all on the one column
    # we care about.
    file_names = (
        annotations_df['filename']
        .dropna()
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # The result has to be returned explicitly; without this the function
    # hands back None and the work above is thrown away.
    return file_names

# NOTE(review): the return value of this call is not captured; assign it to a
# name if the file list is needed by later cells.
create_file_name_df()

In [59]:
# Use spot_id_dict to query surfline api
def get_wave_data(spot_id, days=1, timeout=10):
    """
    Executes an API query against the surfline.com wave url for a single
    spotId and returns the first wave record of the response.

    Input:
        - spot_id: spotId string, i.e. a value of spot_id_dict such as
        '5842041f4e65fad6a7708807' (the entry for 'PlesurePoint')
        - days: value sent as the `days` query parameter (default 1)
        - timeout: seconds to wait on the server before giving up
        (default 10)

    Output:
        - 1 row, 2 column df (timestamp and surf.optimalScore) for the given
        spot. The df is an independent copy, so callers may add columns to
        it without touching (or being warned about) the parsed response.

    Raises:
        - requests.HTTPError when the server answers with an error status
        - requests.Timeout when no answer arrives within `timeout` seconds
        - KeyError when the payload lacks the expected fields
    """
    # API call to surfline. Passing `params` lets requests build and
    # url-encode the query string instead of gluing it together by hand.
    base_url = 'https://services.surfline.com/kbyg/regions/forecasts/wave'
    response = requests.get(
        base_url,
        params={'spotId': spot_id, 'days': days},
        timeout=timeout,
    )
    # Fail here with the HTTP status rather than later with a confusing
    # KeyError while parsing an error payload.
    response.raise_for_status()

    # Flatten the nested `wave` records (nested keys become dotted column
    # names such as surf.optimalScore) and keep the two columns of interest.
    wave_df = pd.json_normalize(response.json()['data'], record_path=['wave'])
    return wave_df[['timestamp', 'surf.optimalScore']].head(1).copy()

def capture_current_conditions(spot_id_dict):
    """
    Captures wave data for a given day by running get_wave_data() for each
    record in spot_id_dict. In a future version of this module it could also
    save records to a file or db for use in photo labeling.

    Input:
        - spot_id_dict: mapping of spot name -> surfline spotId.

    Output:
        - df with one row per entry of spot_id_dict, made of the columns
        returned by get_wave_data() plus `spot` (the dict key), `spot_id`
        (the dict value) and `date` (today's date). Rows keep the index
        they had in the per-spot frames. An empty spot_id_dict yields an
        empty df.
    """
    # Resolve the date once so every row of a run carries the same value.
    capture_date = pd.Timestamp.today().date()

    # assign() returns a new frame, so the frame handed back by
    # get_wave_data() is never modified in place.
    spot_frames = [
        get_wave_data(spot_id).assign(
            spot=spot_name,
            spot_id=spot_id,
            date=capture_date,
        )
        for spot_name, spot_id in spot_id_dict.items()
    ]

    # pd.concat refuses an empty list, so handle "no spots" explicitly.
    if not spot_frames:
        return pd.DataFrame()

    # One concat at the end instead of re-copying a growing frame on every
    # iteration.
    return pd.concat(spot_frames)

# Query surfline once per entry in spot_id_dict and keep the combined result.
forecast_df = capture_current_conditions(spot_id_dict)

In [61]:
# Sample output for current date
forecast_df

Unnamed: 0,timestamp,surf.optimalScore,spot,spot_id,date
0,1674201600,2,PlesurePoint,5842041f4e65fad6a7708807,2023-01-20
0,1674201600,0,UpperTrestles,5842041f4e65fad6a7708887,2023-01-20
0,1674201600,0,SanClementeStateBeach,5842041f4e65fad6a77088cf,2023-01-20
0,1674201600,0,LowerTrestles,5842041f4e65fad6a770888a,2023-01-20
0,1674201600,2,SteamerLane,5842041f4e65fad6a7708805,2023-01-20
0,1674201600,0,TStreetOverview,5842041f4e65fad6a7708830,2023-01-20
0,1674201600,0,Church,5842041f4e65fad6a770888b,2023-01-20
0,1674201600,0,Strands,5842041f4e65fad6a77088d5,2023-01-20
0,1674201600,2,Jacks,5842041f4e65fad6a770880b,2023-01-20
0,1674201600,0,SaltCreekOverview,5842041f4e65fad6a770882e,2023-01-20
