In [43]:
# Artificial Intellegence HW1 
# Jan 20, 2023
# Booth 32200
# Nic Marlton

### Psuedo Code
# dependencies
# dictionary of spot ids (pulled mannually from surfline.com)
# use spot ids list to query surfline api
    # note: either the conditions url or the wave url could be used
    # `conditions` returns ratings 'POOR' to 'EPIC', whereas `wave`
    # returns an optimal rating 0, 1, 2. I chose the optimal rating
    # given its relative simplicity
    # helpful overview of api below:
    # https://pkg.go.dev/github.com/mhelmetag/surflinef/v2#section-readme
# assign surfline ratings to photos
    # in a future itteration of this tool, I would want to pull photos
    # and ratings in real time. that would make this step unnecessary. 
    # for the purposes of this proof of concept, I am mannually assigning 
    # ratings. 
# sort 90% of photos into 4 X 4 - good v bad X crowded v quiet
# store remaining 10 % for test data

### Process after this point occurs in Google's Teachable Machine 

In [44]:
# Dependencies
import pandas as pd
import numpy as np
import requests
from random import randint


In [45]:
# Dictionary of spotIds 
spot_id_dict = {
    'PlesurePoint':'5842041f4e65fad6a7708807',
    'UpperTrestles':'5842041f4e65fad6a7708887',
    'SanClementeStateBeach':'5842041f4e65fad6a77088cf',
    'LowerTrestles':'5842041f4e65fad6a770888a',
    'SteamerLane':'5842041f4e65fad6a7708805',
    'TStreetOverview':'5842041f4e65fad6a7708830',
    'Church':'5842041f4e65fad6a770888b',
    'Strands':'5842041f4e65fad6a77088d5',
    'Jacks':'5842041f4e65fad6a770880b',
    'SaltCreekOverview':'5842041f4e65fad6a770882e',
    # 'Hightower':'584204214e65fad6a7709cbd', // excluding Hightower given it is in FL 
    'Lowers':'5842041f4e65fad6a770888a',
    # 'VenicePierSouthside':'5842041f4e65fad6a7708b17' // excluding Venice Pier given it is in FL
}

In [59]:
# Use spot_id_dict to query surfline api
def get_wave_data(spot_id):
    """
    Executes an API query to surfline.com for a given spotId, e.g.
    'PlesurePoint':'5842041f4e65fad6a7708807'. Specifically, it uses 
    the wave url

    Input:
        - spot_id from spot_id_dict

    Output:
        - 2 column df timestamp and optimalScore for given beach.
    """
    # API Call to sufrline
    login_base_url = 'https://services.surfline.com/trusted/token'
    base_url = 'https://services.surfline.com/kbyg/regions/forecasts/wave'
    response = requests.request("GET", base_url + '?spotId=' + spot_id + '&days=1')
    
    # Transform data for output in 
    df = pd.json_normalize(response.json()['data'],record_path =['wave'])
    df = df[['timestamp','surf.optimalScore']]
    return df.head(1)

def capture_current_conditions(spot_id_dict):
    """
    Captures wave data for a given day by running get_wave_data() 
    for each record in spot_id_dict. In a future version of this module
    it could also save records to a file or db for use in photo labeling

    Input: 
        - takes spot_id_dict as input.

    Output:
        - df of optimalScore and metadata for spots in spot_id_dict for a 
        given date
    """
    forecast_df = pd.DataFrame()
    for i in spot_id_dict:
        spot_forecast_df = get_wave_data(spot_id_dict[i])
        spot_forecast_df['spot'] = i
        spot_forecast_df['spot_id'] = spot_id_dict[i]
        spot_forecast_df['date'] = pd.Timestamp.today().date()
        forecast_df = pd.concat([forecast_df,spot_forecast_df])

    return forecast_df

forecast_df = capture_current_conditions(spot_id_dict)

In [74]:
# Sample output for current date
forecast_df.to_csv('forcast_data.csv')

In [86]:
# Concatenate data to create list of files w & w/o surfers 
# I did not include these csv files in the git repo because of their size.
# they can be downlaoded with the photo data here:
# https://universe.roboflow.com/surfline/surfer-spotting/dataset/1
def create_file_name_df():
    # Concatenate all file names from full Roboflow data set (~40k photos)
    df1 = pd.read_csv('test_annotations.csv') # csv describing Roboflow test data
    df2 = pd.read_csv('train_annotations.csv') # csv describing Roboflow train data 
    df3 = pd.read_csv('valid_annotations.csv') # csv describing Roboflow validation data
    annotations_df = pd.concat([df1,df2,df3],ignore_index=True)
    
    # Remove rows for non-CA beaches and NaN
    annotations_df['filename'] = annotations_df['filename'].apply(lambda x: np.nan 
            if str(x).__contains__('VenicePierSouthside') or str(x).__contains__('VenicePierSouthside') else x)
    df_mask = annotations_df['filename'].isna()
    annotations_df = annotations_df[~df_mask]
    
    # Get count of surfers in each file
    annotations_df = annotations_df.groupby(['filename']).agg({'filename':'count'})#drop_duplicates().reset_index()
    annotations_df = annotations_df.rename(columns={'filename':'count'}).reset_index()
    print(annotations_df)

# Dataframe of files that include surferss
file_name_df = create_file_name_df()

                                                filename  count
0      Church_2021_02_19_0102pm_frame_12_left_jpg.rf....      5
1      Church_2021_02_19_0102pm_frame_12_left_jpg.rf....     10
2      Church_2021_02_19_0102pm_frame_12_left_jpg.rf....     12
3      Church_2021_02_19_0102pm_frame_12_left_jpg.rf....      1
4      Church_2021_02_19_0102pm_frame_12_left_jpg.rf....      1
...                                                  ...    ...
14078  UpperTrestles_2021_02_19_1253pm_frame_9_left_j...      3
14079  UpperTrestles_2021_02_19_1253pm_frame_9_left_j...     16
14080  UpperTrestles_2021_02_19_1253pm_frame_9_right_...      1
14081  UpperTrestles_2021_02_19_1253pm_frame_9_right_...      1
14082  UpperTrestles_2021_02_19_1253pm_frame_9_right_...      1

[14083 rows x 2 columns]


In [73]:
# Select 100 images with surfers and 100 images w/o surfers
# file will code images as 0 (few surfers: < 3), or 1 (many surfers: > 2)
# Mannually code images as 0 (poor surf), 2 (good surf) 
    # note: we are ignoring the 1 optimalScore rating available on surfline
    # in order to simplify our classification
def choose_200_files():
    files_to_encode_df = pd.DataFrame()
    for i in range (100):
        randint(0,len())


zsh:1: bad pattern: [surfline_api_call.py,


In [None]:

# %%! ls .
# # myvar