- Requirements.txt should contain the following third-party packages (all other imported modules are part of the standard Python library):
    - requests
    - pandas
    - pytz
    - beautifulsoup4

In [91]:
################################################################################
# make these imports into a set of requirements (see above)
################################################################################

import requests
import pandas as pd
from datetime import date, timedelta, datetime
import pytz
import json

In [92]:
################################################################################
# set and create some variables
################################################################################

# Coordinates for [location].
# Kept as strings because they are only interpolated into the sunrise/sunset
# API request URL as the `lat` and `lng` query parameters.
latitude = '55.951009'
longitude = '-3.100191'

# Set the start date to the current date (as reported by the machine running
# the notebook); the one-week sunrise/sunset fetch starts from this day.
start_date = date.today()

In [93]:
################################################################################
# function to fetch sunrise and sunset data
################################################################################

def fetch_sunrise_sunset_data(date_str, timeout=10):
    """Fetch sunrise and sunset times for one date from the sunrise-sunset.org API.

    The request uses the module-level ``latitude`` and ``longitude`` and asks
    for unformatted (``formatted=0``) timestamps.

    Args:
        date_str: The date to query, as a string (the notebook passes
            ``date.isoformat()`` output, e.g. ``'2023-06-01'``).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook kernel.

    Returns:
        The decoded JSON response as a dict, or ``None`` if the request failed,
        returned a non-200 status code, or did not contain valid JSON. In every
        failure case a message is printed.
    """
    url = 'https://api.sunrise-sunset.org/json'
    params = {
        'lat': latitude,
        'lng': longitude,
        'date': date_str,
        'formatted': 0,
    }

    # Network-level failures (DNS, refused connection, timeout) are reported the
    # same way as HTTP errors instead of aborting the whole one-week loop.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching sunrise/sunset data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching sunrise/sunset data: {response.status_code}")
        return None

    # A 200 response with a non-JSON body raises a ValueError subclass.
    try:
        return response.json()
    except ValueError as exc:
        print(f"Error fetching sunrise/sunset data: {exc}")
        return None

################################################################################
# function to convert times to local
################################################################################

def convert_to_bst(time_str):
    """Convert an ISO 8601 timestamp to UK local time, formatted as ``HH:MM``.

    The target zone is ``Europe/London``, so the result is in BST while
    daylight saving is in effect and in GMT otherwise.

    Args:
        time_str: An ISO 8601 date-time string accepted by
            ``datetime.fromisoformat``. If it carries a UTC offset, that offset
            is honoured; if it has none, the time is assumed to be GMT/UTC.

    Returns:
        The local time as a zero-padded 24-hour ``'HH:MM'`` string.
    """
    london = pytz.timezone('Europe/London')
    parsed = datetime.fromisoformat(time_str)

    # Only attach GMT/UTC when the string had no offset of its own; overwriting
    # an existing offset would silently shift the instant being described.
    if parsed.tzinfo is None:
        parsed = pytz.utc.localize(parsed)

    return parsed.astimezone(london).strftime('%H:%M')

################################################################################
# use the functions to fetch times for one week
################################################################################

# One dict per day: {'date', 'sunrise', 'sunset'}
sun_data = []

for day_offset in range(7):
    # ISO date string for the day `day_offset` days after the start date
    date_str = (start_date + timedelta(days=day_offset)).isoformat()

    # Ask the API for this day's sunrise and sunset
    sun_info = fetch_sunrise_sunset_data(date_str)

    # Nothing usable came back: report it and move on to the next day
    if not sun_info or 'results' not in sun_info:
        print(f"No sun data fetched for date: {date_str}")
        continue

    # Convert both times to local time and record the day
    results = sun_info['results']
    sun_data.append({
        'date': date_str,
        'sunrise': convert_to_bst(results['sunrise']),
        'sunset': convert_to_bst(results['sunset']),
    })


In [94]:
################################################################################
# put the data in a dataframe
################################################################################

# Each dict collected in sun_data becomes one row; its keys ('date', 'sunrise',
# 'sunset') become the columns. Days whose fetch failed are not in sun_data,
# so they produce no row here.
df = pd.DataFrame(sun_data)


In [95]:
################################################################################
# index the sun data by its 'date' column
################################################################################

# Rebind df to the re-indexed frame rather than mutating it in place
df = df.set_index('date')

In [96]:
# Turn the date strings in the index into proper datetimes
df = df.set_axis(pd.to_datetime(df.index), axis=0)

# Column layout: weekday name first, followed by the sun times
column_order = ['day_of_week', 'sunrise', 'sunset']

# Derive the weekday name from the datetime index, then apply the layout
df = df.assign(day_of_week=df.index.day_name()).loc[:, column_order]

In [97]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

url = 'https://www.tidetime.org/europe/united-kingdom/portobello.htm'

# Use a timeout so a stalled connection cannot hang the kernel, and stop on an
# HTTP error instead of trying to parse an error page as if it were tide data.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Find the tide table in the soup.
table = soup.find('table', {'id': 'tideTable'})
if table is None:
    # Without this check the failure would surface as an AttributeError on None.
    raise ValueError(f"No table with id 'tideTable' found at {url}")

# Find all the table rows in the table.
rows = table.find_all('tr')

# Initialize an empty list to hold one [day, type, time, height] entry per tide.
data = []

# Running counter of table cells seen so far; each <td> is treated as one day.
# NOTE(review): this assumes every <td> in the table represents a day - confirm
# against the page markup.
day = 0

# Loop through each row.
for row in rows:
    # Find all the table data cells in the row.
    cells = row.find_all('td')

    # Loop through each cell.
    for cell in cells:
        # Increment the day counter.
        day += 1

        # Find all the high and low tides in the cell.
        tides = cell.find_all('li', {'class': ['highTide', 'lowTide']})

        # Loop through each tide.
        for tide in tides:
            # The tide type is the text of the 'tidal-state' span; the time is
            # the <strong> text with that type removed; the height is whatever
            # sits between the last '(' and the first ')' of the item text.
            tide_type = tide.find('span', {'class': 'tidal-state'}).text
            tide_time = tide.find('strong').text.replace(tide_type, '').strip()
            tide_height = tide.text.split(')')[0].split('(')[-1]

            # Add the day, tide type, time and height to the data list.
            data.append([day, tide_type, tide_time, tide_height])

# Convert the data list into a DataFrame.
df_tides = pd.DataFrame(data, columns=['Day', 'Tide Type', 'Tide Time', 'Tide Height'])

# The tide type is not used downstream; drop it without mutating in place.
df_tides = df_tides.drop(columns='Tide Type')

# print(df)
# print(df_tides)

In [98]:
# Number the tides within each day (1, 2, 3, ...) and spread them across
# columns, giving one row per day with a (value name, col_id) column MultiIndex.
df_tides_pivot = (
    df_tides
    .assign(col_id=df_tides.groupby('Day').cumcount().add(1))
    .pivot_table(
        index='Day',
        columns='col_id',
        values=['Tide Time', 'Tide Height'],
        aggfunc='first',
    )
    # Order the columns by col_id first, then by value name. Alphabetically
    # 'Tide Height' precedes 'Tide Time' within each col_id.
    .sort_index(axis=1, level=[1, 0])
    # 'Day' is the pivot index rather than a column, so with errors='ignore'
    # this drop leaves the frame unchanged.
    .drop('Day', axis=1, errors='ignore')
)

# Flatten the two column levels into single names joined by '_',
# e.g. ('Tide Time', 1) -> 'Tide Time_1'
df_tides_pivot.columns = df_tides_pivot.columns.map('{0[0]}_{0[1]}'.format)



In [99]:
# Desired layout: time then height for each of the four tide slots, i.e.
# Tide Time_1, Tide Height_1, ..., Tide Time_4, Tide Height_4
ordered_columns = [
    f'{field}_{slot}'
    for slot in range(1, 5)
    for field in ('Tide Time', 'Tide Height')
]

# Select the columns in that order
df_tides_pivot = df_tides_pivot.loc[:, ordered_columns]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7 entries, 1 to 7
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Tide Time_1    7 non-null      object
 1   Tide Height_1  7 non-null      object
 2   Tide Time_2    7 non-null      object
 3   Tide Height_2  7 non-null      object
 4   Tide Time_3    7 non-null      object
 5   Tide Height_3  7 non-null      object
 6   Tide Time_4    5 non-null      object
 7   Tide Height_4  5 non-null      object
dtypes: object(8)
memory usage: 504.0+ bytes
    Tide Time_1 Tide Height_1 Tide Time_2 Tide Height_2 Tide Time_3  \
Day                                                                   
1       06:07am         5.28m     12:24pm         0.88m     06:56pm   
2       12:47am         1.51m     07:01am         5.16m     01:25pm   
3       01:48am          1.7m     07:59am         5.03m     02:30pm   
4       02:50am         1.82m     09:05am         4.92m     03:31pm   

In [100]:
# Helper that parses a 12-hour clock string into a datetime.time
def convert_time(time_str):
    """Parse a string such as '06:07am' into a ``datetime.time``.

    Values that ``pd.isna`` reports as missing are returned as ``pd.NaT``.
    """
    # Missing entries (e.g. days with fewer tides) have nothing to parse
    if pd.isna(time_str):
        return pd.NaT

    # Rewrite the lowercase 'am'/'pm' suffix as ' AM'/' PM' to match '%I:%M %p'
    normalised = time_str.replace('am', ' AM').replace('pm', ' PM')
    parsed = pd.to_datetime(normalised, format='%I:%M %p')
    return parsed.time()

# Parse each of the four tide-time columns with convert_time
time_columns = [f'Tide Time_{slot}' for slot in range(1, 5)]
for column in time_columns:
    df_tides_pivot[column] = df_tides_pivot[column].apply(convert_time)

# Remove every 'm' character from each of the four tide-height columns
# (e.g. '5.28m' -> '5.28') and cast the result to float
height_columns = [f'Tide Height_{slot}' for slot in range(1, 5)]
for column in height_columns:
    df_tides_pivot[column] = df_tides_pivot[column].str.replace('m', '', regex=True).astype(float)


In [106]:
# The two frames are joined side by side purely by row position, so they must
# describe the same number of days. A failed sunrise/sunset fetch or a change
# in the tide table would otherwise produce shifted or NaN-padded rows silently.
if len(df) != len(df_tides_pivot):
    raise ValueError(
        f"Cannot merge by position: {len(df)} sun rows vs "
        f"{len(df_tides_pivot)} tide rows"
    )

# Reset both indexes so the rows line up on 0..n-1, then discard the tide
# table's 'Day' counter, which the 'date' column makes redundant.
df_merged = pd.concat(
    [df.reset_index(), df_tides_pivot.reset_index()],
    axis=1,
).drop(columns=['Day'])

df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           7 non-null      datetime64[ns]
 1   day_of_week    7 non-null      object        
 2   sunrise        7 non-null      object        
 3   sunset         7 non-null      object        
 4   Tide Time_1    7 non-null      object        
 5   Tide Height_1  7 non-null      float64       
 6   Tide Time_2    7 non-null      object        
 7   Tide Height_2  7 non-null      float64       
 8   Tide Time_3    7 non-null      object        
 9   Tide Height_3  7 non-null      float64       
 10  Tide Time_4    5 non-null      object        
 11  Tide Height_4  5 non-null      float64       
dtypes: datetime64[ns](1), float64(4), object(7)
memory usage: 800.0+ bytes


- DONE - Add a Day Of The Week column

- DONE - Fetch the Tides webpage using Python's requests library.

- DONE - Parse the HTML: use Beautiful Soup to parse the HTML and find the table containing the tide data.

- DONE - Extract and structure the data: go through cell by cell to extract the tide data and put it into a structured format. This might involve creating a Pandas DataFrame, or perhaps a more specialized data structure that's suited to your needs.

- Convert to the desired format: where needed, convert the times to a different format, or apply other transformations to prepare the data for analysis.

- py_Tides_old_remains contains the old bits of code for weather and visualisation.  They won't map directly but will hopefully offer some guidance on what remains to do.