# Sentinel Data

> Retrieving acquisition-plan data (KML files) from the Copernicus Sentinel website
- prettify: true

In [38]:
from bs4 import BeautifulSoup
import urllib.request
import xml.etree.ElementTree as ET
import geopandas as gpd
from shapely.geometry import Point, Polygon
import pandas as pd
import threading 
from datetime import datetime 
import builtins
from shapely.wkt import loads
debugMe = False

In [39]:
# Define the URL and KML identifier strings
# Each *_url is an HTML page that is scanned for download links; each *_kml_str is the
# lowercase substring a link's href must contain to be picked up by getLinks().
# sentinel_scraper() additionally looks for 's1a' / 's2a' inside the *_kml_str value to
# choose its row filter and the satellite label, so those prefixes are load-bearing.
s1_url = 'https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-1/observation-scenario/acquisition-segments'
s1_kml_str = 's1a_mp_user'
# Both Sentinel-2 patterns are looked up on the same page.
s2_url = 'https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2/acquisition-plans'
s2a_kml_str = 's2a_mp_acq__kml'
s2b_kml_str = 's2b_mp_acq__kml'

In [40]:
# Create Polygon from coordinates function 
def create_polygon(coords):
    """Build a shapely Polygon from a KML ``<coordinates>`` string.

    Parameters
    ----------
    coords : str
        Whitespace-separated tuples of the form ``lon,lat`` or ``lon,lat,alt``.

    Returns
    -------
    shapely.geometry.Polygon
        Polygon whose vertices are the ``(lon, lat)`` pairs in input order.
        An altitude component, when present, is discarded.

    Raises
    ------
    ValueError
        If a tuple does not have two or three comma-separated components, or
        if longitude/latitude cannot be converted to ``float``.
    """
    points = []
    for coord in coords.split():
        parts = coord.split(',')
        # KML allows the altitude to be omitted, so accept both "lon,lat" and
        # "lon,lat,alt" instead of insisting on exactly three fields.
        if len(parts) not in (2, 3):
            raise ValueError(
                f"expected 'lon,lat[,alt]' but got {coord!r}"
            )
        points.append((float(parts[0]), float(parts[1])))
    return Polygon(points)

In [52]:
def getLinks(url, kml_str):
    """Download every KML file linked from a Sentinel acquisition-plan page.

    Parameters
    ----------
    url : str
        Address of the HTML page that lists the KML downloads.
    kml_str : str
        Substring that an anchor's lower-cased ``href`` must contain for the
        link to be followed.

    Returns
    -------
    list of bytes
        Raw content of each matching KML file, in the order the links appear
        on the page. Empty when no link matches.

    Raises
    ------
    urllib.error.URLError
        If the page or one of the KML files cannot be fetched.
    """
    # Local import keeps this cell self-contained; urllib.parse is stdlib.
    from urllib.parse import urljoin

    # Import Sentinel URL (the context manager closes the connection for us)
    with urllib.request.urlopen(url) as page_response:
        soup = BeautifulSoup(page_response.read(), "html.parser")

    # Extract KML links from URL. Anchors without an href hand None to the
    # filter, so guard before calling .lower().
    kml_links = soup.find_all(
        'a', href=lambda href: bool(href) and kml_str in href.lower()
    )
    print('kml_links: ', kml_links)

    # Create a list to store the kml_content
    kml_contents = []
    for kml_link in kml_links:
        # Resolve the href against the page URL so that root-relative,
        # relative and absolute links all produce a valid address.
        kml_url = urljoin(url, kml_link['href'])
        with urllib.request.urlopen(kml_url) as kml_response:
            kml_contents.append(kml_response.read())

    # The original built this list but never handed it back to the caller.
    return kml_contents

In [47]:
def sentinel_scraper(url, kml_str):
    """Scrape Sentinel acquisition plans into a filtered GeoDataFrame.

    Fetches the KML content selected by ``kml_str`` through ``getLinks``, reads
    each Placemark's coordinates, begin/end time and "Mode" value (plus
    "Polarisation" when ``kml_str`` contains ``'s1a'``), drops exact duplicate
    records and keeps only the rows of interest.

    Parameters
    ----------
    url : str
        Page listing the KML downloads; passed straight to ``getLinks``.
    kml_str : str
        Link-matching substring. It also selects the row filter and the
        satellite label: with ``'s1a'`` rows need mode ``IW`` and polarisation
        ``DV`` and are labelled ``'Sentinel 1'``; otherwise rows need mode
        ``NOBS`` and are labelled ``'Sentinel 2A'`` when ``kml_str`` contains
        ``'s2a'`` and ``'Sentinel 2B'`` if not.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``time_start``, ``time_end``, ``geometry`` and ``satellite``.

    Raises
    ------
    TypeError
        If ``getLinks`` hands back ``None`` instead of KML content.
    """
    if debugMe: print('sentinel_scraper: ', kml_str, url)
    kml_payload = getLinks(url, kml_str)

    # Accept either one KML document or a collection of them, so this function
    # works whether getLinks yields a single payload or one per link.
    if kml_payload is None:
        raise TypeError("getLinks returned None; expected KML content")
    if isinstance(kml_payload, (bytes, str)):
        kml_documents = [kml_payload]
    else:
        kml_documents = list(kml_payload)

    # Define the XML namespace
    namespace = {'kml': 'http://www.opengis.net/kml/2.2'}

    # kml_str is the only identifier in scope that says which mission is being
    # scraped (the original tested an undefined `kml_url` here).
    is_s1a = 's1a' in kml_str
    columns = ['coordinates', 'time_start', 'time_end', 'mode']
    if is_s1a:
        columns.append('polarisation')

    # Each dictionary in `data` is one observation; its keys become DataFrame columns.
    data = []
    seen = set()  # value tuples already recorded, for O(1) duplicate checks
    for kml_content in kml_documents:
        # Parse the KML content
        root = ET.fromstring(kml_content)

        for placemark in root.findall('.//kml:Placemark', namespace):
            current_data = {
                'coordinates': placemark.find('.//kml:coordinates', namespace).text.strip(),
                'time_start': placemark.find('.//kml:begin', namespace).text,
                'time_end': placemark.find('.//kml:end', namespace).text,
                'mode': placemark.find('.//kml:Data[@name="Mode"]/kml:value', namespace).text,
            }
            if is_s1a:
                current_data['polarisation'] = placemark.find(
                    './/kml:Data[@name="Polarisation"]/kml:value', namespace
                ).text

            record_key = tuple(current_data[column] for column in columns)
            if record_key in seen:
                # Print a message indicating that the data already exists
                print("Data already exists:", current_data)
            else:
                seen.add(record_key)
                data.append(current_data)

    # Convert the data to a DataFrame; explicit columns keep the frame usable
    # (just empty) when nothing was scraped.
    df = pd.DataFrame(data, columns=columns)

    # Create geometry column
    df['geometry'] = df['coordinates'].apply(create_polygon)

    # Create a GeoDataFrame with Polygon geometry
    gdf = gpd.GeoDataFrame(df, geometry='geometry')

    # Apply the filter for mode and polarisation, depending on s1 or s2
    if is_s1a:
        gdf_filtered = gdf[(gdf['mode'] == 'IW') & (gdf['polarisation'] == 'DV')]
    else:
        gdf_filtered = gdf[(gdf['mode'] == 'NOBS')]

    # Format GeoDataFrame: drop the helper columns and tag the satellite
    helper_columns = ['coordinates', 'mode']
    if is_s1a:
        helper_columns.append('polarisation')
    gdf_final = gdf_filtered.drop(columns=helper_columns)

    if is_s1a:
        gdf_final['satellite'] = 'Sentinel 1'
    elif 's2a' in kml_str:
        gdf_final['satellite'] = 'Sentinel 2A'
    else:
        gdf_final['satellite'] = 'Sentinel 2B'

    return gdf_final

In [None]:

def trim_string(s, max_length):
    """Return ``s`` cut to its first ``max_length`` items if it is longer than that.

    A value whose length does not exceed ``max_length`` is returned unchanged.
    """
    already_fits = len(s) <= max_length
    return s if already_fits else s[:max_length]

In [48]:
# Function to combine GeoDataFrames
def combine_gdfs(s1_url, s1_kml_str, s2_url, s2a_kml_str, s2b_kml_str):
    """Scrape the three acquisition plans and stack them into one frame.

    Calls ``sentinel_scraper`` for the Sentinel-1 pair and then for the two
    Sentinel-2 patterns (both against ``s2_url``), printing the type of each
    result, and returns the row-wise concatenation with a fresh index.
    """
    scrape_jobs = (
        (s1_url, s1_kml_str),
        (s2_url, s2a_kml_str),
        (s2_url, s2b_kml_str),
    )
    frames = []
    for page_url, link_pattern in scrape_jobs:
        frame = sentinel_scraper(page_url, link_pattern)
        print(type(frame))
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)

In [49]:
def scrape_and_update_data(): 
    """Scrape all plans, keep upcoming acquisitions, publish and persist them.

    Combines the three scrapes via ``combine_gdfs`` using the module-level URL
    and pattern constants, converts ``time_start``/``time_end`` to datetimes,
    and keeps rows whose ``time_start`` is not earlier than the current local
    time.

    Side effects
    ------------
    * Sets ``builtins.scraped_data`` to the resulting GeoDataFrame.
    * Writes the frame to ``scraped_data.csv`` (without the index).

    Returns
    -------
    geopandas.GeoDataFrame
        The filtered frame. The original returned nothing, which left callers
        doing ``scraped_data = scrape_and_update_data()`` holding ``None``.
    """
    combined_gdf = combine_gdfs(s1_url, s1_kml_str, s2_url, s2a_kml_str, s2b_kml_str)

    # Convert 'time_start' and 'time_end' columns to datetime objects.
    # Only the first `max_length` characters of each value are parsed.
    # NOTE(review): presumably the values are ISO timestamps, so this keeps the
    # date and drops time-of-day/zone; the `>= now` filter below then compares
    # against midnight of that date. Confirm this is intended.
    max_length = 10
    for column in ('time_start', 'time_end'):
        trimmed = combined_gdf[column].apply(lambda x: trim_string(str(x), max_length))
        combined_gdf[column] = pd.to_datetime(trimmed)

    # Filter out times based on the current time
    current_time = datetime.now()
    combined = combined_gdf[combined_gdf['time_start'] >= current_time]

    final_gdf = gpd.GeoDataFrame(combined, geometry='geometry')

    # Store the GeoDataFrame in the global variable
    scraped_data = final_gdf
    builtins.scraped_data = scraped_data 
    scraped_data.to_csv('scraped_data.csv', index=False)
    if debugMe: print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END scrape_and_update_data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", len(scraped_data))

    return final_gdf
 

In [50]:
import builtins
import geopandas as gpd
import pandas as pd 

In [51]:
# Run the full scrape once. Besides whatever it returns, scrape_and_update_data()
# publishes its result as builtins.scraped_data and writes scraped_data.csv.
scraped_data = scrape_and_update_data() 

kml_links:  [<a href="/documents/d/sentinel/s1a_mp_user_20230907t174000_20230927t194000">07 - 27 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230905t174000_20230925t194000">05 - 25 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230901t174000_20230921t194000">01 - 21 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230830t174000_20230919t194000">30 August - 19 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230828t174000_20230917t194000">28 August - 17 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230825t174000_20230914t194000">25 August - 14 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230823t174000_20230912t194000">23 August - 12 September 2023</a>, <a href="/documents/d/sentinel/s1a_mp_user_20230822t174000_20230911t194000">22 August - 11 September 2023</a>]
<class 'geopandas.geodataframe.GeoDataFrame'>
kml_links:  [<a href="/documents/d/sentinel/s2a_mp_acq__kml_20230907t12000

In [None]:
# Form shown to the user. The field names must match what the request handler
# reads ('latitude', 'longitude', 'secret_id'); the auth input previously
# carried two name attributes ('seacret_id' and 'auth'), neither of which matched.
# step="any" lets the number inputs accept decimal coordinates.
html_content = '''
    <form method="POST">
        Latitude: <input type="number" name="latitude" step="any" required><br>
        Longitude: <input type="number" name="longitude" step="any" required><br>
        Auth Key: <input type="text" name="secret_id" required placeholder="Your Secret ID HERE"><br>
        <input id="submit" type="submit" value="Run">
    </form><br>
'''

# Used in development only. Whereas nodemon restarts the _server_ on file change,
# this _client_-side JavaScript refills the form with test values every 3.5 s.
# The automatic submit (a POST request) is currently commented out in the script.
# NOTE(review): the test latitude of 147 is outside the valid -90..90 range, so
# the two test values look swapped; confirm before relying on them.
includeClientDebug = True
if debugMe and includeClientDebug: html_content += """
    <script>
        function refreshPage() {
            document.querySelector('input[name="latitude"]').value = 147
            document.querySelector('input[name="longitude"]').value = 8
            document.querySelector('input[name="secret_id"]').value = "TEST";
            // document.getElementById("submit").click();
        }
        setInterval(refreshPage, 3500);
    </script>"""

In [17]:
# Look up the next acquisition covering a user-supplied location.
# NOTE(review): `request` and `jsonify` are not imported or defined anywhere in
# this notebook (the recorded run stops with NameError on `request`); they look
# like Flask objects, so this cell presumably belongs inside a Flask view. Confirm.
latitude = request.form['latitude']
longitude = request.form['longitude']
secret_user_id = request.form['secret_id']

latitude = float(latitude)
longitude = float(longitude)
# shapely points are (x, y). create_polygon() builds its vertices as (lon, lat),
# so longitude has to come first for contains() to compare like with like.
user_point = Point(longitude, latitude)

# `final_gdf` only exists as a local inside scrape_and_update_data(); that
# function publishes the same frame as builtins.scraped_data, so read it from there.
final_gdf = builtins.scraped_data
filtered_gdf = final_gdf[final_gdf['geometry'].contains(user_point)] 
# Display the time associated with the filtered GeoDataFrame for the user to see
time_message = filtered_gdf['time_start'].iloc[0] if not filtered_gdf.empty else "No upcoming time available."
 

print(jsonify({"message": time_message}))


NameError: name 'request' is not defined