<a href="https://colab.research.google.com/github/khalil-alexander/get_forecast/blob/main/DataAnalytics2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## <font color="red"> Pip install Astropy in google colab <font/>
### <font color="Green"> We install astropy in order to convert kilometre to longitatide later <font/>

In [1]:
!pip install astropy
!pip install country_converter
!pip install shapely
!pip install cartopy
!pip install geopandas
!pip install movingpandas
!pip install hvplot
!pip install holoviews
!pip install geoviews
!pip install global_land_mask
!pip install reverse_geocode



## <font color="red"> Install Moudles in google colab <font/>

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
from astropy import coordinates as coord
from astropy import units as u
from astropy import time
from astropy.time import Time
from astropy.coordinates import Angle

In [4]:
import pandas as pd
from bs4 import BeautifulSoup
import requests as reqs
import xml.etree.ElementTree as ET
import io
import pprint
import json
import datetime

In [5]:
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from geopy.geocoders import Nominatim

In [6]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.cm as mcm

In [7]:
import cartopy
import cartopy.crs as ccrs
import country_converter as coco
from cartopy.feature.nightshade import Nightshade
from global_land_mask import globe

In [8]:
from shapely import geometry as shpgeom
from shapely import wkt as shpwkt

In [9]:
import holoviews as hv
import hvplot.pandas
import geoviews

In [10]:
import os
from pathlib import Path
import datetime as dt
from datetime import timezone

In [11]:
from datetime import datetime
import datetime as dt
from datetime import timezone

In [12]:
import xarray as xr

In [13]:
from global_land_mask import globe
import reverse_geocode

In [14]:
def create_latlon_positions(date: str, xyz: list[float]):
    now = Time(date, scale='utc')
    cartrep = coord.CartesianRepresentation(*xyz, unit=u.km)

    gcrs = coord.GCRS(cartrep, obstime = now)
    itrs = gcrs.transform_to(coord.ITRS(obstime = now))
    loc = coord.EarthLocation(*itrs.cartesian.xyz)

    return Angle(loc.lat).deg, Angle(loc.lon).deg


In [15]:
def get_latlon(row: pd.Series):
    date = row['Date'].strftime('%Y-%m-%d %H:%M:%S')
    xyz = [row['x units(km)'], row['y units(km)'], row['z units(km)']]
    return create_latlon_positions(date, xyz)




## <font color="red"> read_xml_DateFrame <font/>
### <font color="green">A function that reads an xml file and returns the specfic position the ISS is at during different time. <font/>

<font color="blue"> We pass in the url of an XML file:https://nasa-public-data.s3.amazonaws.com/iss-coords/current/ISS_OEM/ISS.OEM_J2K_EPH.xml
<font color="blue"> We use the XML file to webscrap data on the the position of the ISS and time the ISS was in that posistion.

<font color="blue"> In order to clean up the data we use Beautifulsoup

### <font color="green">What is Beautifulsoup? <font/>

<font color="blue">Beautifulsoup is a python libary that is used to pull XML and HTML files. In this case we want to pull out an XML file. <font/>

<font color="blue">The XML file we used to extract data from: https://nasa-public-data.s3.amazonaws.com/iss-coords/current/ISS_OEM/ISS.OEM_J2K_EPH.xml <font/>

In [16]:
def read_xml_DataFrame(url):
    """
    Do web scraping from an XML file to obtain time the International Space
    Station will be at a specfic postion and the speed it is going at.

    Parameters
    ----------
    url : String
        The url of the data in XML format you want to extract from.

    Return
    ------
    df : Pandas DataFrame
        The DataFrame containing the time the International Space Station is at
        a specfic position and the speed it is traveling.
    """
    # Get the xml request
    xml = reqs.get(url)

    # Use BeautifulSoup to Organize the XML file
    soup = BeautifulSoup(xml.content,"lxml")

    # Finds all the instances 'statevector' appeared in the xml file
    state_tag = soup.find_all('statevector')

    col_name = ['Date','x units(km)',
                'y units(km)','z units(km)',
                'x_dot(km/s)',
                'y_dot(km/s)','z_dot(km/s)']
    df = pd.DataFrame(columns=col_name)

    #Loops thorugh all elements to add rows in the empty xml_df
    for element in state_tag:
        # Turns element into text and splits the '\n'
        element = element.text.split('\n')
        # Deletes First empty String
        element.pop(0)
        # Deletes Last empty Sting
        element.pop(-1)
        # Removes the 'Z' in the elements first index
        element[0] = element[0][:-1]

        # Appending list into DataFrame by adding 1 row to the DataFrame
        #df = xml_df.append(pd.DataFrame(element, columns=col_name), ignore_index=True)
        df.loc[len(df)] = element


    # Turns the "Date" collumn into a Datetime object
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%jT%H:%M:%S.%f')
    # Sets the "Date" collumn into a Datetime object
    #df.set_index("Date", inplace=True)

    # Returns the Dataframe
    return df

In [17]:
xml_url = "https://nasa-public-data.s3.amazonaws.com/iss-coords/current/ISS_OEM/ISS.OEM_J2K_EPH.xml"
%time df = read_xml_DataFrame(xml_url)

CPU times: user 10.4 s, sys: 151 ms, total: 10.5 s
Wall time: 11.4 s


In [18]:
df

Unnamed: 0,Date,x units(km),y units(km),z units(km),x_dot(km/s),y_dot(km/s),z_dot(km/s)
0,2025-02-12 12:00:00,1032.86303391617,-4135.01655227222,5285.24786080595,7.16296898488722,2.6374691055433002,0.66471433707796002
1,2025-02-12 12:04:00,2693.5587421218202,-3359.4192497927902,5250.1913759590498,6.5917212472323703,3.7863673738742798,-0.95502205589357003
2,2025-02-12 12:08:00,4158.4600933256697,-2339.6317437399498,4832.4175386834704,5.5412630870164703,4.66003318550072,-2.5051758306921501
3,2025-02-12 12:12:00,5321.0206661877801,-1149.7539262479099,4062.3141148320401,4.0874925446178603,5.1951267094584699,-3.8731644775054899
4,2025-02-12 12:16:00,6096.55352743612,123.73958115751,2995.9081624519899,2.33567153768014,5.3525206292160297,-4.9592151147897399
...,...,...,...,...,...,...,...
6082,2025-02-27 11:45:00,4725.6122273278197,352.30775819179598,-4876.7302152288303,-3.0171612421375902,6.5948530993270298,-2.4423904537669201
6083,2025-02-27 11:49:00,3839.1599846392301,1903.17044275832,-5278.7259780336399,-4.3249845490169596,6.2504577260547203,-0.88721536660181999
6084,2025-02-27 11:53:00,2674.73145218746,3316.2289606109498,-5297.4214788776399,-5.3196592851300499,5.4534040548973604,0.73246350063873
6085,2025-02-27 11:57:00,1316.58492360587,4489.0743177531904,-4931.3291171098199,-5.9294790985872803,4.2606771881374801,2.2999334681557699


In [19]:
df[['lat','lon']] = df.apply(get_latlon, axis=1, result_type='expand')

In [20]:
df = df[['Date','lat','lon']]

In [21]:
df

Unnamed: 0,Date,lat,lon
0,2025-02-12 12:00:00,51.324729,-38.612693
1,2025-02-12 12:04:00,50.905665,-14.883611
2,2025-02-12 12:08:00,45.665062,6.091809
3,2025-02-12 12:12:00,37.041021,22.307132
4,2025-02-12 12:16:00,26.449077,34.684130
...,...,...,...
6082,2025-02-27 11:45:00,-45.862328,30.763534
6083,2025-02-27 11:49:00,-50.981686,51.800327
6084,2025-02-27 11:53:00,-51.277893,75.480906
6085,2025-02-27 11:57:00,-46.647216,97.013203


In [22]:
def create_latlon_positions(date: str, xyz: list[float]):
    now = Time(date, scale='utc')
    cartrep = coord.CartesianRepresentation(*xyz, unit=u.km)

    gcrs = coord.GCRS(cartrep, obstime = now)
    itrs = gcrs.transform_to(coord.ITRS(obstime = now))
    loc = coord.EarthLocation(*itrs.cartesian.xyz)

    return Angle(loc.lat).deg, Angle(loc.lon).deg


In [23]:
def get_latlon(row: pd.Series):
    date = row['Date'].strftime('%Y-%m-%d %H:%M:%S')
    xyz = [row['x units(km)'], row['y units(km)'], row['z units(km)']]
    return create_latlon_positions(date, xyz)




In [24]:
# Writing a function to return if there is land in a given latitude/longitude
# Function returns 1 if there is land and returns 0 if there is ocean.
def get_land_flag(row):
    """
    Extract the land_flag given the latitude/longitude information.
    This function provides if there is land in a given latitude/longitude. If
    not then we can assume the given latitude/longitude is on the ocean.

    Parameters
    ----------
    lat : float
       Latitude of the location
    lon : float
       Longitude of the location

    Returns
    -------
    land_flag : boolen
       If land_flag is true or not
    """
    # Gets checks if there is land in the given latitude/longitude
    land_flag = globe.is_land(row['lat'],row['lon'])
    # Returns land flag
    return land_flag

In [25]:
# Getting the name of a country with latitude/longitude
# Returns the name of the country
def get_country_name(lat: float, lon: float) -> str:
    """
    Extract the country name given the latitude/longitude information.
    This function provides a country name even when a location is on
    the ocean. We wish it was not the case.

    Parameters
    ----------
    lat : float
       Latitude of the location
    lon : float
       Longitude of the location

    Returns
    -------
    country : str
       Country name (empty string if no country)
    """
    # Turn latitude and longitude into a tuple
    lat_lon = (lat, lon),
    # Get location with geocode
    loc_name = reverse_geocode.search(lat_lon)
    # Returns Country name
    return loc_name[0].get('country', '')

In [26]:
# Using the function get_county_name we obtain the country if land_flag is true
# Returns name of country if land_flag is true, otherwise returns 'Ocean'
def obtain_country_name(row):
    """
    Extract the country name given the latitude/longitude information.
    This function provides a country name even when a location is on
    the ocean. We wish it was not the case.

    Parameters
    ----------
    row: Pandas Dataframe
       Dataframe expected to contain the latitude and longitude

       lat : float
         Latitude of the location
       lon : float
         Longitude of the location

    Returns
    -------
    country : str
       Country name (empty string if no country)
    """
    # Checks if land_flag is true
    if row['land_flag'] == 1:
        # Gets latitude and longitude
        lat = row['lat']
        lon = row['lon']
        # Gets country name
        country = get_country_name(lat, lon)
        # Retuns country name
        return country
    else:
        # Returns ocean if land_flag is false
        return "Ocean"

In [27]:
def get_overpassed_countries(df: pd.DataFrame, country_dict: dict):
    """
    Counts the amount of countrys the ISS flys over.

    Parameters
    ----------
    df : Pandas DataFrame

    Return
    ------
    count : Int
        The number of countrys the ISS flys over.
    """
    list_countries = df["Country"].unique().tolist()
    list_countries.remove("Ocean")

    for country in list_countries:
        if country in country_dict:
            country_dict[country] += 1
        else:
            country_dict[country] = 1
    return country_dict


In [28]:
def count_countrys(df: pd.DataFrame):
    """
    Counts the amount of countrys the ISS flys over.

    Parameters
    ----------
    df : Pandas DataFrame

    Return
    ------
    count : Int
        The number of countrys the ISS flys over.
    """
    country_dict = {}
    previous_name = ""
    country_count = 0
    for name, land_flag in zip(df['Country'], df['land_flag']):
        if (previous_name != name) and (name not in country_dict) and land_flag == True:
            if name not in country_dict:
                country_dict[name] = 1
                country_count += 1

        previous_name = name
    print(country_dict)
    print(country_count)
    return country_count


In [29]:
def create_list_orbits(df: pd.DataFrame) -> list:
    """

    """
    list_orbits = list()
    temp_df = df['lon']
    first_index = 0
    second_index = 0


    for i in range(len(temp_df)-1):
        if (temp_df[i] > 0) and (temp_df[i+1] < 0):
            second_index = i+1
            list_orbits.append(df[first_index:second_index])
            first_index = second_index

    list_orbits.append(df[second_index:])


    return list_orbits

In [30]:
def get_interpolated_data(df: pd.DataFrame, seconds='20s') -> pd.DataFrame:
    """

    """

    df.rename(columns={'Date': 't'}, inplace=True)

    df.set_index('t', inplace=True)
    ds = df.to_xarray()

    start_date = df.index[0] + dt.timedelta(seconds=5)
    start_date = datetime.fromtimestamp(start_date.timestamp())
    start_date = start_date.strftime('%Y-%m-%dT%H:%M:%S')

    end_date = df.index[-1] - dt.timedelta(seconds=5)
    end_date = datetime.fromtimestamp(end_date.timestamp())
    end_date = end_date.strftime('%Y-%m-%dT%H:%M:%S')

    new_dates = pd.date_range(start_date, end_date, freq=seconds)
    interpolated_data = ds.interp(t=new_dates, method='linear')
    interpolated_df = interpolated_data.to_dataframe()

    return interpolated_df

In [31]:
def interpolate_orbits(df_list: list, seconds='20s') -> list:
    count = 0
    new_df_list = list()
    for element in df_list:
        element = get_interpolated_data(element, seconds)
        element['land_flag'] = element.apply(get_land_flag, axis=1, result_type='expand')
        element['Country'] = element.apply(obtain_country_name, axis=1, result_type='expand')
        new_df_list.append(element)


    return new_df_list



In [32]:
def get_all_countrys(df_list: list) -> dict:
    """

    """
    country_dict = dict()
    for df in df_list:
        country_dict = get_overpassed_countries(df, country_dict)
    return country_dict

In [33]:
def get_all(df: pd.DataFrame, seconds='20s') -> dict:
    """
    Reads the dataframe and returns dictionary of countries the ISS flys over.
    Parameters
    ----------
    df : Pandas DataFrame

    Return
    ------
    country_dict : dict
        A dictionary of countrys the ISS flys over.
    """
    # Make a deep copy of the dataframe
    mydf = df.copy()
    # Create a list of orbits with the copy of the dataframe
    orbit_list = create_list_orbits(mydf)
    # Go through each orbit and interpolate each orbit
    interpolated_list = interpolate_orbits(orbit_list, seconds=seconds)
    # Loop through the list and add columns land_flag and Country to the element
    for element in interpolated_list:
        element['land_flag'] = element.apply(get_land_flag, axis=1, result_type='expand')
        element['Country'] = element.apply(obtain_country_name, axis=1, result_type='expand')
    print(interpolated_list[0].columns)
    country_dict = get_all_countrys(interpolated_list)
    return country_dict


In [34]:
%time test_list_15 = get_all(df, seconds='15s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 37.9 s, sys: 365 ms, total: 38.2 s
Wall time: 38.7 s


In [35]:
%time test_list_20 = get_all(df, seconds='20s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 28.1 s, sys: 91.2 ms, total: 28.2 s
Wall time: 28.4 s


In [36]:
%time test_list_25 = get_all(df, seconds='25s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 22.9 s, sys: 66 ms, total: 22.9 s
Wall time: 23.1 s


In [37]:
%time test_list_30 = get_all(df, seconds='30s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 20 s, sys: 61.9 ms, total: 20 s
Wall time: 20.2 s


In [49]:
%time test_list_10 = get_all(df, seconds='10s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 54.6 s, sys: 148 ms, total: 54.7 s
Wall time: 55.6 s


In [63]:
%time test_list_5 = get_all(df, seconds='5s')

Index(['lat', 'lon', 'land_flag', 'Country'], dtype='object')
CPU times: user 1min 47s, sys: 278 ms, total: 1min 47s
Wall time: 1min 49s


In [64]:
keys15 = set(test_list_15.keys())
keys20 = set(test_list_20.keys())
keys25 = set(test_list_25.keys())
keys30 = set(test_list_30.keys())
keys10 = set(test_list_10.keys())
keys5 = set(test_list_5.keys())

In [65]:
print(len(keys15))
print(len(keys20))
print(len(keys25))
print(len(keys30))
print(len(keys10))
print(len(keys5))

169
161
161
161
171
179


In [70]:
keys30-keys5

set()

Create a table
First column: Freq
Second column: How long it takes to run: Computing time
Third Column: How many countries does the ISS pass: Number of Countries and

In [47]:
keys20-keys15

{'Equatorial Guinea', 'Guadeloupe', 'Kiribati'}

In [52]:
keys15-keys10

{'Belize',
 'French Polynesia',
 'Liechtenstein',
 'New Caledonia',
 'Trinidad and Tobago'}

In [53]:
keys10-keys15

{'Bahrain',
 'Equatorial Guinea',
 'Guadeloupe',
 'Kiribati',
 'Palau',
 'Palestinian Territory',
 'Reunion'}

In [54]:
keys10-keys20

{'Bahrain',
 'Bhutan',
 'Brunei Darussalam',
 'Comoros',
 'Martinique',
 'Palau',
 'Palestinian Territory',
 'Reunion',
 'Timor-Leste',
 'Turks and Caicos Islands'}

In [61]:
keys30-keys10

set()

In [48]:
df

Unnamed: 0,Date,lat,lon
0,2025-02-12 12:00:00,51.324729,-38.612693
1,2025-02-12 12:04:00,50.905665,-14.883611
2,2025-02-12 12:08:00,45.665062,6.091809
3,2025-02-12 12:12:00,37.041021,22.307132
4,2025-02-12 12:16:00,26.449077,34.684130
...,...,...,...
6082,2025-02-27 11:45:00,-45.862328,30.763534
6083,2025-02-27 11:49:00,-50.981686,51.800327
6084,2025-02-27 11:53:00,-51.277893,75.480906
6085,2025-02-27 11:57:00,-46.647216,97.013203


In [38]:
test_list_15

{'France': 47,
 'Italy': 26,
 'Greece': 15,
 'Egypt': 25,
 'Saudi Arabia': 27,
 'Yemen': 14,
 'Somalia': 19,
 'Mexico': 42,
 'United States': 104,
 'Canada': 82,
 'Portugal': 14,
 'Spain': 27,
 'Algeria': 36,
 'Libyan Arab Jamahiriya': 32,
 'Chad': 26,
 'Sudan': 28,
 'Central African Republic': 17,
 'South Sudan': 18,
 'Congo, The Democratic Republic of the': 29,
 'Uganda': 7,
 'Tanzania, United Republic of': 18,
 'Madagascar': 20,
 'Western Sahara': 7,
 'Morocco': 28,
 'Mauritania': 23,
 'Mali': 26,
 'Burkina Faso': 12,
 'Togo': 5,
 'Benin': 8,
 'Nigeria': 20,
 'Gabon': 9,
 'Angola': 25,
 'Namibia': 21,
 'Botswana': 15,
 'South Africa': 27,
 'Australia': 74,
 'Papua New Guinea': 18,
 'Indonesia': 58,
 'Cuba': 14,
 'Venezuela': 21,
 'Colombia': 20,
 'Brazil': 52,
 'Japan': 32,
 'Peru': 27,
 'Chile': 71,
 'Argentina': 90,
 'Uruguay': 8,
 'Myanmar': 17,
 'China': 102,
 'Russian Federation': 112,
 'India': 44,
 'Nepal': 12,
 'Mongolia': 63,
 'Zambia': 18,
 'Kenya': 16,
 'Ethiopia': 20,
 '

In [39]:
test_list_20

{'France': 45,
 'Italy': 25,
 'Greece': 15,
 'Egypt': 26,
 'Saudi Arabia': 27,
 'Yemen': 14,
 'Somalia': 19,
 'Mexico': 42,
 'United States': 103,
 'Canada': 82,
 'Portugal': 12,
 'Spain': 27,
 'Algeria': 35,
 'Libyan Arab Jamahiriya': 34,
 'Chad': 26,
 'Sudan': 28,
 'Central African Republic': 17,
 'South Sudan': 18,
 'Congo, The Democratic Republic of the': 31,
 'Uganda': 7,
 'Tanzania, United Republic of': 17,
 'Mozambique': 19,
 'Madagascar': 20,
 'Morocco': 28,
 'Mauritania': 23,
 'Mali': 26,
 'Burkina Faso': 12,
 'Togo': 5,
 'Benin': 8,
 'Nigeria': 20,
 'Gabon': 9,
 'Angola': 24,
 'Namibia': 22,
 'Botswana': 15,
 'South Africa': 27,
 'Australia': 73,
 'Papua New Guinea': 15,
 'Cuba': 11,
 'Venezuela': 21,
 'Colombia': 20,
 'Brazil': 52,
 'Indonesia': 54,
 'Japan': 33,
 'Peru': 27,
 'Chile': 69,
 'Argentina': 90,
 'Uruguay': 8,
 'Myanmar': 16,
 'China': 102,
 'Russian Federation': 113,
 'India': 42,
 'Nepal': 13,
 'Mongolia': 63,
 'Zambia': 18,
 'Kenya': 16,
 'Ethiopia': 20,
 'Dji

In [40]:
test_list_25

{'France': 43,
 'Italy': 26,
 'Egypt': 24,
 'Saudi Arabia': 27,
 'Yemen': 14,
 'Somalia': 18,
 'Mexico': 42,
 'United States': 101,
 'Canada': 81,
 'Portugal': 13,
 'Spain': 28,
 'Algeria': 36,
 'Libyan Arab Jamahiriya': 32,
 'Chad': 26,
 'Sudan': 28,
 'Central African Republic': 17,
 'South Sudan': 18,
 'Congo, The Democratic Republic of the': 30,
 'Uganda': 7,
 'Tanzania, United Republic of': 17,
 'Mozambique': 19,
 'Madagascar': 20,
 'Western Sahara': 5,
 'Morocco': 28,
 'Mauritania': 22,
 'Mali': 25,
 'Burkina Faso': 12,
 'Togo': 5,
 'Benin': 8,
 'Nigeria': 20,
 'Gabon': 9,
 'Angola': 25,
 'Namibia': 21,
 'Botswana': 15,
 'South Africa': 27,
 'Australia': 72,
 'Papua New Guinea': 16,
 'Indonesia': 56,
 'Cuba': 10,
 'Venezuela': 22,
 'Brazil': 52,
 'Japan': 28,
 'Peru': 25,
 'Chile': 67,
 'Argentina': 90,
 'Uruguay': 8,
 'Myanmar': 17,
 'China': 102,
 'Russian Federation': 113,
 'India': 43,
 'Nepal': 10,
 'Mongolia': 62,
 'Zambia': 18,
 'Kenya': 18,
 'Ethiopia': 19,
 'United Arab E

In [41]:
test_list_30

{'France': 45,
 'Italy': 23,
 'Greece': 14,
 'Egypt': 23,
 'Saudi Arabia': 27,
 'Yemen': 14,
 'Somalia': 18,
 'Mexico': 41,
 'United States': 103,
 'Canada': 82,
 'Portugal': 12,
 'Spain': 24,
 'Algeria': 36,
 'Libyan Arab Jamahiriya': 31,
 'Chad': 26,
 'Sudan': 28,
 'Central African Republic': 17,
 'South Sudan': 18,
 'Congo, The Democratic Republic of the': 26,
 'Uganda': 7,
 'Tanzania, United Republic of': 17,
 'Madagascar': 20,
 'Western Sahara': 5,
 'Mauritania': 23,
 'Mali': 26,
 'Burkina Faso': 12,
 'Togo': 4,
 'Benin': 8,
 'Nigeria': 20,
 'Gabon': 9,
 'Angola': 23,
 'Botswana': 15,
 'South Africa': 26,
 'Australia': 70,
 'Papua New Guinea': 16,
 'Indonesia': 55,
 'Cuba': 11,
 'Venezuela': 21,
 'Colombia': 20,
 'Brazil': 52,
 'Japan': 28,
 'Peru': 27,
 'Chile': 62,
 'Argentina': 90,
 'Uruguay': 8,
 'Myanmar': 14,
 'China': 102,
 'Russian Federation': 112,
 'India': 44,
 'Nepal': 10,
 'Mongolia': 62,
 'Namibia': 20,
 'Zambia': 17,
 'Kenya': 16,
 'Ethiopia': 20,
 'Djibouti': 4,
 '

In [42]:
print(test_list_15 == test_list_20)
print(test_list_15 == test_list_25)
print(test_list_15 == test_list_30)

False
False
False


Write a function that compares two dictionarys, first compre the keys of the dictionary, must be the same

The second part compare tge values of each key

In [178]:
keys15 = set(test_list_15.keys())
keys20 = set(test_list_20.keys())
print(keys20 == keys15)

False


In [179]:
keys25 = set(test_list_25.keys())
keys30 = set(test_list_30.keys())
print(keys30 == keys25)

False


In [180]:
print(keys25 == keys20)

False


In [181]:
len(test_list_15)

169

In [182]:
len(test_list_20)

161

In [183]:
len(test_list_25)

161

In [184]:
len(keys15)

169

In [185]:
len(keys20)

161

In [186]:
len(keys25)

161

In [187]:
len(keys30)

161

In [188]:
keys15-keys20

{'Belize',
 'Bhutan',
 'Brunei Darussalam',
 'Comoros',
 'French Polynesia',
 'Liechtenstein',
 'Martinique',
 'New Caledonia',
 'Timor-Leste',
 'Trinidad and Tobago',
 'Turks and Caicos Islands'}

In [189]:
keys20-keys15

{'Equatorial Guinea', 'Guadeloupe', 'Kiribati'}

In [152]:
keys25-keys20

{'Bahrain',
 'Brunei Darussalam',
 'French Polynesia',
 'New Caledonia',
 'Palestinian Territory',
 'Reunion'}

In [153]:
keys20-keys25

{'Andorra', 'Bahamas', 'Cape Verde', 'Equatorial Guinea', 'Gambia', 'Kiribati'}

In [148]:
len(keys15)

169

In [None]:
def compare_dictionarys(dict1: dict, dict2: dict) -> bool:
    """

    """
    keys1 = set(dict1.keys())
    keys2 = set(dict2.keys())


In [109]:
df

Unnamed: 0,Date,lat,lon
0,2025-02-12 12:00:00,51.324729,-38.612693
1,2025-02-12 12:04:00,50.905665,-14.883611
2,2025-02-12 12:08:00,45.665062,6.091809
3,2025-02-12 12:12:00,37.041021,22.307132
4,2025-02-12 12:16:00,26.449077,34.684130
...,...,...,...
6082,2025-02-27 11:45:00,-45.862328,30.763534
6083,2025-02-27 11:49:00,-50.981686,51.800327
6084,2025-02-27 11:53:00,-51.277893,75.480906
6085,2025-02-27 11:57:00,-46.647216,97.013203


In [94]:
def get_range(df: pd.DataFrame ,start_date: str, end_date: str):
    """
    Reads DataFrame, then takes the range between the start and end date.
    ----------
    df : Pandas DataFrame

    Return
    ------
    df : Pandas DataFrame
        A specific range of the df from a start date to an end date.
    """

    start_date = dt.datetime.strptime(start_date, '%Y-%m-%d %H:%M')

    end_date = dt.datetime.strptime(end_date, '%Y-%m-%d %H:%M')

    for element in df["Date"]:
        if element >= start_date:
            start_date = element
            break
    for element in df["Date"]:
        if element >= end_date:
            end_date = element
            break
    print(start_date)
    print(end_date)
    print(type(start_date))
    print(type(end_date))

    #start_num = df.loc[(df['Date'] >= start_date)]
    #print(start_num)
    #end_num = df.loc[(df['Date'] <= end_date)]
    #print(end_num)
    return df.loc[(df['Date'] >= start_date) & (df['Date'] <= end_date)].reset_index(drop=True)



In [95]:
get_range(df, '2025-02-13 15:00', '2025-02-14 15:00')

2025-02-13 15:00:00
2025-02-14 15:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


Unnamed: 0,Date,lat,lon
0,2025-02-13 15:00:00,-45.634388,63.712719
1,2025-02-13 15:04:00,-50.888751,84.618382
2,2025-02-13 15:08:00,-51.347658,108.293069
3,2025-02-13 15:12:00,-46.854027,129.955787
4,2025-02-13 15:16:00,-38.710107,146.946466
...,...,...,...
356,2025-02-14 14:44:00,5.776810,-167.183333
357,2025-02-14 14:48:00,17.832678,-158.099674
358,2025-02-14 14:52:00,29.281849,-147.591803
359,2025-02-14 14:56:00,39.486195,-134.369545


test with how many countries fly over the new get_range function (use 5s freqency)

create new function with country name, return how many time it will fly over that country

within the radius of 20 miles of a specific point how many times will the ISS fly over.

given latitude and longitude return how many times the ISS will fly over it

In [112]:
test_date = '2025-02-13 15:00:00'
test_date = dt.datetime.strptime(test_date, '%Y-%m-%d %H:%M:%S')

In [113]:
test_date

datetime.datetime(2025, 2, 13, 15, 0)

In [114]:
df['Date'].loc(test_date)

ValueError: No axis named 2025-02-13 15:00:00 for object type Series

In [None]:
type(test_date)

In [None]:
df['Date'].info()