In [3]:
from bs4 import BeautifulSoup as bs
import requests
import sys
import pandas as pd

In [4]:
# Country and organization codes. Each code is inserted into the satellite
# listing URL to request the page of that country/organization.
COUNTRIES_AND_ORGANIZATIONS = (
    'US', 'CIS', 'ESA', 'EUTE', 'FR', 'GER', 'GLOB',
    'IND', 'ITSO', 'JPN', 'PRC', 'SES', 'UK', 'CA',
)

In [7]:
def get_norad_id(codes, timeout=30):
    """Collect the NORAD IDs of the trackable satellites listed for each code.

    For every entry of ``codes`` the n2yo.com listing page for that code is
    downloaded and its ``footable countrytab`` table is scanned row by row.
    A row contributes the text of its first centre-aligned cell, unless the
    text of its last centre-aligned cell is "Unable to track".

    Args:
        codes: Iterable of country/organization codes that are inserted into
            the listing URL.
        timeout: Seconds to wait for each HTTP response before giving up, so
            a stalled connection cannot hang the notebook forever.

    Returns:
        list: The collected IDs, in the order they appear on the pages.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    norad_id_list = []
    for code in codes:
        url = f'https://www.n2yo.com/satellites/?c={code}&t=country'
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        soup = bs(response.content, 'html.parser')

        table = soup.find('table', class_="footable countrytab")
        if table is None:
            # Without the table there is nothing to scan; report it and move
            # on instead of failing with an AttributeError on None.
            print(f"No satellite table found for code {code!r}; skipping it.",
                  file=sys.stderr)
            continue

        for row in table.find_all('tr'):
            centered_cells = row.find_all('td', align="CENTER")
            if not centered_cells:
                # Rows without centre-aligned cells (e.g. a header row) carry
                # no satellite entry.
                continue
            if centered_cells[-1].string == "Unable to track":
                continue
            norad_id = centered_cells[0].string
            # ``.string`` is None when the cell does not hold exactly one
            # text node; such a value cannot be used as an ID.
            if norad_id is not None:
                norad_id_list.append(norad_id)
    return norad_id_list
   

In [15]:
# Scrape the NORAD IDs for every code in COUNTRIES_AND_ORGANIZATIONS
# (this performs one HTTP request per code).
norad_id_list = get_norad_id(COUNTRIES_AND_ORGANIZATIONS)

### Get the data of each country's satellites
Each attribute is collected into its own list. The function then returns a dictionary containing these lists, which makes it easy to create a DataFrame.

In [11]:
# Keys (and key order) of the dictionary returned by get_data.
_SATELLITE_COLUMNS = (
    "Name", "Type", "Norad ID", "Int'l Code", "Perigee", "Apogee",
    "Period", "Launch Date", "Source",
)

# Exceptions raised when a page does not have the expected layout.
_PAGE_LAYOUT_ERRORS = (AttributeError, IndexError, TypeError, ValueError)

# Position, among the <b> tags of the "satinfo" block, of the label whose
# following text is read as the source.
# NOTE(review): positional lookup is fragile if the page layout changes.
_SOURCE_LABEL_INDEX = 9


def _text_before_anchor(soup, css_class):
    """Return the stripped text node placed right before ``<a class=css_class>``."""
    return soup.find('a', class_=css_class).previous_sibling.strip()


def _number_before_anchor(soup, css_class, suffix_length):
    """Parse the number placed before an anchor.

    The first two characters and the last ``suffix_length`` characters
    (presumably a separator and a unit) are dropped, and thousands
    separators are removed before conversion to ``float``.
    """
    text = _text_before_anchor(soup, css_class)[2:-suffix_length]
    return float(text.replace(',', ''))


def _parse_satellite_page(soup, norad_id):
    """Extract one satellite record (a dict keyed by column name) from a page."""
    info = soup.find('div', id='satinfo')

    # The type entry is optional; a page without it yields None.
    try:
        st_type = soup.find('li', class_='arrow').a.string
    except AttributeError:
        st_type = None

    source_label = info.find_all('b')[_SOURCE_LABEL_INDEX]
    return {
        "Name": info.h1.string,
        "Type": st_type,
        "Norad ID": norad_id,
        "Int'l Code": _text_before_anchor(soup, 'showTip intlcode')[2:],
        "Perigee": _number_before_anchor(soup, 'showTip perigee', 3),
        "Apogee": _number_before_anchor(soup, 'showTip apogee', 3),
        "Period": _number_before_anchor(soup, 'showTip period', 7),
        # The launch date is read from the last link of the info block.
        "Launch Date": info.find_all('a')[-1].string,
        # Two hops: into the label's own text, then to the text after it.
        "Source": source_label.next_element.next_element.strip()[2:],
    }


def get_data(noradid_list, timeout=30):
    """Download each satellite's n2yo.com page and collect its attributes.

    Pages without a perigee entry are skipped silently. Pages that have one
    but deviate from the expected layout in some other way are skipped with
    a notice on stderr, so one odd page cannot abort a long scrape. Every
    record is built completely before it is stored, which keeps all returned
    lists the same length.

    Args:
        noradid_list: Iterable of NORAD IDs inserted into the page URL.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        dict: Maps each of "Name", "Type", "Norad ID", "Int'l Code",
        "Perigee", "Apogee", "Period", "Launch Date" and "Source" to a list
        with one entry per successfully parsed satellite.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    records = []
    for norad_id in noradid_list:
        url = f'https://www.n2yo.com/satellite/?s={norad_id}'
        response = requests.get(url, timeout=timeout)
        soup = bs(response.content, 'html.parser')

        # No perigee entry: skip quietly.
        if soup.find('a', class_="showTip perigee") is None:
            continue

        try:
            records.append(_parse_satellite_page(soup, norad_id))
        except _PAGE_LAYOUT_ERRORS as exc:
            print(f"Skipping NORAD ID {norad_id}: unexpected page layout ({exc!r})",
                  file=sys.stderr)

    return {
        column: [record[column] for record in records]
        for column in _SATELLITE_COLUMNS
    }
   

    

In [12]:
# Download and parse the page of every collected NORAD ID
# (this performs one HTTP request per ID).
my_data_dict = get_data(norad_id_list)

In [13]:
# Each key of the dictionary becomes a column of the DataFrame.
df = pd.DataFrame(my_data_dict)

In [14]:
# Show the resulting table as the cell output.
df

Unnamed: 0,Name,Type,Norad ID,Int'l Code,Perigee,Apogee,Period,Launch Date,Source
0,STARLINK-3314,Starlink,50851,2022-001BA,315.2,329.2,90.8,"January 6, 2022",United States (US)
1,STARLINK-3312,Starlink,50850,2022-001AZ,315.9,328.5,90.8,"January 6, 2022",United States (US)
2,STARLINK-3311,Starlink,50849,2022-001AY,315.4,329.3,90.8,"January 6, 2022",United States (US)
3,STARLINK-3315,Starlink,50848,2022-001AX,315.2,329.3,90.8,"January 6, 2022",United States (US)
4,STARLINK-3319,Starlink,50847,2022-001AW,315.5,329.5,90.8,"January 6, 2022",United States (US)
...,...,...,...,...,...,...,...,...,...
7084,ANIK A1 (TELESAT 1),Geostationary,6278,1972-090A,36139.7,36268.0,1457.1,"November 10, 1972",Canada (CA)
7085,ISIS 2,Space & Earth Science,5104,1971-024A,1363.3,1432.4,113.6,"April 1, 1971",Canada (CA)
7086,ISIS 1,Brightest,3669,1969-009A,585.0,3454.7,127.5,"January 30, 1969",Canada (CA)
7087,ALOUETTE 2,Space & Earth Science,1804,1965-098A,508.1,2642.9,117.5,"November 29, 1965",Canada (CA)


In [111]:
# index=False keeps the automatically generated row index out of the file;
# otherwise it is written as an unnamed first column that shows up as
# "Unnamed: 0" when the CSV is read back.
df.to_csv("Sattelites_Dataset.csv", index=False)