In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

import pandas as pd
import csv

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def _parse_ship_info(html_content, imo_number):
    """Extract the ship-information table of one vessel page as a dict.

    Args:
        html_content: Full HTML source of the vessel page.
        imo_number: IMO number the page was requested for; only used in the
            diagnostic messages printed when the expected markup is missing.

    Returns:
        A dict mapping the stripped text of each row's first ``<th>`` to the
        list of stripped texts of that row's ``<td>`` cells, or ``None`` when
        the information section or the table cannot be found (a message is
        printed in that case).
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # The lookup matches on the exact class AND inline style attribute values.
    ship_info_container = soup.find("div", {"class": "ship-info-container", "style": "position: relative;"})
    if not ship_info_container:
        print(f"Could not find the ship information section for IMO {imo_number}")
        return None

    ship_info_table = ship_info_container.find("table", {"class": "table ship-info", "style": "min-height: 710px;"})
    if not ship_info_table:
        print(f"Could not find the ship information table for IMO {imo_number}")
        return None

    data = {}
    for row in ship_info_table.find_all("tr"):
        headers = row.find_all("th")
        cells = row.find_all("td")
        # Rows lacking either a header or a value cell carry no key/value pair.
        if headers and cells:
            data[headers[0].text.strip()] = [cell.text.strip() for cell in cells]
    return data


def extract_ship_info(imo_numbers):
    """Scrape ship characteristics from balticshipping.com for a set of IMO numbers.

    For every IMO number the vessel page is opened in a Chrome browser driven
    by Selenium, the ship-information table is parsed, and the results of all
    pages are combined into a single DataFrame.

    Args:
        imo_numbers: Iterable of IMO numbers; each one is interpolated into
            the vessel URL.

    Returns:
        A pandas DataFrame whose columns are the table headers found on the
        pages, with a fresh 0..n-1 index. IMO numbers whose page lacks the
        expected section or table are skipped after printing a message. An
        empty DataFrame is returned when nothing could be extracted.
    """
    # Materialize once so generators work and an empty input can be detected
    # without launching a browser.
    imo_list = list(imo_numbers)
    if not imo_list:
        return pd.DataFrame()

    # Initialize the Chrome driver
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    # One DataFrame per successfully parsed page; concatenated once at the end
    # instead of re-copying the accumulated frame on every iteration.
    frames = []
    try:
        for imo_number in imo_list:
            url = f"https://www.balticshipping.com/vessel/imo/{imo_number}"
            driver.get(url)

            # NOTE(review): page_source is read right after get() returns; if
            # the site fills the table in asynchronously an explicit wait may
            # be needed here - confirm against the live page.
            data = _parse_ship_info(driver.page_source, imo_number)
            if data:
                frames.append(pd.DataFrame(data))
    finally:
        # Always close the browser, even when a page load or parse step raises.
        driver.quit()

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [None]:
# Open the CSV file holding the IMO numbers (Imos.csv) and store every value
# of the 'Imo' column in a list.
# newline='' leaves newline handling to the csv module, as its documentation
# recommends for file objects passed to a reader.
with open('Imos.csv', 'r', newline='') as file:
    reader = csv.DictReader(file)
    imo_numbers = [row['Imo'] for row in reader]

In [19]:
# Remove duplicate values from imo_numbers while keeping first-seen order.
# dict.fromkeys preserves insertion order; a set of strings can iterate in a
# different order on every interpreter run, which would make the scrape order
# (and the row order of the resulting DataFrame) non-reproducible.
imo_numbers = list(dict.fromkeys(imo_numbers))

In [27]:
# Report how many values imo_numbers holds and preview the first ten.
imo_count = len(imo_numbers)
print(f"Total IMOs: {imo_count}")
print(imo_numbers[:10])

Total IMOs: 405
['9448360', '9898204', '9857183', '9307346', '9450363', '9315018', '9267003', '9544932', '9318010', '9308584']


In [28]:
# Scrape the ship characteristics for every IMO number in imo_numbers into one
# DataFrame. extract_ship_info prints a message for each IMO whose page lacks
# the expected information section or table.
df_imos_caracteristicas = extract_ship_info(imo_numbers)

Could not find the ship information section for IMO 9898204
Could not find the ship information section for IMO 9857183
Could not find the ship information section for IMO 9920497
Could not find the ship information section for IMO 9745653
Could not find the ship information section for IMO 9930064
Could not find the ship information section for IMO 9955789
Could not find the ship information section for IMO 9755725
Could not find the ship information section for IMO 9888601
Could not find the ship information section for IMO 9746255
Could not find the ship information section for IMO 9930038
Could not find the ship information section for IMO 9770737
Could not find the ship information section for IMO 9756731
Could not find the ship information section for IMO 9936616
Could not find the ship information section for IMO 9929431
Could not find the ship information section for IMO 9985344
Could not find the ship information section for IMO 9924211
Could not find the ship information sect

Unnamed: 0,IMO number,MMSI,Name of the ship,Former names,Vessel type,Operating status,Flag,Gross tonnage,Deadweight,Length,...,Engine type,Engine model,Engine power,Year of build,Builder,Classification society,Home port,Owner,Manager,Description
0,9448360,244129000,POOLGRACHT,HHL FREMANTLE ...,General cargo vessel,Active,Netherlands,17644 tons,19381 tons,168 m,...,MAN,7L 58/64,9800 KW,2011,"HUDONG ZHONGHUA SHIPBUILDING GROUP - SHANGHAI,...",GERMANISCHER LLOYD,AMSTERDAM,"HANSA HEAVY LIFT - HAMBURG, GERMANY","HANSA HEAVY LIFT - HAMBURG, GERMANY",POOLGRACHT is a General cargo vessel built in ...
1,9307346,308371000,LARVIK,,Crude oil tanker,Active,Bahamas,35711 tons,61213 tons,213 m,...,,,,2006,"SUMITOMO HEAVY INDUSTRIES - TOKYO, JAPAN",LLOYD'S SHIPPING REGISTER,NASSAU,"BERGSHAV - GRIMSTAD, NORWAY","BERGSHAV - GRIMSTAD, NORWAY",LARVIK is a Crude oil tanker built in 2006 by ...
2,9450363,255806170,NORTHERN JAMBOREE,NORTHERN JAMBOREE ...,Container ship,Active,Portugal,94419 tons,108827 tons,332 m,...,MAN-B&W,10K98ME-C,57100 KW,2010,DAEWOO SHIPBUILDING & MARINE ENGINEERING CO. L...,GERMANISCHER LLOYD,MONROVIA,"NORDDEUTSCHE REEDEREI - HAMBURG, GERMANY","NORDDEUTSCHE REEDEREI - HAMBURG, GERMANY",NORTHERN JAMBOREE is a Container ship built in...
3,9315018,210283000,WEC DE HOOGH,ELITE ...,Container ship,Active,Cyprus,11662 tons,13716 tons,149 m,...,Sulzer,10RND90,8400 KW,2005,"JJ SIETAS SCHIFFSWERFT - HAMBURG, GERMANY",GERMANISCHER LLOYD,HARLINGEN,"JR SHIPPING - HARLINGEN, NETHERLANDS","JR SHIPPING - HARLINGEN, NETHERLANDS",WEC DE HOOGH is a Container ship built in 2005...
4,9267003,310487000,LNG OYO,,LNG carrier,Active,Bermuda,97561 tons,83068 tons,285 m,...,,,,2005,DAEWOO SHIPBUILDING & MARINE ENGINEERING CO. L...,LLOYD'S SHIPPING REGISTER,HAMILTON,"BW GAS - OSLO, NORWAY","BW FLEET MANAGEMENT - LYSAKER, NORWAY",LNG OYO is a LNG carrier built in 2005 by DAEW...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,9437309,351734000,ABB VANESSA,HR FREQUENCY ...,General cargo vessel,Active,Panama,9611 tons,12629 tons,139 m,...,MAK,6M43C,5400 KW,2009,"QINGSHAN SHIPYARD - WUHAN, CHINA",GERMANISCHER LLOYD,MONROVIA,"HAMMONIA REEDEREI - HAMBURG, GERMANY","HAMMONIA REEDEREI - HAMBURG, GERMANY",ABB VANESSA is a General cargo vessel built in...
339,9294991,636021155,MSC MUMBAI,NORTHERN JADE ...,Container ship,Active,Liberia,94483 tons,108106 tons,332 m,...,MAN-B&W,12K98ME-C,68470 KW,2005,DAEWOO SHIPBUILDING & MARINE ENGINEERING CO. L...,GERMANISCHER LLOYD,HAMBURG,"NORDDEUTSCHE REEDEREI - HAMBURG, GERMANY","NORDDEUTSCHE REEDEREI - HAMBURG, GERMANY",MSC MUMBAI is a Container ship built in 2005 b...
340,8616623,219483000,FINOLA,MV FINOLA ...,Livestock carrier,Active,Denmark,3228 tons,1748 tons,85 m,...,,,,1987,"ORSKOV YARD - FREDERIKSHAVN, DENMARK",BUREAU VERITAS,EGERNSUND,"CORRAL LINE - EGERNSUND, DENMARK","CORRAL LINE - EGERNSUND, DENMARK",FINOLA is a Livestock carrier built in 1987 by...
341,9238741,636016429,MSC MARGARITA,USC MA WARITA X ...,Container ship,Active,Liberia,66500 tons,71135 tons,277 m,...,MAN-B&W,12K90MC,74640 KW,2002,SAMSUNG SHIPBUILDING & HEAVY INDUSTRIES CO. LT...,GERMANISCHER LLOYD,MONROVIA,,,MSC MARGARITA is a Container ship built in 200...
