In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# walk the input directory tree and print the full path of every file found
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **SHIPPING ANALYTICS**

*Data from United Nations Conference on Trade and Development (UNCTAD).*

[link here](https://unctadstat.unctad.org/EN/BulkDownload.html)

4 datasets are used in this analysis, namely:
1. Merchant fleet by country of beneficial ownership, annual
2. Merchant fleet by type of registration and by type of ship, annual
3. Ships built by country of building, annual
4. Ship scrapping by country of demolition, annual

## **CONTENTS**

*click on titles to jump to section*

[<font color=black>**1. IMPORTING & CLEANING THE DATA**</font>](#1.-IMPORTING-&-CLEANING-THE-DATA)

[1.1. Merchant fleet by country of beneficial ownership](#1.1.-Merchant-fleet-by-country-of-beneficial-ownership)

[1.2. Merchant fleet by type of registration and by type of ship](#1.2.-Merchant-fleet-by-type-of-registraton-and-by-type-of-ship)

[1.3. Ships built by country of building](#1.3.-Ships-built-by-country-of-building)

[1.4. Ship scrapping by country of demolition](#1.4.-Ship-scrapping-by-country-of-demolition)

[<font color=black>**2. VISUALS**</font>](#2.-VISUALS)

[<font color=black>*2.1. Shipowners*</font>](#2.1.-Shipowners)

[2.1.1. World fleet (number of ships)](#fig1_1)

[2.1.2. World fleet (DWT)](#fig1_2)

[2.1.3. Five largest fleets by shipowners (number of ships)](#fig1_3)

[2.1.4. Five largest fleets by shipowners (DWT)](#fig1_4)

[2.1.5. Three largest fleets as % of total world fleet](#fig1_5)

[2.1.6. Five largest fleets by flag of registration (number of ships)](#fig1_6)

[2.1.7. Five largest fleets by flag of registration (DWT)](#fig1_7)

[<font color=black>*2.2. Ship types*</font>](#2.2.-Ship-types)

[2.2.1. Types of ships, world fleet](#fig2_1)

[2.2.2. Five largest bulker fleets by flag of registration](#fig2_2)

[2.2.3. Five largest tanker fleets by flag of registration](#fig2_3)

[2.2.4. Equivalent world tanker fleet, Aframax-](#fig2_4)

[2.2.5. Equivalent world tanker fleet, Suezmax+](#fig2_5)

[<font color=black>*2.3. Ship-building*</font>](#2.3.-Ship-building)

[2.3.2. Five largest ship-builders by location](#fig3_2)

[<font color=black>*2.4. Ship-scrapping*</font>](#2.4.-Ship-scrapping)

[2.4.2. Five largest ship-scrappers by location](#fig4_2)

[2.4.3. World ship-building/scrapping](#fig4_3)

### Import libraries that will be used for the analysis:

In [None]:
# import seaborn and matplotlib for visualizations
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
# use the fivethirtyeight look for every figure
plt.style.use("fivethirtyeight")
# default figure size as (width, height)
plt.rcParams.update({"figure.figsize": (20, 10)})
# default font settings, pushed through the rc mechanism
font = dict(weight="normal", size=20)
plt.rc("font", **font)
# larger, bold axes titles
plt.rc("axes", titleweight="bold", titlesize=25)

# *1. IMPORTING & CLEANING THE DATA*

## 1.1. Merchant fleet by country of beneficial ownership

In [None]:
# location of the fleet beneficial owners dataset
ship_owners_csv = "/kaggle/input/shipping-analytics-world-merchant-fleet/US_FleetBeneficialOwners_ST202010231134_v2.csv"
# read it into a data frame
ship_owners = pd.read_csv(ship_owners_csv)
# summarise columns, non-null counts and dtypes of the imported data frame
ship_owners.info()

There are 3 features with only null values, namely *Dead weight tons in thousands Footnote, Percentage of total fleet Footnote* and *Number of ships Footnote*, so naturally we would like to drop these features.

In [None]:
# footnote features of the owners data, dropped here as null-only features
owner_footnote_columns = [
    "Dead weight tons in thousands Footnote",
    "Percentage of total fleet Footnote",
    "Number of ships Footnote",
]
ship_owners = ship_owners.drop(columns=owner_footnote_columns)

In [None]:
# original feature name -> shorter name used throughout the analysis
owner_column_names = {
    "FlagOfRegistration": "FlagID",
    "FlagOfRegistration Label": "Flag",
    "BeneficialOwnership": "OwnerID",
    "BeneficialOwnership Label": "Owner",
    "Dead weight tons in thousands": "DWT (thousands)",
    "Percentage of total fleet": "% of total fleet",
    "Number of ships": "# Ships",
}
ship_owners = ship_owners.rename(columns=owner_column_names)
# show top-5 rows of data frame
ship_owners.head()

In [None]:
# count the records whose number of ships is exactly 0
int(ship_owners["# Ships"].eq(0).sum())

There are 34103 records with 0 number of ships. These records will be removed.

In [None]:
# mask of the records whose number of ships is not 0
has_ships = ship_owners["# Ships"].ne(0)
ship_owners = ship_owners.loc[has_ships]
# show info
ship_owners.info()

In [None]:
# see all distinct country owner names, to spot the aggregates and wider
# regions that are not individual states
ship_owners["Owner"].unique()

Some owner names, such as ACP and LAS, or wider geographic regions, are of no interest, since we only want to analyze the data pertaining to individual states. We therefore define a function that receives a name (string) and returns True if the corresponding record should be removed and False otherwise. The function is applied to the owner names to create a new feature, which is then used to clean the data.

In [None]:
# function remove_index
def remove_index(name):
    """
    Decide whether the record carrying this name should be removed.

    returns True if the first 2 letters of the input string are uppercase
    returns True if the input string contains any of the listed fragments
    else returns False
    """
    # fragments that mark the records we want to remove (case-sensitive)
    unwanted_fragments = ("Develop", "merging", "Selected", "et food", "High", "Middle", "Low", "Euro",
                          "Asia", "conom", "n.e.", "n Africa", "n America", "member")
    # either condition alone is enough to flag the record
    return name[0:2].isupper() or any(fragment in name for fragment in unwanted_fragments)

# create feature remove_record, all records with value True are to be removed.
# ship_owners is the result of a boolean filter at this point, so the feature is
# added with assign (which returns a new frame) rather than by setting a column
# on the filtered frame, which can raise a SettingWithCopyWarning.
ship_owners = ship_owners.assign(remove_record=ship_owners["Owner"].apply(remove_index))

In [None]:
# mask of the records that were not flagged for removal
owners_to_keep = ship_owners["remove_record"].eq(False)
ship_owners = ship_owners[owners_to_keep]
# see cleared distinct country owner names
ship_owners["Owner"].unique()

In [None]:
# assign new indices 0..n-1 to the dataframe and delete the remove_record feature;
# reset_index(drop=True) replaces the hand-built pd.Index(range(len(...))) and
# both calls return new frames instead of mutating ship_owners in place
ship_owners = ship_owners.reset_index(drop=True).drop(columns="remove_record")
# show info
ship_owners.info()

**Only 6503 records were left after cleaning the data. Having started with 57771 records, that's an 89% decrease in the number of records. Memory usage dropped accordingly, from 4.8 MB to 457 KB.**

<a id="1.2.-Merchant-fleet-by-type-of-registraton-and-by-type-of-ship"></a>

## 1.2. Merchant fleet by type of registration and by type of ship

In [None]:
# location of the merchant fleet types dataset
ship_types_csv = "/kaggle/input/shipping-analytics-world-merchant-fleet/US_MerchantFleet_ST202010011334_v4.csv"
# read it into a data frame
ship_types = pd.read_csv(ship_types_csv)
# summarise columns, non-null counts and dtypes of the imported data frame
ship_types.info()

Just like before we shall drop the features with solely null values.

In [None]:
# footnote features of the ship types data, dropped here as null-only features
type_footnote_columns = [
    "Dead weight tons in thousands Footnote",
    "Percentage of total world Footnote",
    "Percentage of total fleet Footnote",
    "Number of ships Footnote",
    "Gross Tonnage in thousands Footnote",
]
ship_types = ship_types.drop(columns=type_footnote_columns)

In [None]:
# original feature name -> shorter name used throughout the analysis
type_column_names = {
    "Economy": "CountryID",
    "Economy Label": "Country",
    "ShipType": "ShipTypeID",
    "ShipType Label": "ShipType",
    "Dead weight tons in thousands": "DWT (thousands)",
    "Percentage of total world": "% of total world",
    "Percentage of total fleet": "% of total fleet",
    "Number of ships": "# Ships",
    "Gross Tonnage in thousands": "GT (thousands)",
}
ship_types = ship_types.rename(columns=type_column_names)
# show top-5 rows of data frame
ship_types.head()

ShipTypeID and ShipType features will be converted into categorical ones.

In [None]:
# cast ShipTypeID and ShipType to the category dtype in a single call
ship_types = ship_types.astype({"ShipTypeID": "category", "ShipType": "category"})
# show categories of ship types
ship_types["ShipType"].cat.categories

In [None]:
# count the records whose number of ships is exactly 0
int(ship_types["# Ships"].eq(0).sum())

2969 records have a 0 number of ships value, these records will be dropped.

In [None]:
# mask of the records whose number of ships is not 0
types_with_ships = ship_types["# Ships"].ne(0)
ship_types = ship_types.loc[types_with_ships]

As before we will narrow our analysis to certain countries so the data will be cleansed with the function defined in the previous section.

In [None]:
# create feature remove_record, all records with value True are to be removed.
# ship_types is the result of a boolean filter at this point, so the feature is
# added with assign (which returns a new frame) rather than by setting a column
# on the filtered frame, which can raise a SettingWithCopyWarning.
ship_types = ship_types.assign(remove_record=ship_types["Country"].apply(remove_index))

In [None]:
# mask of the records that were not flagged for removal
types_to_keep = ship_types["remove_record"].eq(False)
ship_types = ship_types[types_to_keep]

In [None]:
# assign new indices 0..n-1 and delete the remove_record feature;
# reset_index(drop=True) replaces the hand-built pd.Index(range(len(...))) and
# both calls return new frames instead of mutating ship_types in place
ship_types = ship_types.reset_index(drop=True).drop(columns="remove_record")
# show info
ship_types.info()

**From 79873 to 35605 records, a 55% decrease in the number of records.**

## 1.3. Ships built by country of building

In [None]:
# location of the ship building dataset
ship_building_csv = "/kaggle/input/shipping-analytics-world-merchant-fleet/US_ShipBuilding_ST202010131356_v1.csv"
# read it into a data frame
ship_building = pd.read_csv(ship_building_csv)
# summarise columns, non-null counts and dtypes of the imported data frame
ship_building.info()

2 null features; we drop them.

In [None]:
# footnote features of the ship building data, dropped here as null-only features
building_footnote_columns = [
    "Percentage of total all economies Footnote",
    "Gross Tonnage Footnote",
]
ship_building = ship_building.drop(columns=building_footnote_columns)
# show top-5 rows of data frame
ship_building.head()

In [None]:
# show distinct values of Series Label feature, to check whether the feature
# carries any information worth keeping
ship_building["Series Label"].unique()

In [None]:
# Series Label does not provide any information, so the feature is discarded
ship_building = ship_building.drop(columns="Series Label")
# show distinct values of Series feature
ship_building["Series"].unique()

In [None]:
# Series does not provide any information, so the feature is discarded
ship_building = ship_building.drop(columns="Series")
# show distinct ShipType Label feature values
ship_building["ShipType Label"].unique()

In [None]:
# ShipType Label provides no information, so the feature is discarded
ship_building = ship_building.drop(columns="ShipType Label")
# show distinct ShipType feature values
ship_building["ShipType"].unique()

In [None]:
# original feature name -> shorter name used throughout the analysis
building_column_names = {
    "Country": "CountryID",
    "Country Label": "Country",
    "Percentage of total all economies": "% of total all economies",
    "Gross Tonnage": "GT",
}
# ShipType provides no information: discard it, then rename the remaining features
ship_building = ship_building.drop(columns="ShipType").rename(columns=building_column_names)

In [None]:
# count the records whose GT value is exactly 0
int(ship_building["GT"].eq(0).sum())

154 records have a 0 GT value, so they will be removed.

In [None]:
# mask of the records whose GT value is not 0
built_tonnage_present = ship_building["GT"].ne(0)
ship_building = ship_building.loc[built_tonnage_present]

Again, redundant geographic regions are also removed.

In [None]:
# create feature remove_record, all records with value True are to be removed.
# ship_building is the result of a boolean filter at this point, so the feature
# is added with assign (which returns a new frame) rather than by setting a
# column on the filtered frame, which can raise a SettingWithCopyWarning.
ship_building = ship_building.assign(remove_record=ship_building["Country"].apply(remove_index))

In [None]:
# mask of the records that were not flagged for removal
builders_to_keep = ship_building["remove_record"].eq(False)
ship_building = ship_building[builders_to_keep]

In [None]:
# assign new indices 0..n-1 and delete the remove_record feature;
# reset_index(drop=True) replaces the hand-built pd.Index(range(len(...))) and
# both calls return new frames instead of mutating ship_building in place
ship_building = ship_building.reset_index(drop=True).drop(columns="remove_record")
# show info
ship_building.info()

**1314 to 322 records, 75% decrease in the number of records.**

## 1.4. Ship scrapping by country of demolition

In [None]:
# location of the ship scrapping dataset
ship_scraping_csv = "/kaggle/input/shipping-analytics-world-merchant-fleet/US_ShipScrapping_ST202010121509_v1.csv"
# read it into a data frame
ship_scraping = pd.read_csv(ship_scraping_csv)
# summarise columns, non-null counts and dtypes of the imported data frame
ship_scraping.info()

2 null features that are to be dropped.

In [None]:
# footnote features of the ship scrapping data, dropped here as null-only features
scrapping_footnote_columns = [
    "Percentage of total all economies Footnote",
    "Gross Tonnage Footnote",
]
ship_scraping = ship_scraping.drop(columns=scrapping_footnote_columns)
# show top-5 rows of dataframe
ship_scraping.head()

In [None]:
# show distinct values of Series Label feature, to check whether the feature
# carries any information worth keeping
ship_scraping["Series Label"].unique()

In [None]:
# Series Label provides no information, so the feature is discarded
ship_scraping = ship_scraping.drop(columns="Series Label")
# show distinct values of Series feature
ship_scraping["Series"].unique()

In [None]:
# Series provides no information, so the feature is discarded
ship_scraping = ship_scraping.drop(columns="Series")
# show distinct ShipType Label feature values
ship_scraping["ShipType Label"].unique()

In [None]:
# ShipType Label provides no information, so the feature is discarded
ship_scraping = ship_scraping.drop(columns="ShipType Label")
# show distinct ShipType feature values
ship_scraping["ShipType"].unique()

In [None]:
# delete ShipType feature since it provides no information
del ship_scraping["ShipType"]
# rename features.
# The footnote feature dropped from this dataset is called
# "Percentage of total all economies Footnote", so the percentage feature is
# presumably "Percentage of total all economies" (as in the ship building data)
# and not "Percentage of total world". rename() silently ignores keys that are
# not present, so with only the latter key the feature was never renamed.
# The previous key is kept as well, so the cell behaves as before if that name
# does exist.
# NOTE(review): confirm no later cell still reads the raw column name.
ship_scraping = ship_scraping.rename(columns={"Country":"CountryID", "Country Label":"Country",
                                              "Percentage of total all economies":"% of total all economies",
                                              "Percentage of total world":"% of total all economies",
                                              "Gross Tonnage":"GT"})

In [None]:
# show top-5 rows of dataframe, to check the feature names after the rename
ship_scraping.head()

In [None]:
# count the records whose GT value is exactly 0
int(ship_scraping["GT"].eq(0).sum())

92 records with a 0 GT value are removed.

In [None]:
# mask of the records whose GT value is not 0
scrapped_tonnage_present = ship_scraping["GT"].ne(0)
ship_scraping = ship_scraping.loc[scrapped_tonnage_present]

As always, we remove the regions we are not interested in.

In [None]:
# create feature remove_record, all records with value True are to be removed.
# ship_scraping is the result of a boolean filter at this point, so the feature
# is added with assign (which returns a new frame) rather than by setting a
# column on the filtered frame, which can raise a SettingWithCopyWarning.
ship_scraping = ship_scraping.assign(remove_record=ship_scraping["Country"].apply(remove_index))

In [None]:
# mask of the records that were not flagged for removal
scrappers_to_keep = ship_scraping["remove_record"].eq(False)
ship_scraping = ship_scraping[scrappers_to_keep]

In [None]:
# assign new indices 0..n-1 to the dataframe and delete the remove_record feature;
# both calls return new frames instead of mutating ship_scraping in place
ship_scraping = ship_scraping.reset_index(drop=True).drop(columns="remove_record")
# convert GT feature to int data type.
# The previous statement, ship_scraping["GT"].values.astype(int), built a
# converted array and threw it away, so the feature was never converted; the
# result is now stored back in the dataframe. NaN has no integer representation,
# so the cast is applied only when every GT value is present; otherwise the
# feature keeps its current dtype, exactly as before.
if ship_scraping["GT"].notna().all():
    ship_scraping = ship_scraping.assign(GT=ship_scraping["GT"].astype(int))
# show info
ship_scraping.info()

# *2. VISUALS*

## 2.1. Shipowners

In [None]:
# preview the first rows of the cleaned ship owners dataframe
ship_owners.head()

A dataframe will be created containing the data of the world fleet.

In [None]:
# records whose owner is the whole world ...
owned_by_world = ship_owners["Owner"] == "World"
# ... and whose FlagID is 0
flag_id_zero = ship_owners["FlagID"] == 0
world_fleet = ship_owners[owned_by_world & flag_id_zero]
world_fleet.head(10)

The number of ships of the world fleet from 2014 to 2020 is plotted below.

<a id="fig1_1"></a>

In [None]:
# bar chart of the world fleet's number of ships per year, drawn on explicit axes
fig, ax = plt.subplots()
ax.bar(world_fleet["Year"], world_fleet["# Ships"])
# title and y label
ax.set_title("World fleet (number of ships)")
ax.set_ylabel("Number of ships")
# render the figure
plt.show()

The Dead Weight Tonnage, in thousands, of the world fleet from 2014 to 2020 is shown below. 

<a id="fig1_2"></a>

In [None]:
# bar chart of the world fleet's DWT (thousands) per year, drawn on explicit axes
fig, ax = plt.subplots()
ax.bar(world_fleet["Year"], world_fleet["DWT (thousands)"])
# title and y label
ax.set_title("World fleet (DWT)")
ax.set_ylabel("thousand DWT")
# render the figure
plt.show()

A dataframe will be created containing the data of the five largest fleets, by shipowner, over the last 7 years.

In [None]:
# records that do not refer to the world fleet
not_world_owner = ship_owners["Owner"] != "World"
# records with more than 5% of total fleet
above_five_percent = ship_owners["% of total fleet"] > 5
# records that refer to all flags
all_flags = ship_owners["FlagID"] == 0
# keep the records satisfying the three conditions at once
largest_fleets = ship_owners[not_world_owner & above_five_percent & all_flags]

largest_fleets["Owner"].unique()

The number of ships in the five largest fleets, by shipowner, over the last 7 years is shown below.

<a id="fig1_3"></a>

In [None]:
# bar chart x positions for countries with 7, 5 and 4 records respectively
index = np.arange(7)
index_1 = np.arange(5)
index_2 = np.arange(4) + 3
# bar width
width = 0.15

fig, ax = plt.subplots()
# (owner, x positions of its bars, number of leading records left out),
# listed in legend order
ship_bar_specs = [
    ("China", index - 3*width, 0),
    ("Greece", index - 2*width, 0),
    ("Japan", index - width, 0),
    ("Germany", index_1, 0),
    ("Singapore", index_2 + width, 1),
]
for owner, positions, skipped in ship_bar_specs:
    # number of ships of this owner, in the order the records appear
    owner_ships = largest_fleets.loc[largest_fleets["Owner"] == owner, "# Ships"]
    ax.bar(positions, owner_ships.iloc[skipped:], width)
# x axis tick labels
ax.set_xticks(index - width)
ax.set_xticklabels(largest_fleets["Year"].unique())
# legend
ax.legend(("China", "Greece", "Japan", "Germany", "Singapore"))
# y axis label and title
ax.set_ylabel("Number of ships")
ax.set_title("Five largest fleets by shipowners (number of ships)")
# render the figure
plt.show()

The DWT of the five largest fleets, by shipowner, over the last 7 years is shown below.

<a id="fig1_4"></a>

In [None]:
# bar chart x positions for countries with 7, 5 and 4 records respectively
index = np.arange(7)
index_1 = np.arange(5)
index_2 = np.arange(4) + 3
# bar width
width = 0.15

fig, ax = plt.subplots()
# (owner, x positions of its bars, number of leading records left out),
# listed in legend order
dwt_bar_specs = [
    ("China", index - 3*width, 0),
    ("Greece", index - 2*width, 0),
    ("Japan", index - width, 0),
    ("Germany", index_1, 0),
    ("Singapore", index_2 + width, 1),
]
for owner, positions, skipped in dwt_bar_specs:
    # DWT (thousands) of this owner, in the order the records appear
    owner_dwt = largest_fleets.loc[largest_fleets["Owner"] == owner, "DWT (thousands)"]
    ax.bar(positions, owner_dwt.iloc[skipped:], width)
# x axis tick labels
ax.set_xticks(index - width)
ax.set_xticklabels(largest_fleets["Year"].unique())
# legend
ax.legend(("China", "Greece", "Japan", "Germany", "Singapore"))
# y axis label and title
ax.set_ylabel("thousand DWT")
ax.set_title("Five largest fleets by shipowners (DWT)")
# render the figure
plt.show()

An area plot of the three largest fleets, by shipowner, from 2014 to 2020 is presented below.

<a id="fig1_5"></a>

In [None]:
# owners stacked in the area plot, bottom to top
stacked_owners = ("China", "Greece", "Japan")
# % of total fleet of each owner, in the order the records appear
owner_shares = [
    largest_fleets.loc[largest_fleets["Owner"] == owner, "% of total fleet"]
    for owner in stacked_owners
]
# area stackplot
fig, ax = plt.subplots()
ax.stackplot(largest_fleets["Year"].unique(), *owner_shares, alpha=0.8)
# legend
ax.legend(("China", "Greece", "Japan"), ncol=3)
# y axis label and title
ax.set_ylabel("% of total fleet by DWT")
ax.set_title("Three largest fleets by shipowners")
# render the figure
plt.show()

A dataframe will be created containing the data of the five largest fleets, by flag of registration, over the last 7 years.

In [None]:
# records that do not refer to all flags
single_flag = ship_owners["FlagID"] != 0
# records that refer to world fleet
world_owner = ship_owners["Owner"] == "World"
# records with more than 1500 ships
over_1500_ships = ship_owners["# Ships"] > 1500
# keep the records satisfying the three conditions at once
flag_fleets = ship_owners[single_flag & world_owner & over_1500_ships]
flag_fleets["Flag"].unique()

Bar charts of the number of ships and the DWT of the five largest fleets, by flag of registration, from 2014 to 2020 are shown below.

<a id="fig1_6"></a>

In [None]:
# bar chart x positions for countries with 7 records
index = np.arange(7)
# bar width
width = 0.15

fig, ax = plt.subplots()
# flags in legend order; bar group k is drawn at index + (k - 3) * width
plotted_flags = ("Panama", "China", "Liberia", "Marshall Islands", "Singapore")
for slot, flag in enumerate(plotted_flags):
    # number of ships under this flag, in the order the records appear
    flag_ships = flag_fleets.loc[flag_fleets["Flag"] == flag, "# Ships"]
    ax.bar(index + (slot - 3) * width, flag_ships, width)
# x axis tick labels
ax.set_xticks(index - width)
ax.set_xticklabels(flag_fleets["Year"].unique())
# legend
ax.legend(("Panama", "China", "Liberia", "Marshall Islands", "Singapore"),
          ncol=1, loc=[0.95, 0.5])
# y axis label and title
ax.set_ylabel("number of ships")
ax.set_title("Five largest fleets by flag of registration (number of ships)")
# render the figure
plt.show()

<a id="fig1_7"></a>

In [None]:
# bar chart x positions for countries with 7 records
index = np.arange(7)
# bar width
width = 0.15

fig, ax = plt.subplots()
# flags in legend order; bar group k is drawn at index + (k - 3) * width
plotted_flags = ("Panama", "China", "Liberia", "Marshall Islands", "Singapore")
for slot, flag in enumerate(plotted_flags):
    # DWT (thousands) under this flag, in the order the records appear
    flag_dwt = flag_fleets.loc[flag_fleets["Flag"] == flag, "DWT (thousands)"]
    ax.bar(index + (slot - 3) * width, flag_dwt, width)
# x axis tick labels
ax.set_xticks(index - width)
ax.set_xticklabels(flag_fleets["Year"].unique())
# legend
ax.legend(("Panama", "China", "Liberia", "Marshall Islands", "Singapore"),
          ncol=1, loc=[0.95, 0.5])
# y axis label and title
ax.set_ylabel("thousand DWT")
ax.set_title("Five largest fleets by flag of registration (DWT)")
# render the figure
plt.show()

### 2.2. Ship types

In [None]:
# preview the first rows of the cleaned ship types dataframe
ship_types.head()

A dataframe will be created containing the data of the world fleet.

In [None]:
# records of world fleet
world_records = ship_types["Country"] == "World"
# records with ship types other than total fleet
single_ship_type = ship_types["ShipType"] != "Total fleet"
# keep the records satisfying both conditions
world_fleet_types = ship_types[world_records & single_ship_type]
# show info
world_fleet_types.info()

An area plot of the types of ships composing the world fleet is shown below.

<a id="fig2_1"></a>

In [None]:
# (ship type as named in the data, label shown in the legend), bottom to top
type_layers = [
    ("Oil tankers", "Oil tankers"),
    ("Bulk carriers", "Bulk carriers"),
    ("General cargo", "General cargo"),
    ("Container ships", "Container ships"),
    ("Other types of ships", "Other"),
]
# % of total fleet of each ship type, in the order the records appear
type_shares = [
    world_fleet_types.loc[world_fleet_types["ShipType"] == ship_type, "% of total fleet"]
    for ship_type, _legend_label in type_layers
]
# area stackplot
fig, ax = plt.subplots()
ax.stackplot(world_fleet_types["Year"].unique(), *type_shares)
# legend, with its position and number of columns
ax.legend([legend_label for _ship_type, legend_label in type_layers],
          ncol=1, loc=[0.98, 0.4])
# y axis label and title
ax.set_ylabel("% of total fleet by DWT")
ax.set_title("Types of ships, world fleet")
# render the figure
plt.show()

A dataframe will be created containing the data of bulkers.

In [None]:
# records with Bulk carriers ship type
is_bulker = ship_types["ShipType"] == "Bulk carriers"
# records not referring to world fleet
bulker_not_world = ship_types["Country"] != "World"
# records with more than 5% of total bulker fleet
bulker_above_five = ship_types["% of total world"] > 5
# keep the records satisfying the three conditions at once
bulkers = ship_types[is_bulker & bulker_not_world & bulker_above_five]
bulkers["Country"].unique()

2 features will be added describing the average DWT and GT per bulker ship.

In [None]:
# create features DWT (thousands) per ship and GT (thousands) per ship as DWT,
# respectively GT, over the number of ships of each record.
# The division is done on whole columns instead of row by row with apply, and
# the features are added with assign (which returns a new frame) because bulkers
# is the result of a boolean filter: setting columns on it directly can raise a
# SettingWithCopyWarning.
bulkers = bulkers.assign(**{
    "DWT (thousands) per ship": bulkers["DWT (thousands)"] / bulkers["# Ships"],
    "GT (thousands) per ship": bulkers["GT (thousands)"] / bulkers["# Ships"],
})

In [None]:
# some countries don't have data before 2014, so only later records are kept
after_2013 = bulkers["Year"].gt(2013)
bulkers = bulkers.loc[after_2013]

<a id="fig2_2"></a>

In [None]:
# flags stacked in the area plot, bottom to top
bulker_flags = ("Panama", "Liberia", "Malta", "Marshall Islands", "China")
# % of total bulker fleet under each flag, in the order the records appear
bulker_shares = [
    bulkers.loc[bulkers["Country"] == flag, "% of total world"]
    for flag in bulker_flags
]
# area stackplot
fig, ax = plt.subplots()
ax.stackplot(bulkers["Year"].unique(), *bulker_shares)
# legend, with its position and number of columns
ax.legend(("Panama", "Liberia", "Malta", "Marshall Islands", "China"),
          loc=[0.98,0.4], ncol=1)
# y axis label and title
ax.set_ylabel("% of total bulker fleet by DWT")
ax.set_title("Five largest bulker fleets by flag of registration")
# render the figure
plt.show()

A dataframe will be created containing the data of tankers.

In [None]:
# records with Oil tankers ship type
is_tanker = ship_types["ShipType"] == "Oil tankers"
# records not referring to world fleet
tanker_not_world = ship_types["Country"] != "World"
# records with more than 5% of total tanker fleet
tanker_above_five = ship_types["% of total world"] > 5
# keep the records satisfying the three conditions at once
tankers = ship_types[is_tanker & tanker_not_world & tanker_above_five]
tankers["Country"].unique()

In [None]:
# some countries don't have data before 2003, so only later records are kept
after_2002 = tankers["Year"].gt(2002)
tankers = tankers.loc[after_2002]

An area plot is shown below, depicting the five largest tanker fleets by flag of registration from 2003 to 2020.

<a id="fig2_3"></a>

In [None]:
# flags stacked in the area plot, bottom to top
tanker_flags = ("Marshall Islands", "Liberia", "Panama", "Greece", "Singapore")
# % of total tanker fleet under each flag, in the order the records appear
tanker_shares = [
    tankers.loc[tankers["Country"] == flag, "% of total world"]
    for flag in tanker_flags
]
# area stackplot
fig, ax = plt.subplots()
ax.stackplot(tankers["Year"].unique(), *tanker_shares)
# legend, with its position and number of columns
ax.legend(("Marshall Islands", "Liberia", "Panama", "Greece", "Singapore"),
          loc=[1.02, 0.4], ncol=1)
# y axis label
ax.set_ylabel("% of total tanker fleet by DWT")
# x axis limits
ax.set_xlim(2003, 2020)
# title
ax.set_title("Five largest tanker fleets by flag of registration")
# render the figure
plt.show()

A dataframe is created that will contain information on the class of the oil tankers.

In [None]:
# records with Oil tankers ship type
oil_tanker_records = ship_types["ShipType"] == "Oil tankers"
# records not referring to world fleet
country_records = ship_types["Country"] != "World"
# keep the records satisfying both conditions
tanker_types = ship_types[oil_tanker_records & country_records]

In [None]:
# drop records with null values; tanker_types is the result of a boolean filter,
# so the cleaned frame is rebound to the name instead of mutating that filtered
# frame in place, which keeps the cell free of chained-assignment warnings
tanker_types = tanker_types.dropna()
# show info
tanker_types.info()

Two features are added to the oil tanker types dataframe, similarly to the bulker dataframe.

In [None]:
# create features DWT (thousands) per ship and GT (thousands) per ship as DWT,
# respectively GT, over the number of ships of each record.
# The division is done on whole columns instead of row by row with apply, and
# the features are added with assign (which returns a new frame) because
# tanker_types is the result of a boolean filter: setting columns on it directly
# can raise a SettingWithCopyWarning.
tanker_types = tanker_types.assign(**{
    "DWT (thousands) per ship": tanker_types["DWT (thousands)"] / tanker_types["# Ships"],
    "GT (thousands) per ship": tanker_types["GT (thousands)"] / tanker_types["# Ships"],
})

A function is defined whereby the tankers are classified as Product tankers, Panamax, Aframax, Suezmax or VLCC, according to their DWT.

In [None]:
def tanker_class(DWT):
    """
    returns a string with the class of the tanker according to the input DWT

    DWT is compared with the bounds 60, 80, 120 and 200 (the notebook passes
    thousands of DWT per ship). Each bound is exclusive: a value equal to a
    bound falls in the next, larger class. Any value that is not below 200
    is returned as "VLCC".
    """
    # (exclusive upper bound, class name), from the smallest class upwards
    class_limits = (
        (60, "Product tanker"),
        (80, "Panamax"),
        (120, "Aframax"),
        (200, "Suezmax"),
    )
    # the first bound that DWT stays below decides the class
    for upper_bound, class_name in class_limits:
        if DWT < upper_bound:
            return class_name
    # DWT is below none of the bounds, i.e. 200 or more
    return "VLCC"

In [None]:
# add the Class feature: the size class matching each record's DWT per ship
tanker_types = tanker_types.assign(
    Class=lambda frame: frame["DWT (thousands) per ship"].apply(tanker_class)
)

In [None]:
# preview the first rows of the tanker dataframe
tanker_types.head()

In [None]:
# list the distinct Country values, to check which entries are aggregates or duplicates
tanker_types["Country"].unique()

In [None]:
# duplicate countries and extended geographic regions (like continents) to leave out
excluded_countries = [
    "Africa excluding South Africa",
    "Dominica",
    "France, metropolitan",
    "China, Hong Kong SAR",
    "Africa",
    "America",
    "Caribbean",
    "Central America",
    "South America",
    "Oceania",
    "United Kingdom excl. Channel Islands and Isle of Man",
    "Isle of Man",
    "United States of America",
    "Micronesia",
]
# keep only the records whose Country is not in the list above
tanker_types = tanker_types.loc[~tanker_types["Country"].isin(excluded_countries)]

A new dataframe is created where tankers are grouped by year and class. The only feature of this dataframe is the sum of the number of ships in the aforementioned groups. This dataframe represents an equivalent (estimated) world tanker fleet by year and class.

In [None]:
# group by year and class and sum the number of ships of each group;
# selecting with a list ([["# Ships"]]) keeps the result a one-column dataframe
tanker_types = tanker_types.groupby(["Year", "Class"])[["# Ships"]].sum()

In [None]:
# rename the feature and store the ship counts as integers
tanker_types = (
    tanker_types
    .rename(columns={"# Ships": "Equivalent World fleet"})
    .astype({"Equivalent World fleet": "int"})
)
# print dataframe
print(tanker_types)

Two sub-dataframes are created: one containing vessels of class Aframax and smaller, and the other containing vessels of class Suezmax and bigger.

In [None]:
# classes of each size group
suezmax_and_bigger = ["Suezmax", "VLCC"]
aframax_and_smaller = ["Product tanker", "Panamax", "Aframax"]

# Aframax- dataframe: what is left after dropping the bigger classes
small_tanker_types = tanker_types.drop(index=suezmax_and_bigger, level="Class")
# Suezmax+ dataframe: what is left after dropping the smaller classes
big_tanker_types = tanker_types.drop(index=aframax_and_smaller, level="Class")

<a id="fig2_4"></a>

In [None]:
# bar chart of Aframax- tankers: classes become columns, so every year gets one bar per class
ax = small_tanker_types.unstack("Class").plot(kind="bar", width=0.7)
# axis labels (x left empty) and title
ax.set_ylabel("Number of ships")
ax.set_xlabel("")
ax.set_title("Equivalent world tanker fleet, Aframax-")
# keep the x tick labels horizontal
plt.xticks(rotation=0)
# legend; labels are matched to the class columns by order
ax.legend(["Aframax", "Panamax", "Product tanker"],
          bbox_to_anchor=(0.74, 1), ncol=3)
# render the figure
plt.show()

<a id="fig2_5"></a>

In [None]:
# bar chart of Suezmax+ tankers: classes become columns, so every year gets one bar per class
ax = big_tanker_types.unstack("Class").plot(kind="bar", width=0.8)
# axis labels (x left empty) and title
ax.set_ylabel("Number of ships")
ax.set_xlabel("")
ax.set_title("Equivalent world tanker fleet, Suezmax+")
# keep the x tick labels horizontal
plt.xticks(rotation=0)
# legend; labels are matched to the class columns by order
ax.legend(["Suezmax", "VLCC"])
# render the figure
plt.show()

### 2.3. Ship-building

In [None]:
# preview the first rows of the ship-building dataframe
ship_building.head()

In [None]:
# world ship building dataframe, keep records with "World" as country;
# .copy() makes it an independent frame, because its GT column is modified afterwards
# and assigning into a filtered slice raises SettingWithCopyWarning
world_ship_building = ship_building[ship_building["Country"] == "World"].copy()

In [None]:
# convert GT to million GT (column-wise division) and rename the feature;
# assign/rename return a new dataframe, so nothing is written into a filtered slice
world_ship_building = (
    world_ship_building
    .assign(GT=world_ship_building["GT"] / 1e+6)
    .rename(columns={"GT": "million GT"})
)

In [None]:
# ship builders by country: remove records with "World" as country,
# convert GT to million GT (column-wise division) and rename the feature;
# built in one chain from ship_building, so the cell can be re-run safely
big_ship_builders = (
    ship_building[ship_building["Country"] != "World"]
    .assign(GT=lambda frame: frame["GT"] / 1e+6)
    .rename(columns={"GT": "million GT"})
)

<a id="fig3_2"></a>

In [None]:
# (country name in the data, legend label) of the builders shown, in bar order
builders = (
    ("China", "China"),
    ("Korea, Republic of", "South Korea"),
    ("Japan", "Japan"),
    ("Philippines", "Philippines"),
    ("Viet Nam", "Vietnam"),
)
# years shown on the x axis
years = big_ship_builders["Year"].unique()
# bar chart index: one position per year, derived from the data
index = np.arange(len(years))
# width of a single bar
width = 0.15

# one bar series per country, placed side by side and centred on each year position
# NOTE(review): assumes every country has exactly one record per year, ordered like `years` — confirm
for position, (country, label) in enumerate(builders):
    offset = (position - 2) * width
    gt_built = big_ship_builders.loc[big_ship_builders["Country"] == country, "million GT"]
    plt.bar(index + offset, gt_built, width, label=label)

# x axis ticks indices and labels
plt.xticks(index, years)
# legend, built from the label given to each bar series
plt.legend(loc=[0.95, 0.5], ncol=1)
# y axis label
plt.ylabel("million Gross Tonnage")
# title
plt.title("Five largest ship-builders by location")
# render the figure
plt.show()

### 2.4. Ship-scrapping

In [None]:
# preview the first rows of the ship-scrapping dataframe
ship_scraping.head()

In [None]:
# convert GT to million GT (column-wise division) and rename the feature;
# the converted frame is rebound to ship_scraping instead of being mutated in place
ship_scraping = (
    ship_scraping
    .assign(GT=ship_scraping["GT"] / 1e+6)
    .rename(columns={"GT": "million GT"})
)
# create world ship scrapping dataframe, keep records with "World" as country
world_ship_scrapping = ship_scraping[ship_scraping["Country"] == "World"]

In [None]:
# ship scrappers by country: every record except those with "World" as country
big_ship_scrappers = ship_scraping.loc[ship_scraping["Country"].ne("World")]

<a id="fig4_2"></a>

In [None]:
# countries shown, in bar order (names are the same in the data and in the legend)
scrappers = ("Bangladesh", "India", "Turkey", "China", "Pakistan")
# years shown on the x axis
years = big_ship_scrappers["Year"].unique()
# bar chart index: one position per year, derived from the data
index = np.arange(len(years))
# bar width
width = 0.15

# one bar series per country, placed side by side and centred on each year position
# NOTE(review): assumes every country has exactly one record per year, ordered like `years` — confirm
for position, country in enumerate(scrappers):
    offset = (position - 2) * width
    gt_scrapped = big_ship_scrappers.loc[big_ship_scrappers["Country"] == country, "million GT"]
    plt.bar(index + offset, gt_scrapped, width, label=country)

# x axis ticks indices and labels
plt.xticks(index, years)
# y axis label
plt.ylabel("million Gross Tonnage")
# title
plt.title("Five largest ship-scrappers by location")
# legend, built from the label given to each bar series
plt.legend()
# render the figure
plt.show()

<a id="fig4_3"></a>

In [None]:
# bar width
width = 0.35

# world ship-building records after 2016, filtered once and reused below
recent_building = world_ship_building[world_ship_building["Year"] > 2016]
years = recent_building["Year"]
built_gt = recent_building["million GT"]
# NOTE(review): the scrapping series is drawn against the building years, which
# assumes both dataframes hold the same years in the same order — confirm
scrapped_gt = world_ship_scrapping["million GT"]

fig, ax = plt.subplots()

# GT of ships built worldwide, line chart
L1 = ax.plot(years, built_gt, label="Building")
# convert x axis labels to integers
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
# GT of ships built worldwide, bar chart; negative width with align="edge"
# draws the bar to the left of the year
ax.bar(years, built_gt, -width, align="edge")
# GT of ships scrapped worldwide, line chart
L2 = ax.plot(years, scrapped_gt, label="Scrapping")
# GT of ships scrapped worldwide, bar chart, drawn to the right of the year
ax.bar(years, scrapped_gt, width, align="edge")
# y axis label
ax.set_ylabel("million Gross Tonnage")
# title
ax.set_title("World ship-building/scrapping")
# legend, built from the labels of the two lines (the bars carry no label)
ax.legend()
# show text with % changes from year to year, coloured like the matching line;
# positions and percentages are hard-coded, not computed from the data
ax.text(2017.5, 63, "-7.6%", size=25, color=L1[0].get_color())
ax.text(2018.2, 62, "+7.8%", size=25, color=L1[0].get_color())
ax.text(2017.4, 23, "-4.1%", size=25, color=L2[0].get_color())
ax.text(2018.45, 17, "-6.7%", size=25, color=L2[0].get_color())
# render the figure
plt.show()