In [507]:
# %pip install plotly
# %pip install pandas
# %pip install seaborn
# %pip install nbformat >= 4.2.0

In [508]:
import pandas
import seaborn
import plotly

import warnings

In [509]:
MINIMUM_DATE = "2020-01-01"
MAXIMUM_DATE = "2024-01-01"

CONVERSION_FACTORS = {
  'TON': 907.185,
  'TNE': 1000,
  'KGS': 1,
  'Kgs': 1,
} 

UNIT_RATE_COLUMN = 'Unit Rate'

warnings.filterwarnings('ignore')

In [510]:
def set_datetime_index(dataframe, date_column):
    dataframe[date_column] = pandas.to_datetime(dataframe[date_column])
    dataframe.set_index(date_column, inplace=True)
    dataframe.sort_values(date_column, inplace=True)
    return dataframe

def filter_range(dataframe, column_name, minimum_date, maximum_date):
    return dataframe[
        (dataframe[column_name] >= minimum_date) &
        (dataframe[column_name] <= maximum_date)
    ]

def resample_weekly(series):
    return series.fillna(0).resample('W').sum()

def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

In [511]:
volza_copper = pandas.read_csv('../../../volza/copper/copper.csv')
volza_magnesium = pandas.read_csv('../../../volza/magnesium/magnesium.csv')
volza_petroleum = pandas.read_csv('../../../volza/petroleum/petrol_crude_oil_spot_price.csv', sep=';')

def preprocess_data (dataframe):
  dataframe = filter_range(dataframe, 'Date', MINIMUM_DATE, MAXIMUM_DATE)
  dataframe = filter_range(dataframe, 'Value', 0, 100)
  dataframe = set_datetime_index(dataframe, 'Date')
  return dataframe

def convert_units (dataframe):
  # dataframe['Std. Quantity'] = dataframe['Std. Quantity'].fillna(1)
  dataframe[UNIT_RATE_COLUMN] = dataframe['Value'] / (dataframe['Std. Quantity'] * dataframe['Std. Unit'].map(CONVERSION_FACTORS))
  return dataframe

volza_copper = preprocess_data(volza_copper)
volza_magnesium = preprocess_data(volza_magnesium)
volza_petroleum = preprocess_data(volza_petroleum)

# Remove countries without usable and easy to standardize units
volza_copper = volza_copper[volza_copper['Std. Unit'].isin(CONVERSION_FACTORS.keys())]
volza_magnesium = volza_magnesium[volza_magnesium['Std. Unit'].isin(CONVERSION_FACTORS.keys())]
# volza_copper = volza_copper[volza_copper['Std. Quantity'].apply(is_number)]
# volza_magnesium = volza_magnesium[volza_magnesium['Std. Quantity'].apply(is_number)]

volza_copper = convert_units(volza_copper)
volza_magnesium = convert_units(volza_magnesium)

# Resample to weekly
volza_copper = volza_copper[[UNIT_RATE_COLUMN]].resample('W').mean().fillna(0)
volza_magnesium = volza_magnesium[[UNIT_RATE_COLUMN]].resample('W').sum().fillna(0)
volza_petroleum = volza_petroleum[['Value']].resample('W').sum().fillna(0)

In [512]:
volza_petroleum["Value"].max()

942.7

In [513]:
display(volza_copper.head(1))
display(volza_magnesium.sort_values(by=UNIT_RATE_COLUMN).tail(10)) 
display(volza_petroleum.head(1))          

display(volza_copper.describe())
display(volza_magnesium.describe()) 
display(volza_petroleum.describe())          

Unnamed: 0_level_0,Unit Rate
Date,Unnamed: 1_level_1
2020-01-05,0.0


Unnamed: 0_level_0,Unit Rate
Date,Unnamed: 1_level_1
2022-10-09,4.0
2022-02-13,4.039474
2022-01-09,4.173333
2022-06-12,14.368649
2022-12-18,38.39
2021-01-03,96.958695
2022-08-21,215.08
2022-08-28,224.995
2021-03-07,247.847568
2022-06-26,inf


Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2020-01-05,389.21


Unnamed: 0,Unit Rate
count,157.0
mean,inf
std,
min,0.0
25%,0.0
50%,0.0
75%,0.000796
max,inf


Unnamed: 0,Unit Rate
count,155.0
mean,inf
std,
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,inf


Unnamed: 0,Value
count,208.0
mean,581.798942
std,254.6118
min,0.0
25%,405.8225
50%,655.17
75%,788.8575
max,942.7


In [514]:
columns = []
columns.append({"name": "Volza Copper Unit Rate", "date": volza_copper.index, "value": volza_copper[UNIT_RATE_COLUMN]})
columns.append({"name": "Volza Magnesium Unit Rate", "date": volza_magnesium.index, "value": volza_magnesium[UNIT_RATE_COLUMN]})

columns.append({"name": "Volza Copper Quantity", "date": volza_copper.index, "value": volza_copper[UNIT_RATE_COLUMN]})
columns.append({"name": "Volza Magnesium Quantity", "date": volza_magnesium.index, "value": volza_magnesium[UNIT_RATE_COLUMN]})

columns.append({"name": "Petroleum Price", "date": volza_petroleum.index, "value": volza_petroleum["Value"]})

In [515]:
palette = list(seaborn.color_palette(palette='viridis', n_colors=len(columns)).as_hex())
figure = plotly.graph_objs.Figure()

for column, color in zip(columns, palette):
    figure.add_trace(plotly.graph_objs.Scatter(x = column["date"],
                             y = column["value"],
                             name = column["name"],
                             line_color = color, 
                             fill='tozeroy'))

figure.show()