In [1]:
# %pip install plotly
# %pip install pandas
# %pip install seaborn
# %pip install "nbformat>=4.2.0"

In [2]:
import pandas
import seaborn
import plotly

import warnings

KeyboardInterrupt: 

In [None]:
# Inclusive bounds of the date window; preprocess_data passes them to filter_range
# for the 'Date' column of every dataset.
MINIMUM_DATE = "2020-01-01"
MAXIMUM_DATE = "2024-01-01"

# Multiplier applied to 'Std. Quantity' for each 'Std. Unit' code (see convert_units).
# 'KGS'/'Kgs' map to 1, so the common unit is presumably kilograms, with 'TNE' a
# metric tonne (1000 kg) and 'TON' a US short ton (~907.185 kg) -- TODO confirm
# against the data provider's unit codes.
# Rows whose unit code is not a key here are dropped before conversion.
CONVERSION_FACTORS = {
  'TON': 907.185,
  'TNE': 1000,
  'KGS': 1,
  'Kgs': 1,
} 

# Fixed multiplier applied to the weekly petroleum 'Value' series.
# NOTE(review): the name says RMB -> USD; this is a single static rate, not a
# date-dependent one -- confirm that is acceptable for the 2020-2024 window.
RMB_TO_USD_RATE = 0.14

# Column names shared by the conversion and plotting cells.
UNIT_RATE_COLUMN = 'Unit Rate'
STD_QUANTITY_COLUMN = 'Std. Quantity'

# Silences every warning for the rest of the session, including pandas'
# SettingWithCopyWarning and deprecation notices.
warnings.filterwarnings('ignore')

In [None]:
def set_datetime_index(dataframe, date_column):
    """Return a copy of ``dataframe`` indexed by ``date_column`` in chronological order.

    The input frame is left untouched: the date column is parsed, moved to the
    index and sorted on a new object, so re-running a cell that calls this
    function on the same frame gives the same result every time.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that contains ``date_column`` as a regular column.
    date_column : str
        Name of the column holding the dates (anything ``pandas.to_datetime``
        accepts).

    Returns
    -------
    pandas.DataFrame
        New frame whose index is the parsed ``date_column`` (a DatetimeIndex
        keeping the column's name), sorted ascending.

    Raises
    ------
    KeyError
        If ``date_column`` is not a column of ``dataframe``.
    """
    parsed_dates = pandas.to_datetime(dataframe[date_column])
    # assign() builds a new frame, so a filtered slice passed in by the caller is
    # never written to (no SettingWithCopyWarning, no hidden state).
    return (
        dataframe
        .assign(**{date_column: parsed_dates})
        .set_index(date_column)
        # mergesort is stable: rows sharing a timestamp keep their original order.
        .sort_index(kind='mergesort')
    )

def filter_range(dataframe, column_name, minimum_date, maximum_date):
    """Keep the rows whose ``column_name`` value lies in the closed interval given.

    Despite the parameter names, the bounds are compared with ``>=`` / ``<=``
    as-is, so the function works for any comparable column (dates, numbers).
    Rows where the comparison is false (including missing values) are dropped.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to filter.
    column_name : str
        Column whose values are tested.
    minimum_date, maximum_date
        Inclusive lower and upper bounds.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` that satisfy both bounds.
    """
    values = dataframe[column_name]
    at_or_above_minimum = values.ge(minimum_date)
    at_or_below_maximum = values.le(maximum_date)
    return dataframe[at_or_above_minimum & at_or_below_maximum]

def resample_weekly(series):
    """Sum ``series`` into weekly bins, counting missing values as zero.

    Parameters
    ----------
    series : pandas.Series
        Series with a datetime-like index (required by ``resample``).

    Returns
    -------
    pandas.Series
        One value per ``'W'`` bin: the sum of the observations in that week.
    """
    gap_free = series.fillna(0)
    weekly_bins = gap_free.resample('W')
    return weekly_bins.sum()

def is_number(s):
    """Tell whether ``s`` can be converted with ``float()``.

    Parameters
    ----------
    s : object
        Value to test, typically a string or number taken from a data column.

    Returns
    -------
    bool
        ``True`` when ``float(s)`` succeeds, ``False`` otherwise. Strings such
        as ``'nan'`` and ``'inf'`` are accepted by ``float`` and therefore
        count as numbers.
    """
    try:
        float(s)
    # float() raises TypeError (not ValueError) for None, lists and other
    # non-string, non-numeric objects; those are simply "not a number" here.
    except (TypeError, ValueError):
        return False
    return True

In [None]:
# Locations of the raw exports, relative to the notebook's working directory.
copper_csv_path = '../../../volza/copper/copper.csv'
magnesium_csv_path = '../../../volza/magnesium/magnesium.csv'
petroleum_csv_path = '../../../volza/petroleum/petrol_crude_oil_spot_price.csv'

# Copper and magnesium are read with read_csv's default separator;
# the petroleum spot-price file is semicolon-delimited.
volza_copper = pandas.read_csv(copper_csv_path)
volza_magnesium = pandas.read_csv(magnesium_csv_path)
volza_petroleum = pandas.read_csv(petroleum_csv_path, sep=';')

def preprocess_data(dataframe):
    """Restrict a raw frame to the configured date window and value band, indexed by date.

    Steps, in order:
      1. parse the 'Date' column to datetimes,
      2. keep rows with MINIMUM_DATE <= Date <= MAXIMUM_DATE,
      3. keep rows with 0 <= Value <= 100,
      4. move 'Date' to the index and sort chronologically.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Raw frame with at least a 'Date' and a 'Value' column. Not modified.

    Returns
    -------
    pandas.DataFrame
        Filtered frame with a sorted datetime index named 'Date'.
    """
    # Parse before filtering so the window test compares timestamps rather than
    # raw strings (a lexicographic comparison is only right for ISO-8601 text).
    # Unparseable dates become NaT, fail both bound checks and are dropped.
    parsed = dataframe.assign(Date=pandas.to_datetime(dataframe['Date'], errors='coerce'))
    in_window = filter_range(parsed, 'Date', MINIMUM_DATE, MAXIMUM_DATE)
    in_band = filter_range(in_window, 'Value', 0, 100)
    # Pass an independent copy: set_datetime_index may modify its argument in
    # place, and a filtered slice would otherwise trigger SettingWithCopyWarning.
    return set_datetime_index(in_band.copy(), 'Date')

def convert_units(dataframe):
    """Return a copy of ``dataframe`` with a per-unit rate column added.

    The rate is ``Value / (Std. Quantity * factor)``, where ``factor`` is looked
    up in CONVERSION_FACTORS from the row's 'Std. Unit' code, and is stored
    under UNIT_RATE_COLUMN.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with 'Value', 'Std. Unit' and STD_QUANTITY_COLUMN columns.
        Not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the extra UNIT_RATE_COLUMN column. Rows whose unit code
        has no entry in CONVERSION_FACTORS get NaN.
    """
    # Per-row multiplier; unit codes missing from the mapping become NaN.
    factor_per_row = dataframe['Std. Unit'].map(CONVERSION_FACTORS)
    standardized_quantity = dataframe[STD_QUANTITY_COLUMN] * factor_per_row
    # assign() returns a new frame instead of writing a column into the caller's
    # object, which is usually a filtered slice of the raw data.
    return dataframe.assign(**{UNIT_RATE_COLUMN: dataframe['Value'] / standardized_quantity})

# Apply the shared date-window / value-band filter and the datetime index to every source.
volza_copper = preprocess_data(volza_copper)
volza_magnesium = preprocess_data(volza_magnesium)
volza_petroleum = preprocess_data(volza_petroleum)

# Keep only the rows whose unit code has a known conversion factor.
# .copy() detaches the filtered rows so a column can be added to them later
# without writing into a slice of the original frame.
volza_copper = volza_copper[volza_copper['Std. Unit'].isin(CONVERSION_FACTORS.keys())].copy()
volza_magnesium = volza_magnesium[volza_magnesium['Std. Unit'].isin(CONVERSION_FACTORS.keys())].copy()

volza_copper = convert_units(volza_copper)
volza_magnesium = convert_units(volza_magnesium)

# Metal unit rates: daily mean of the rows that have a computed rate.
# dropna() is called without a positional axis because pandas >= 2.0 makes
# its arguments keyword-only (dropna(0) raises TypeError); axis 0 is the default.
volza_copper_unit_rate = volza_copper[[UNIT_RATE_COLUMN]].dropna().resample('D').mean()
volza_magnesium_unit_rate = volza_magnesium[[UNIT_RATE_COLUMN]].dropna().resample('D').mean()
# Keep only daily means between 0 and 100; this also removes the NaN rows that
# resample creates for days without data and any infinite rates.
volza_copper_unit_rate = filter_range(volza_copper_unit_rate, UNIT_RATE_COLUMN, 0, 100)
volza_magnesium_unit_rate = filter_range(volza_magnesium_unit_rate, UNIT_RATE_COLUMN, 0, 100)

# Petroleum: weekly sum of 'Value', scaled by the fixed currency rate.
# NOTE(review): a weekly *sum* equals the price only if the source has at most
# one observation per week -- confirm, otherwise a mean may be intended.
volza_petroleum = volza_petroleum[['Value']].resample('W').sum().fillna(0) * RMB_TO_USD_RATE

In [None]:
# Sanity check: largest value of the weekly petroleum series after scaling.
petroleum_values = volza_petroleum["Value"]
petroleum_values.max()

131.978

In [None]:
# display(volza_copper.head(1))
# display(volza_magnesium.sort_values(by=UNIT_RATE_COLUMN).tail(10)) 
# display(volza_petroleum.head(1))          

# display(volza_copper.describe())
# display(volza_magnesium.describe()) 
# display(volza_petroleum.describe())          

In [None]:
# Series to plot, in drawing order: each entry carries the legend label,
# the x values (dates) and the y values.
columns = [
    {
        "name": "Volza Copper Unit Rate",
        "date": volza_copper_unit_rate.index,
        "value": volza_copper_unit_rate[UNIT_RATE_COLUMN],
    },
    {
        "name": "Volza Magnesium Unit Rate",
        "date": volza_magnesium_unit_rate.index,
        "value": volza_magnesium_unit_rate[UNIT_RATE_COLUMN],
    },
    {
        "name": "Petroleum Price",
        "date": volza_petroleum.index,
        "value": volza_petroleum["Value"],
    },
]

In [None]:
# One viridis hex colour per series, in the same order as `columns`.
palette = list(seaborn.color_palette(palette='viridis', n_colors=len(columns)).as_hex())

graph_objects = plotly.graph_objs
figure = graph_objects.Figure()

# Add one line trace per series, filled down to the x axis.
for series, hex_color in zip(columns, palette):
    trace = graph_objects.Scatter(
        x=series["date"],
        y=series["value"],
        name=series["name"],
        line_color=hex_color,
        fill='tozeroy',
    )
    figure.add_trace(trace)

figure.show()