In [1]:
# %pip install plotly
# %pip install pandas
# %pip install seaborn
# %pip install nbformat >= 4.2.0
# %pip install -U kaleido

In [2]:
import pandas
import seaborn
import plotly

import warnings
from datetime import datetime
import os
import re

In [3]:
MINIMUM_DATE = "2020-01-01"
MAXIMUM_DATE = "2023-01-01"

CONVERSION_FACTORS = {
  'TON': 907.185,
  'TNE': 1000,
  'KGS': 1,
  'Kgs': 1,
} 

RMB_TO_USD_RATE = 0.14

UNIT_RATE_COLUMN = 'Unit Rate'
STD_QUANTITY_COLUMN = 'Std. Quantity'

warnings.filterwarnings('ignore')

In [4]:
def set_datetime_index(dataframe, date_column):
    dataframe[date_column] = pandas.to_datetime(dataframe[date_column])
    dataframe.set_index(date_column, inplace=True)
    dataframe.sort_values(date_column, inplace=True)
    return dataframe

def filter_range(dataframe, column_name, minimum_date, maximum_date):
    return dataframe[
        (dataframe[column_name] >= minimum_date) &
        (dataframe[column_name] <= maximum_date)
    ]

def resample_weekly(series):
    return series.fillna(0).resample('W').sum()

def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

def zero_max_normalize(series):
    return (series-0)/(series.max()-0)

def mdy_to_ymd(d):
    return datetime.strptime(d, '%b %d, %Y').strftime('%Y-%m-%d')

def get_country_of_origin_unit_rate(dataframe, country):
    a = dataframe.groupby('Country of Origin').get_group(country)[[UNIT_RATE_COLUMN]].dropna(0).resample('W').mean()
    return filter_range(a, UNIT_RATE_COLUMN, 0, 100)

In [5]:
volza_copper = pandas.read_csv('../../../volza/copper/copper.csv')
volza_magnesium = pandas.read_csv('../../../volza/magnesium/magnesium.csv')
volza_petroleum = pandas.read_csv('../../../volza/petroleum/petrol_crude_oil_spot_price.csv', sep=';')
magnesium_spot_price = pandas.read_csv('../../../volza/magnesium/magnesium_price_2.csv')

def preprocess_data (dataframe):
  dataframe = filter_range(dataframe, 'Date', MINIMUM_DATE, MAXIMUM_DATE)
  dataframe = set_datetime_index(dataframe, 'Date')
  return dataframe

def convert_units (dataframe):
  # dataframe['Std. Quantity'] = dataframe['Std. Quantity'].fillna(1)
  dataframe[UNIT_RATE_COLUMN] = dataframe['Value'] / (dataframe[STD_QUANTITY_COLUMN] * dataframe['Std. Unit'].map(CONVERSION_FACTORS))
  return dataframe

volza_copper = preprocess_data(volza_copper)
volza_magnesium = preprocess_data(volza_magnesium)
volza_petroleum = preprocess_data(volza_petroleum)

magnesium_spot_price['Date'] = magnesium_spot_price['Date'].apply(lambda x : mdy_to_ymd(x))
magnesium_spot_price = preprocess_data(magnesium_spot_price)

# Remove countries without usable and easy to standardize units
volza_copper = volza_copper[volza_copper['Std. Unit'].isin(CONVERSION_FACTORS.keys())]
volza_magnesium = volza_magnesium[volza_magnesium['Std. Unit'].isin(CONVERSION_FACTORS.keys())]
# volza_copper = volza_copper[volza_copper['Std. Quantity'].apply(is_number)]
# volza_magnesium = volza_magnesium[volza_magnesium['Std. Quantity'].apply(is_number)]

volza_copper = convert_units(volza_copper)
volza_magnesium = convert_units(volza_magnesium)

volza_magnesium_unit_rate_china = get_country_of_origin_unit_rate(volza_magnesium, 'China')
window_size = 20  # Adjust the window size based on your data
threshold = 2  # You can adjust this value based on your data
moving_avg = volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN].rolling(window=window_size).mean()
std_dev = volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN].rolling(window=window_size).std()
spikes = (abs(volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN] - moving_avg) > threshold * std_dev)
volza_magnesium_unit_rate_china['Spikes'] = (abs(volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN] - moving_avg) > threshold * std_dev).astype(int)

volza_magnesium_unit_rate_netherlands = get_country_of_origin_unit_rate(volza_magnesium, 'Netherlands')
window_size = 20  # Adjust the window size based on your data
threshold = 2  # You can adjust this value based on your data
moving_avg = volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN].rolling(window=window_size).mean()
std_dev = volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN].rolling(window=window_size).std()
spikes = (abs(volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN] - moving_avg) > threshold * std_dev)
volza_magnesium_unit_rate_netherlands['Spikes'] = (abs(volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN] - moving_avg) > threshold * std_dev).astype(int)

volza_magnesium_unit_rate_turkey = get_country_of_origin_unit_rate(volza_magnesium, 'Turkey')

volza_magnesium_unit_rate_germany = get_country_of_origin_unit_rate(volza_magnesium, 'Germany')
volza_magnesium_unit_rate_belgium = get_country_of_origin_unit_rate(volza_magnesium, 'Belgium')

# Resample to weekly
volza_copper_unit_rate = volza_copper[[UNIT_RATE_COLUMN]].dropna(0).resample('W').mean().interpolate(method='linear', limit=100)
volza_magnesium_unit_rate = volza_magnesium[[UNIT_RATE_COLUMN]].dropna(0).resample('W').mean()

volza_copper_unit_rate = filter_range(volza_copper_unit_rate, UNIT_RATE_COLUMN, 0, 100)
volza_magnesium_unit_rate = filter_range(volza_magnesium_unit_rate, UNIT_RATE_COLUMN, 0, 100)

volza_petroleum_brent = volza_petroleum.groupby('Spot Prices').get_group('Brent Spot Price (U.S. Dollars per Barrel)').drop(columns=['Spot Prices'])
volza_petroleum_wti = volza_petroleum.groupby('Spot Prices').get_group('WTI Spot Price (U.S. Dollars per Barrel)').drop(columns=['Spot Prices'])

volza_petroleum_brent = volza_petroleum_brent[['Value']].resample('W').sum().fillna(0) * RMB_TO_USD_RATE
volza_petroleum_wti = volza_petroleum_wti[['Value']].resample('W').sum().fillna(0) * RMB_TO_USD_RATE
volza_petroleum = volza_petroleum[['Value']].resample('W').sum().fillna(0) * RMB_TO_USD_RATE

# The magnesium spot price is in RMB per tonne, so we need to convert it to USD per kg
magnesium_spot_price["Open"] = magnesium_spot_price['Open'].apply(lambda x: float(x.replace(',', '')) / 1000 * RMB_TO_USD_RATE)

In [6]:
volza_petroleum["Value"].max()

173.4796

In [7]:
# display(volza_copper.head(1))
# display(volza_magnesium.sort_values(by=UNIT_RATE_COLUMN).tail(10)) 
# display(volza_petroleum.head(1))          

# display(volza_copper.describe())
# display(volza_magnesium.describe()) 
# display(volza_petroleum.describe())   
display(magnesium_spot_price.head(1))       

Unnamed: 0_level_0,Price,Open,High,Low,Vol.,Change %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,14150.0,1.981,14300.0,14000.0,-,0.00%


In [8]:
columns = []
# columns.append({
#   "name": f"Volza Copper Unit Rate ({volza_copper_unit_rate[UNIT_RATE_COLUMN].min()} - {volza_copper_unit_rate[UNIT_RATE_COLUMN].max()})", 
#   "date": volza_copper_unit_rate.index, 
#   "value": zero_max_normalize(volza_copper_unit_rate[UNIT_RATE_COLUMN])
# })
# columns.append({
#   "name": f"Volza Magnesium Unit Rate ({volza_magnesium_unit_rate[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate[UNIT_RATE_COLUMN].max()})", 
#   "date": volza_magnesium_unit_rate.index, 
#   "value": zero_max_normalize(volza_magnesium_unit_rate[UNIT_RATE_COLUMN])
# })

# ===========================
# Country of Origin Specific
# ===========================
columns.append({
  "name": f"Volza Magnesium Unit Rate (China, {volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN].max()})", 
  "date": volza_magnesium_unit_rate_china.index, 
  "value": zero_max_normalize(volza_magnesium_unit_rate_china[UNIT_RATE_COLUMN])
})

columns.append({
  "name": f"Volza Magnesium Unit Rate Spikes (China, {volza_magnesium_unit_rate_china['Spikes'].min()} - {volza_magnesium_unit_rate_china['Spikes'].max()})", 
  "date": volza_magnesium_unit_rate_china.index, 
  "value": zero_max_normalize(volza_magnesium_unit_rate_china['Spikes'])
})

columns.append({
  "name": f"Volza Magnesium Unit Rate (Netherlands, {volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN].max()})", 
  "date": volza_magnesium_unit_rate_netherlands.index, 
  "value": zero_max_normalize(volza_magnesium_unit_rate_netherlands[UNIT_RATE_COLUMN])
})

columns.append({
  "name": f"Volza Magnesium Unit Rate Spikes (Netherlands, {volza_magnesium_unit_rate_netherlands['Spikes'].min()} - {volza_magnesium_unit_rate_netherlands['Spikes'].max()})", 
  "date": volza_magnesium_unit_rate_netherlands.index, 
  "value": zero_max_normalize(volza_magnesium_unit_rate_netherlands['Spikes'])
})

# columns.append({
#   "name": f"Volza Magnesium Unit Rate (Turkey, {volza_magnesium_unit_rate_turkey[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate_turkey[UNIT_RATE_COLUMN].max()})", 
#   "date": volza_magnesium_unit_rate_turkey.index, 
#   "value": zero_max_normalize(volza_magnesium_unit_rate_turkey[UNIT_RATE_COLUMN])
# })

# columns.append({
#   "name": f"Volza Magnesium Unit Rate (Germany, {volza_magnesium_unit_rate_germany[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate_germany[UNIT_RATE_COLUMN].max()})", 
#   "date": volza_magnesium_unit_rate_germany.index, 
#   "value": zero_max_normalize(volza_magnesium_unit_rate_germany[UNIT_RATE_COLUMN])
# })

# columns.append({
#   "name": f"Volza Magnesium Unit Rate (Belgium, {volza_magnesium_unit_rate_belgium[UNIT_RATE_COLUMN].min()} - {volza_magnesium_unit_rate_belgium[UNIT_RATE_COLUMN].max()})", 
#   "date": volza_magnesium_unit_rate_belgium.index, 
#   "value": zero_max_normalize(volza_magnesium_unit_rate_belgium[UNIT_RATE_COLUMN])
# })



# columns.append({
#   "name": f"Magnesium Spot Price ({magnesium_spot_price['Open'].min()} - {magnesium_spot_price['Open'].max()}", 
#   "date": magnesium_spot_price.index, 
#   "value": zero_max_normalize(magnesium_spot_price['Open'])
# })

columns.append({
  "name": f"Average Petroleum Price ({volza_petroleum['Value'].min()} - {volza_petroleum['Value'].max()})", 
  "date": volza_petroleum.index, 
  "value": zero_max_normalize(volza_petroleum["Value"])
})

# columns.append({
#   "name": f"Brent Petroleum Price ({volza_petroleum_brent['Value'].min()} - {volza_petroleum_brent['Value'].max()})", 
#   "date": volza_petroleum_brent.index, 
#   "value": zero_max_normalize(volza_petroleum_brent["Value"])
# })


# columns.append({
#   "name": f"WTI Petroleum Price ({volza_petroleum_wti['Value'].min()} - {volza_petroleum_wti['Value'].max()})", 
#   "date": volza_petroleum_wti.index, 
#   "value": zero_max_normalize(volza_petroleum_wti["Value"])
# })

In [9]:
# palette = list(seaborn.color_palette(palette='viridis', n_colors=len(columns)).as_hex())
# figure = plotly.graph_objs.Figure()

# for column, color in zip(columns, palette):
#     figure.add_trace(plotly.graph_objs.Scatter(x = column["date"],
#                              y = column["value"],
#                              name = column["name"],
#                              line_color = color, 
#                              fill='tozeroy'))
# figure.show()
    
if not os.path.exists("images"):
    os.mkdir("images")

for column_1 in columns:
    for column_2 in columns:
        if column_1 == column_2:
            continue
        palette = list(seaborn.color_palette(palette='viridis', n_colors=2).as_hex())
        figure = plotly.graph_objs.Figure()
        figure.add_trace(plotly.graph_objs.Scatter(x = column_1["date"],
                                y = column_1["value"],
                                name = column_1["name"],
                                line_color = palette[0], 
                                fill='tozeroy'))
        figure.add_trace(plotly.graph_objs.Scatter(x = column_2["date"],
                                y = column_2["value"],
                                name = column_2["name"],
                                line_color = palette[1], 
                                fill='tozeroy'))
        # figure.write_image(f"images/_{column_1['name']}_{column_2['name']}.png")
        figure.show()