# China imports from Russia by commodity

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
from datetime import timedelta
import numpy as np
import glob

In [3]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

---

### Get commodity codes lookup

In [4]:
code_src = pd.read_csv(
    "https://raw.githubusercontent.com/datasets/harmonized-system/master/data/harmonized-system.csv",
    dtype={"level": str},
)

In [5]:
code_df = code_src[(code_src["parent"] == "TOTAL") | (code_src["level"] == "4")][
    ["hscode", "description", "level"]
]

In [6]:
code_df.rename(columns={"hscode": "code"}, inplace=True)

In [7]:
code_df.head()

Unnamed: 0,code,description,level
0,1,Animals; live,2
1,101,"Horses, asses, mules and hinnies; live",4
6,102,Bovine animals; live,4
12,103,Swine; live,4
16,104,Sheep and goats; live,4


---

### Get country codes

In [8]:
countries = pd.read_csv("../data/raw/TradingPartner.csv", encoding="GBK")

In [9]:
countries.rename(
    columns={
        "CODES": "trade_partner_code",
        "DESCRIPTION": "trade_partner_name",
    },
    inplace=True,
)

### Get exporting provinces

In [10]:
places = pd.read_csv("../data/raw/TradeCoPort.csv", encoding="GBK")

In [11]:
places.rename(
    columns={
        "CODE": "registration_place_code",
        "DESCRIPTION": "registration_place_name",
    },
    inplace=True,
)

---

### Import processed data

In [12]:
src = pd.read_csv("../data/raw/China imports from Russia 2021.csv", encoding="GBK")

In [13]:
src.columns = (
    src.columns.str.replace(r"\t", "", regex=True)
    .str.replace(" ", "_", regex=False)
    .str.lower()
)

In [14]:
src.rename(
    columns={
        "数据年月": "date_of_data",
        "商品编码": "commodity_code",
        "商品名称": "commodity_name",
        "贸易伙伴编码": "trade_partner_code",
        "贸易伙伴名称": "trade_partner_name",
        "第一数量": "first_quantity",
        "第一计量单位": "first_unit_of_measure",
        "第二数量": "second_quantity",
        "第二计量单位": "second_unit_of_measure",
        "美元": "us_dollar",
    },
    inplace=True,
)

In [15]:
src = src.replace(r"\t", "", regex=True)

In [16]:
src["us_dollar"] = src["us_dollar"].str.replace(",", "", regex=False).astype(int)

In [17]:
src["commodity_code"] = src["commodity_code"].astype(str)

In [18]:
src.drop(["unnamed:_9"], axis=1, inplace=True)

In [19]:
df = src.copy()

In [20]:
df.head()

Unnamed: 0,trade_partner_code,trade_partner_name,commodity_code,commodity_name,first_quantity,first_unit_of_measure,second_quantity,second_unit_of_measure,us_dollar
0,344,俄罗斯联邦,2012000,鲜或冷的带骨牛肉,4809,千克,0,?,100126
1,344,俄罗斯联邦,2013000,鲜或冷的去骨牛肉,224722,千克,0,?,3407709
2,344,俄罗斯联邦,2022000,冻带骨牛肉,4744699,千克,0,?,15902831
3,344,俄罗斯联邦,2023000,冻去骨牛肉,14829654,千克,0,?,120082914
4,344,俄罗斯联邦,2062900,其他冻牛杂碎,189244,千克,0,?,636385


---

### Create commodity code columns based on slices of others

In [21]:
df["commodity_chapter"] = df["commodity_code"].str[:2]
df["commodity_chapter_four"] = df["commodity_code"].str[:4]

In [22]:
df.head()

Unnamed: 0,trade_partner_code,trade_partner_name,commodity_code,commodity_name,first_quantity,first_unit_of_measure,second_quantity,second_unit_of_measure,us_dollar,commodity_chapter,commodity_chapter_four
0,344,俄罗斯联邦,2012000,鲜或冷的带骨牛肉,4809,千克,0,?,100126,20,2012
1,344,俄罗斯联邦,2013000,鲜或冷的去骨牛肉,224722,千克,0,?,3407709,20,2013
2,344,俄罗斯联邦,2022000,冻带骨牛肉,4744699,千克,0,?,15902831,20,2022
3,344,俄罗斯联邦,2023000,冻去骨牛肉,14829654,千克,0,?,120082914,20,2023
4,344,俄罗斯联邦,2062900,其他冻牛杂碎,189244,千克,0,?,636385,20,2062


### Merge the commodity codes with our export data and remove columns we don't need

In [23]:
merge = df.merge(code_df, left_on="commodity_chapter", right_on="code")

In [24]:
merge.head(1)

Unnamed: 0,trade_partner_code,trade_partner_name,commodity_code,commodity_name,first_quantity,first_unit_of_measure,second_quantity,second_unit_of_measure,us_dollar,commodity_chapter,commodity_chapter_four,code,description,level
0,344,俄罗斯联邦,2012000,鲜或冷的带骨牛肉,4809,千克,0,?,100126,20,2012,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2


In [25]:
merge.drop(
    [
        "commodity_name",
        "code",
    ],
    axis=1,
    inplace=True,
)

In [26]:
merge.rename(columns={"description": "commodity_chapter_desc"}, inplace=True)

In [27]:
merge_df = pd.merge(
    merge,
    code_df[["code", "description"]],
    left_on="commodity_chapter_four",
    right_on="code",
)

In [28]:
merge_df.drop(["code", "level"], axis=1, inplace=True)

In [29]:
merge_df.rename(
    columns={
        "description": "commodity_chapter_four_desc",
    },
    inplace=True,
)

### Behold, a dataframe

In [30]:
merged_df = merge_df[
    [
        "trade_partner_code",
        "trade_partner_name",
        "commodity_code",
        "commodity_chapter",
        "commodity_chapter_desc",
        "commodity_chapter_four",
        "commodity_chapter_four_desc",
        "first_unit_of_measure",
        "second_quantity",
        "second_unit_of_measure",
        "us_dollar",
    ]
].copy()

In [31]:
df = pd.merge(merged_df, countries, on=["trade_partner_code"])

In [32]:
df.drop(
    [
        "trade_partner_code",
        "trade_partner_name_x",
    ],
    axis=1,
    inplace=True,
)
df.rename(
    columns={
        "trade_partner_name_y": "exported_to",
    },
    inplace=True,
)

In [33]:
df.head()

Unnamed: 0,commodity_code,commodity_chapter,commodity_chapter_desc,commodity_chapter_four,commodity_chapter_four_desc,first_unit_of_measure,second_quantity,second_unit_of_measure,us_dollar,exported_to
0,20011000,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2001,"Vegetables, fruit, nuts and other edible parts of plants; prepared or preserved by vinegar or acetic acid",千克,0,?,188995,Russian Federation
1,20019090,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2001,"Vegetables, fruit, nuts and other edible parts of plants; prepared or preserved by vinegar or acetic acid",千克,0,?,22719,Russian Federation
2,20052000,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2005,"Vegetables preparations n.e.c.; prepared or preserved otherwise than by vinegar or acetic acid, not frozen, other than products of heading no. 2006",千克,0,?,585020,Russian Federation
3,20054000,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2005,"Vegetables preparations n.e.c.; prepared or preserved otherwise than by vinegar or acetic acid, not frozen, other than products of heading no. 2006",千克,0,?,2509,Russian Federation
4,20055990,20,"Preparations of vegetables, fruit, nuts or other parts of plants",2005,"Vegetables preparations n.e.c.; prepared or preserved otherwise than by vinegar or acetic acid, not frozen, other than products of heading no. 2006",千克,0,?,1116,Russian Federation


In [34]:
df.to_csv("../data/processed/china_imports_from_russia.csv", index=False)