## Import modules

In [24]:
import pandas as pd
import numpy as np
import geopandas as gpd

## Read in v4.1 curated cement and steel data

In [2]:
# Paths to the curated v4.1 datasets (CSV files, despite the *_xlsx variable names)
cement_xlsx = "../../resources/cement_dataset_v4.1.csv"
steel_xlsx = "../../resources/steel_dataset_v4.1.csv"

# Load each dataset into its own DataFrame
cement_pd = pd.read_csv(filepath_or_buffer=cement_xlsx)
steel_pd = pd.read_csv(filepath_or_buffer=steel_xlsx)

## Examine cement plant statistics

### Global cement plants - operating

In [3]:
# Restrict the cement dataset to rows whose status is "Operating" and count them
opr_cement_pd = cement_pd.loc[cement_pd["status"].eq("Operating")]
tot_cement_cnt = opr_cement_pd["uid"].shape[0]
print("Global count of operating cement plants: ", tot_cement_cnt)

Global count of operating cement plants:  3472


### China, not-China cement plants

In [4]:
# Flag each operating cement plant as inside/outside China, then count
# distinct plant uids on each side of the flag
opr_cement_pd = opr_cement_pd.assign(
    in_china=lambda plants: plants["country"].eq("China")
)

cntry_cement_cnt = (
    opr_cement_pd
    .groupby(by="in_china")["uid"]
    .nunique()
)
print("Count of China/Not-China cement plants: ")
cntry_cement_cnt

Count of China/Not-China cement plants: 


in_china
False    2204
True     1268
Name: uid, dtype: int64

### China, integrated/not-integrated cement plants

In [5]:
# Operating cement plants in China, tagged with whether plant_type is recorded:
# "Unknown" where plant_type is null, "Known" otherwise
cement_chn_pd = (
    opr_cement_pd
    .loc[opr_cement_pd["country"].eq("China")]
    .assign(
        plant_type_status=lambda plants: np.where(
            plants["plant_type"].isna(), "Unknown", "Known"
        )
    )
)

# Distinct plant uids per recorded plant type (null plant types are dropped by groupby)
intg_cement_cnt = (
    cement_chn_pd
    .groupby(by="plant_type")["uid"]
    .nunique()
)
print("Count of cement plants by production type in China:")
intg_cement_cnt

Count of cement plants by production type in China:


plant_type
Grinding       49
Integrated    801
Name: uid, dtype: int64

In [6]:
# Distinct plant uids in China, split by the plant_type_status label
unknwn_cement_cnt = (
    cement_chn_pd
    .groupby(by="plant_type_status")["uid"]
    .nunique()
)
print("Count of cement plants by known/unknown production types in China:")
unknwn_cement_cnt

Count of cement plants by known/unknown production types in China:


plant_type_status
Known      850
Unknown    418
Name: uid, dtype: int64

### China, integrated cement, exact/approximate locations

In [7]:
# Integrated cement plants in China, counted by location accuracy
cement_chn_intg = cement_chn_pd.loc[cement_chn_pd["plant_type"].eq("Integrated")]
exact_chn_intg_cnt = (
    cement_chn_intg
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of integrated cement plants in China, by location status:")
exact_chn_intg_cnt

Count of integrated cement plants in China, by location status:


accuracy
Approximate    397
Exact          404
Name: uid, dtype: int64

### Operating plants with known capacity

In [8]:
# Operating cement plants that have a reported capacity and an exact location.
# NOTE: a further restriction to plant_type == "Integrated" is currently left disabled.
knwn_cap_cement_pd = opr_cement_pd.loc[
    lambda plants: plants["reported_capacity"].notna() & plants["accuracy"].eq("Exact")
]
print("Number of operating cement plants with exact locations and known capacity:", knwn_cap_cement_pd.shape[0])

Number of operating cement plants with exact locations and known capacity: 366


### China, grinding or unknown plant type

In [9]:
# Chinese cement plants that are grinding plants or whose plant_type_status is "Unknown"
grnd_unknw_type_pd = cement_chn_pd.loc[
    lambda plants: plants["plant_type"].eq("Grinding") | plants["plant_type_status"].eq("Unknown")
]

print("Number of operating, grinding or unknown type cement plants in China:", grnd_unknw_type_pd.shape[0])

# Distinct plant uids in that subset, by location accuracy
grnd_unkwn_loc_cnt = (
    grnd_unknw_type_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of grinding or unknown type cement plants in China by location status:")
print(grnd_unkwn_loc_cnt)

Number of operating, grinding or unknown type cement plants in China: 467
Count of grinding or unknown type cement plants in China by location status:
accuracy
Approximate     26
Exact          441
Name: uid, dtype: int64


### Cement plants in Asia, excluding China

In [10]:
# Country list used to pick out Asian plants; the inner join on "country" keeps
# only cement plants whose country appears in that list, then China is dropped
asia_cntry_pd = pd.read_csv("countries-asia-2020.csv")
asia_cement_pd = (
    pd.merge(opr_cement_pd, asia_cntry_pd, on="country", how="inner")
    .loc[lambda plants: plants["country"].ne("China")]
)

print("Number of operating cement plants in Asia (excluding China): ", asia_cement_pd.shape[0])

Number of operating cement plants in Asia (excluding China):  877


In [11]:
# Distinct plant uids in Asia (excluding China), by location accuracy
asia_exct_cnt = (
    asia_cement_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of cement plants in Asia (excluding China) by location status:")
print(asia_exct_cnt)

Count of cement plants in Asia (excluding China) by location status:
accuracy
Approximate    135
Exact          742
Name: uid, dtype: int64


### Cement plants, rest of the world

In [12]:
# Everything that is neither in the Asia (excluding China) subset nor flagged in_china
othr_cement_pd = opr_cement_pd.loc[
    ~(opr_cement_pd["uid"].isin(asia_cement_pd["uid"].tolist()) | opr_cement_pd["in_china"])
]
print("Number of operating cement plants outside Asia: ", othr_cement_pd.shape[0])

# Distinct plant uids in the remainder, by location accuracy
othr_exct_cnt = (
    othr_cement_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of cement plants outside Asia by location status:")
print(othr_exct_cnt)

Number of operating cement plants outside Asia:  1327
Count of cement plants outside Asia by location status:
accuracy
Approximate     107
Exact          1220
Name: uid, dtype: int64


## Examine steel plant statistics

### Global steel plants - operating

In [13]:
# Restrict the steel dataset to rows whose status is "Operating" and count them
opr_steel_pd = steel_pd.loc[steel_pd["status"].eq("Operating")]
tot_steel_cnt = opr_steel_pd["uid"].shape[0]
print("Global count of operating steel plants: ", tot_steel_cnt)

Global count of operating steel plants:  2032


### China, not-China steel plants

In [14]:
# Flag each operating steel plant as inside/outside China, then count
# distinct plant uids on each side of the flag
opr_steel_pd = opr_steel_pd.assign(
    in_china=lambda plants: plants["country"].eq("China")
)

cntry_steel_cnt = (
    opr_steel_pd
    .groupby(by="in_china")["uid"]
    .nunique()
)
print("Count of China/Not-China steel plants: ")
cntry_steel_cnt

Count of China/Not-China steel plants: 


in_china
False    1721
True      311
Name: uid, dtype: int64

### China, exact/approximate locations

In [15]:
# Operating steel plants in China, counted by location accuracy
steel_chn_pd = opr_steel_pd.loc[opr_steel_pd["country"].eq("China")]
exact_chn_steel_cnt = (
    steel_chn_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of steel plants in China, by location status:")
exact_chn_steel_cnt

Count of steel plants in China, by location status:


accuracy
Approximate    183
Exact          128
Name: uid, dtype: int64

### Steel plants in Asia, excluding China

In [16]:
# Inner join on "country" keeps only steel plants whose country appears in
# asia_cntry_pd; China is then dropped from the result
asia_steel_pd = (
    pd.merge(opr_steel_pd, asia_cntry_pd, on="country", how="inner")
    .loc[lambda plants: plants["country"].ne("China")]
)

print("Number of operating steel plants in Asia (excluding China): ", asia_steel_pd.shape[0])

Number of operating steel plants in Asia (excluding China):  501


In [17]:
# Distinct steel plant uids in Asia (excluding China), by location accuracy
asia_exct_steel_cnt = (
    asia_steel_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of steel plants in Asia (excluding China) by location status:")
print(asia_exct_steel_cnt)

Count of steel plants in Asia (excluding China) by location status:
accuracy
Approximate    133
Exact          368
Name: uid, dtype: int64


### Steel plants, rest of the world

In [18]:
# Everything that is neither in the Asia (excluding China) subset nor flagged in_china
othr_steel_pd = opr_steel_pd.loc[
    ~(opr_steel_pd["uid"].isin(asia_steel_pd["uid"].tolist()) | opr_steel_pd["in_china"])
]
print("Number of operating steel plants outside Asia: ", othr_steel_pd.shape[0])

# Distinct plant uids in the remainder, by location accuracy
othr_exct_steel_cnt = (
    othr_steel_pd
    .groupby(by="accuracy")["uid"]
    .nunique()
)
print("Count of steel plants outside Asia by location status:")
print(othr_exct_steel_cnt)

Number of operating steel plants outside Asia:  1220
Count of steel plants outside Asia by location status:
accuracy
Approximate     192
Exact          1028
Name: uid, dtype: int64


## Get samples for CloudFactory

In [27]:
# Draw a small random sample of cement plants in China with exact locations
# (5 integrated + 5 grinding) and export it as GeoJSON.
# A fixed seed makes the exported sample reproducible on Restart & Run All.
SAMPLE_SEED = 42

# Rows common to both samples: plants in China with an exact location
chn_exact_cement_pd = opr_cement_pd[(opr_cement_pd["country"] == "China") &
                                    (opr_cement_pd["accuracy"] == "Exact")]

# Integrated cement plants in China with exact locations
cement_samp_int_pd = chn_exact_cement_pd[
    chn_exact_cement_pd["plant_type"] == "Integrated"
].sample(n=5, random_state=SAMPLE_SEED)
# Grinding cement plants in China with exact locations
cement_samp_grd_pd = chn_exact_cement_pd[
    chn_exact_cement_pd["plant_type"] == "Grinding"
].sample(n=5, random_state=SAMPLE_SEED)

# Concatenate and write to geojson.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# like append, it keeps the original row index of each sample.
cement_samp_pd = pd.concat([cement_samp_int_pd, cement_samp_grd_pd])
cement_samp_gpd = gpd.GeoDataFrame(
    cement_samp_pd,
    # Point geometry built from the longitude/latitude columns
    geometry=gpd.points_from_xy(cement_samp_pd.longitude, cement_samp_pd.latitude),
    crs="EPSG:4326",
)
cement_samp_gpd.to_file("cement_China_samples.geojson", driver='GeoJSON')

In [31]:
# Draw a random sample of 10 steel plants in China with exact locations and
# export it as GeoJSON.
# A fixed seed makes the exported sample reproducible on Restart & Run All.
SAMPLE_SEED = 42

# Steel plants in China with exact locations
steel_samp_pd = opr_steel_pd[(opr_steel_pd["country"] == "China") &
                             (opr_steel_pd["accuracy"] == "Exact")].sample(n=10, random_state=SAMPLE_SEED)

# Write to geojson
steel_samp_gpd = gpd.GeoDataFrame(
    steel_samp_pd,
    # Point geometry built from the longitude/latitude columns
    geometry=gpd.points_from_xy(steel_samp_pd.longitude, steel_samp_pd.latitude),
    crs="EPSG:4326",
)
steel_samp_gpd.to_file("steel_China_samples.geojson", driver='GeoJSON')