In [1]:
import numpy as np
import pandas as pd
import os
import json
import re
import math
import warnings

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
'''
Carbon emissions per city, company and month for the scenario in which
several companies cooperate on delivery.

For every (month, company) pair the per-courier result file is loaded, its
distances are shrunk by the cooperation factors, IQR outliers are dropped and
three city-level values are written into df_result:
sum_Ce, average_operator_Ce and average_package_Ce.
A value of 0 in those columns means "not computed" (it is what the later
missing-data cell looks for).
'''
with open('../data/city_name_list.json', 'r') as f:
    city_name_list = json.load(f)

# Vehicle, courier and single-package weights (presumably kg — TODO confirm units)
car_weight,man_weight,pkg_weight = 350,69.6,1.38
coeff_ele = 0.000113  # electric vehicle: carbon (kg) per unit of carried weight and distance
coeff_oil = 0.15 # fuel vehicle: carbon (kg) per km
cm_weight = car_weight + man_weight

df_result = pd.read_csv('../data/waybill_base_20240911.csv')
# Hoisted once: the set of city names present in the result table
known_cities = set(df_result['city'])

company_list = ['JD','SF','YT','YD','ZT','ST']
num_cooperator = len(company_list)
month_list = ['2023_01','2023_07','2024_01']

# Seeded generator so the random electric/fuel assignment is reproducible on re-run
rng = np.random.default_rng(42)

for month in month_list:
    print(month)
    for company in company_list:
        sum_col = f'sum_Ce_{company}_{month}'
        avg_operator_col = f'average_operator_Ce_{company}_{month}'
        avg_package_col = f'average_package_Ce_{company}_{month}'

        # Initialise as float so the float results below do not need a dtype upcast
        df_result[sum_col] = 0.0
        df_result[avg_operator_col] = 0.0
        df_result[avg_package_col] = 0.0

        filepath = f'../../{company}/final_result/{company}_final_result_{month}.csv'
        if not os.path.exists(filepath):
            continue
        print(company)
        df_company = pd.read_csv(filepath,low_memory=False)

        # Cooperation factors: AOI and site distances shrink by sqrt(n), stay distance by n
        shrink = math.sqrt(num_cooperator)
        df_company['aoi_distance'] = df_company['aoi_distance'].astype(float) / shrink
        df_company['site_distance_all'] = df_company['site_distance_all'].astype(float) / shrink
        df_company['stay_distance'] = df_company['stay_distance'].astype(float) / num_cooperator
        df_company['all_distance'] = df_company['aoi_distance'] + df_company['site_distance_all'] + df_company['stay_distance']

        # Outlier filter (1.5 * IQR rule). nanpercentile keeps a single NaN distance
        # from turning both bounds into NaN and silently discarding the whole company;
        # NaN rows themselves are still removed by the comparisons below.
        data = df_company['all_distance'].to_numpy()
        Q1 = np.nanpercentile(data, 25)
        Q3 = np.nanpercentile(data, 75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df_company = df_company[(df_company['all_distance']>lower_bound)&(df_company['all_distance']<upper_bound)]

        # Rows without a city name cannot be matched to df_result, so skip them
        city_company_list = df_company['city'].dropna().unique().tolist()

        for city in city_company_list:
            df_city = df_company[df_company['city'] == city].reset_index()

            # Resolve the company's city name to the name used in df_result.
            # city_name_list holds groups of alternative names for the same city; within
            # a group the first alias present in df_result is taken. If several groups
            # contain the city, the last group with a match wins (as before).
            flag = False
            repeat_list = []
            city_res = ''
            for temp_list in city_name_list:
                if city not in temp_list:
                    continue
                repeat_list = temp_list
                city_temp = next((name for name in temp_list if name in known_cities), None)
                if city_temp is not None:
                    city_res = city_temp
                    flag = True
            # No alias group mentions the city: fall back to an exact name match
            if len(repeat_list) == 0 and city in known_cities:
                city_res = city
                flag = True

            if not flag:
                print(f'match error：{city}')
                continue

            city_mask = df_result['city'] == city_res

            # Randomly assign an energy type to every courier: 0 = electricity, 1 = oil.
            # A courier is electric with probability elevator_ratio.
            # The scalar is taken with .values[0], like the other per-city lookups,
            # instead of comparing against a whole array inside a per-row lambda.
            elevator_ratio = df_result.loc[city_mask, f'elevator_ratio_{month}'].values[0]
            df_city['energy'] = (rng.random(len(df_city)) > elevator_ratio).astype(int)

            pkg_num = df_result.loc[city_mask, f'num_package_{company}_{month}'].values[0]

            # Packages per courier, at least 1; a missing or zero package count also gives 1
            pkg_avg_operator = max(int(pkg_num / len(df_city)), 1) if pkg_num > 0 else 1

            # Per-courier carbon emission in kg.
            # Oil: distance * coeff_oil.
            # Electricity: coeff_ele * (vehicle+courier weight * distance
            #              + package weight * packages * distance / wave_count / 2).
            # NOTE(review): a wave_count of 0 yields inf here — confirm it cannot occur.
            distance = df_city['all_distance'].astype(float)
            Ce_oil = distance * coeff_oil
            Ce_ele = coeff_ele * (cm_weight * distance + pkg_weight * pkg_avg_operator * distance / df_city['wave_count'].astype(int) / 2)
            df_city['Ce'] = np.where(df_city['energy'] == 1, Ce_oil, Ce_ele)

            # City total
            city_sum_Ce = df_city['Ce'].sum()
            # City average per courier / per package. A zero or missing denominator
            # leaves 0 ("not computed") instead of writing inf/NaN into the table.
            num_operator = df_result.loc[city_mask, f'num_operator_{company}_{month}'].values[0]
            city_average_operator_Ce = city_sum_Ce / num_operator if num_operator > 0 else 0.0
            city_average_package_Ce = city_sum_Ce / pkg_num if pkg_num > 0 else 0.0

            df_result.loc[city_mask, sum_col] = city_sum_Ce
            df_result.loc[city_mask, avg_operator_col] = city_average_operator_Ce
            df_result.loc[city_mask, avg_package_col] = city_average_package_Ce

df_result.to_csv('../data/cooperator_sum_result_20240924.csv',index=False)

In [None]:
'''
Carbon emissions per city, company and month, computed from the
all_distance column of the per-courier result files as stored (no distance
scaling is applied in this cell).

Writes sum_Ce, average_operator_Ce and average_package_Ce per company and
month into df_result; 0 in those columns means "not computed".

NOTE(review): this cell reads '../data/cooperator_sum_result_20240924.csv',
resets the sum_Ce_* / average_*_Ce_* columns and writes the file back. The
cell that computes the cooperative scenario writes its results to the same
file, so running this cell afterwards replaces those values — confirm the
intended input/output paths.
'''

with open('../data/city_name_list.json', 'r') as f:
    city_name_list = json.load(f)

# Vehicle, courier and single-package weights (presumably kg — TODO confirm units)
car_weight,man_weight,pkg_weight = 350,69.6,1.38
coeff_ele = 0.000113  # electric vehicle: carbon (kg) per unit of carried weight and distance
coeff_oil = 0.15 # fuel vehicle: carbon (kg) per km
cm_weight = car_weight + man_weight

df_result = pd.read_csv('../data/cooperator_sum_result_20240924.csv')
# Hoisted once: the set of city names present in the result table
known_cities = set(df_result['city'])

company_list = ['JD','SF','YT','YD','ZT','ST']
month_list = ['2023_01','2023_07','2024_01']

# Seeded generator so the random electric/fuel assignment is reproducible on re-run
rng = np.random.default_rng(42)

for month in month_list:
    print(month)
    for company in company_list:
        sum_col = f'sum_Ce_{company}_{month}'
        avg_operator_col = f'average_operator_Ce_{company}_{month}'
        avg_package_col = f'average_package_Ce_{company}_{month}'

        # Initialise as float so the float results below do not need a dtype upcast
        df_result[sum_col] = 0.0
        df_result[avg_operator_col] = 0.0
        df_result[avg_package_col] = 0.0

        filepath = f'../../{company}/final_result/{company}_final_result_{month}.csv'
        if not os.path.exists(filepath):
            continue
        print(company)
        df_company = pd.read_csv(filepath,low_memory=False)

        # Outlier filter (1.5 * IQR rule). nanpercentile keeps a single NaN distance
        # from turning both bounds into NaN and silently discarding the whole company;
        # NaN rows themselves are still removed by the comparisons below.
        data = df_company['all_distance'].to_numpy()
        Q1 = np.nanpercentile(data, 25)
        Q3 = np.nanpercentile(data, 75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df_company = df_company[(df_company['all_distance']>lower_bound)&(df_company['all_distance']<upper_bound)]

        # Rows without a city name cannot be matched to df_result, so skip them
        city_company_list = df_company['city'].dropna().unique().tolist()

        for city in city_company_list:
            df_city = df_company[df_company['city'] == city].reset_index()

            # Resolve the company's city name to the name used in df_result.
            # city_name_list holds groups of alternative names for the same city; within
            # a group the first alias present in df_result is taken. If several groups
            # contain the city, the last group with a match wins (as before).
            flag = False
            repeat_list = []
            city_res = ''
            for temp_list in city_name_list:
                if city not in temp_list:
                    continue
                repeat_list = temp_list
                city_temp = next((name for name in temp_list if name in known_cities), None)
                if city_temp is not None:
                    city_res = city_temp
                    flag = True
            # No alias group mentions the city: fall back to an exact name match
            if len(repeat_list) == 0 and city in known_cities:
                city_res = city
                flag = True

            if not flag:
                print(f'match error：{city}')
                continue

            city_mask = df_result['city'] == city_res

            # Randomly assign an energy type to every courier: 0 = electricity, 1 = oil.
            # A courier is electric with probability elevator_ratio.
            # The scalar is taken with .values[0], like the other per-city lookups,
            # instead of comparing against a whole array inside a per-row lambda.
            elevator_ratio = df_result.loc[city_mask, f'elevator_ratio_{month}'].values[0]
            df_city['energy'] = (rng.random(len(df_city)) > elevator_ratio).astype(int)

            pkg_num = df_result.loc[city_mask, f'num_package_{company}_{month}'].values[0]

            # Packages per courier, at least 1; a missing or zero package count also gives 1
            pkg_avg_operator = max(int(pkg_num / len(df_city)), 1) if pkg_num > 0 else 1

            # Per-courier carbon emission in kg.
            # Oil: distance * coeff_oil.
            # Electricity: coeff_ele * (vehicle+courier weight * distance
            #              + package weight * packages * distance / wave_count / 2).
            # NOTE(review): a wave_count of 0 yields inf here — confirm it cannot occur.
            Ce_oil = df_city['all_distance'] * coeff_oil
            Ce_ele = coeff_ele * (cm_weight * df_city['all_distance'] + pkg_weight * pkg_avg_operator * df_city['all_distance']/df_city['wave_count']/2)
            df_city['Ce'] = np.where(df_city['energy'] == 1, Ce_oil, Ce_ele)

            # City total
            city_sum_Ce = df_city['Ce'].sum()
            # City average per courier / per package. A zero or missing denominator
            # leaves 0 ("not computed") instead of writing inf/NaN into the table.
            num_operator = df_result.loc[city_mask, f'num_operator_{company}_{month}'].values[0]
            city_average_operator_Ce = city_sum_Ce / num_operator if num_operator > 0 else 0.0
            city_average_package_Ce = city_sum_Ce / pkg_num if pkg_num > 0 else 0.0

            df_result.loc[city_mask, sum_col] = city_sum_Ce
            df_result.loc[city_mask, avg_operator_col] = city_average_operator_Ce
            df_result.loc[city_mask, avg_package_col] = city_average_package_Ce

df_result.to_csv('../data/cooperator_sum_result_20240924.csv',index=False)

In [None]:
'''
Delivery distance per city, company and month.

Writes three city-level values per company and month into df_result:
sum_distance, average_operator_distance and average_package_distance.
A value of 0 in those columns means "not computed".

NOTE(review): the all_distance column is used exactly as stored in the
per-courier files (no scaling is applied in this cell) although the results
go into the cooperator result file — confirm this is intended.
'''
with open('../data/city_name_list.json', 'r') as f:
    city_name_list = json.load(f)

df_result = pd.read_csv('../data/cooperator_sum_result_20240924.csv')
# Hoisted once: the set of city names present in the result table
known_cities = set(df_result['city'])

company_list = ['JD','SF','YT','YD','ZT','ST']
month_list = ['2023_01','2023_07','2024_01']

for month in month_list:
    print(month)
    for company in company_list:
        print(company)
        sum_col = f'sum_distance_{company}_{month}'
        avg_operator_col = f'average_operator_distance_{company}_{month}'
        avg_package_col = f'average_package_distance_{company}_{month}'

        # Initialise as float so the float results below do not need a dtype upcast
        df_result[sum_col] = 0.0
        df_result[avg_operator_col] = 0.0
        df_result[avg_package_col] = 0.0

        filepath = f'../../{company}/final_result/{company}_final_result_{month}.csv'
        if not os.path.exists(filepath):
            continue
        df_company = pd.read_csv(filepath,low_memory=False)

        # Keep one row per courier; the identifying columns differ by company.
        # NOTE(review): 'ST' is not de-duplicated here — confirm that is intended.
        if company in ['SF','ZT','YT','YD']:
            df_company = df_company.drop_duplicates(subset=['courier','phone'])
        if company == 'JD':
            df_company = df_company.drop_duplicates(subset=['operator_id'])

        # Outlier filter (1.5 * IQR rule). nanpercentile keeps a single NaN distance
        # from turning both bounds into NaN and silently discarding the whole company;
        # NaN rows themselves are still removed by the comparisons below.
        data = df_company['all_distance'].to_numpy()
        Q1 = np.nanpercentile(data, 25)
        Q3 = np.nanpercentile(data, 75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df_company = df_company[(df_company['all_distance']>lower_bound)&(df_company['all_distance']<upper_bound)]

        # Rows without a city name cannot be matched to df_result, so skip them
        city_company_list = df_company['city'].dropna().unique().tolist()

        for city in city_company_list:
            df_city = df_company[df_company['city'] == city].reset_index()

            # Resolve the company's city name to the name used in df_result.
            # city_name_list holds groups of alternative names for the same city; within
            # a group the first alias present in df_result is taken. If several groups
            # contain the city, the last group with a match wins (as before).
            flag = False
            repeat_list = []
            city_res = ''
            for temp_list in city_name_list:
                if city not in temp_list:
                    continue
                repeat_list = temp_list
                city_temp = next((name for name in temp_list if name in known_cities), None)
                if city_temp is not None:
                    city_res = city_temp
                    flag = True
            # No alias group mentions the city: fall back to an exact name match
            if len(repeat_list) == 0 and city in known_cities:
                city_res = city
                flag = True

            if not flag:
                print(f'match error：{city}')
                continue

            city_mask = df_result['city'] == city_res

            # City total distance
            city_sum_distance = df_city['all_distance'].sum()
            # City average per courier / per package. A zero or missing denominator
            # leaves 0 ("not computed") instead of writing inf/NaN into the table.
            num_operator = df_result.loc[city_mask, f'num_operator_{company}_{month}'].values[0]
            city_average_operator_distance = city_sum_distance / num_operator if num_operator > 0 else 0.0
            pkg_num = df_result.loc[city_mask, f'num_package_{company}_{month}'].values[0]
            city_average_package_distance = city_sum_distance / pkg_num if pkg_num > 0 else 0.0

            df_result.loc[city_mask, sum_col] = city_sum_distance
            df_result.loc[city_mask, avg_operator_col] = city_average_operator_distance
            df_result.loc[city_mask, avg_package_col] = city_average_package_distance

df_result.to_csv('../data/cooperator_sum_result_20240924.csv',index=False)

In [7]:
'''
Complete missing data.

A value of 0 in a `{feature}_{company}_{month}` column is treated as missing.
It is estimated from the same city's values of other companies, each scaled by
the average base/target ratio observed in cities of the same city_level:
first from companies of the same group (JD/SF or YT/YD/ZT/ST), and only if
none of them yields an estimate, from all companies.
'''
df = pd.read_csv('../data/cooperator_sum_result_20240924.csv')

city_exclude = ['China-North Korea Joint Area', 'Sansha City', 'Taiwan', 'Hong Kong', 'Macau', 'Shuanghe City']
# '2024_01' must stay first: its ratios are reused for 'ST' in the other months
month_list = ['2024_01','2023_07','2023_01']
company_list = ['JD','SF','YT','YD','ZT','ST']
feature_list = ['sum_Ce','average_operator_Ce','average_package_Ce','num_operator','sum_distance','average_operator_distance','average_package_distance']
city_level_list = ['First-tier cities', 'New first-tier cities', 'Second-tier cities', 'Third-tier cities', 'Fourth-tier cities', 'Fifth-tier cities']
# Companies whose values are preferred as estimation sources for each other
company_group_list = [['JD','SF'], ['YT','YD','ZT','ST']]

for feature in feature_list:
    print(feature)
    dict_2024_01 = {}  # ratio_dict of 2024_01, kept for use in 2023_01 and 2023_07
    for month in month_list:
        print(month)

        # Estimates are fractional; make the columns float up front so writing
        # them into integer-typed columns never relies on an implicit dtype upcast
        for company in company_list:
            df[f'{feature}_{company}_{month}'] = df[f'{feature}_{company}_{month}'].astype(float)

        # Build the ratio dictionary for the current month
        ratio_dict = {}  # ratio = company_base / company_target
        for company_base in company_list:
            ratio_dict[company_base] = {}
            for city_level in city_level_list:
                ratio_dict[company_base][city_level] = {}
                for company_target in company_list:
                    if month != '2024_01' and 'ST' in (company_base, company_target):
                        # Any pair involving ST reuses the ratio measured in 2024_01
                        ratio = dict_2024_01[company_base][city_level][company_target]
                    else:
                        base_col = f'{feature}_{company_base}_{month}'
                        target_col = f'{feature}_{company_target}_{month}'
                        df_temp = df[(df[base_col] > 0) & (df[target_col] > 0) & (df['city_level'] == city_level)]
                        # NaN when no city of this level has both companies present
                        ratio = (df_temp[base_col] / df_temp[target_col]).mean()
                    ratio_dict[company_base][city_level][company_target] = ratio

        if month == '2024_01':
            dict_2024_01 = ratio_dict

        # Fill the missing values city by city.
        # Values filled earlier in this pass can serve as sources for later companies.
        for i in range(len(df)):
            city_name = df.loc[i,'city']
            if city_name in city_exclude:
                continue
            city_level = df.loc[i,'city_level']
            for company_base in company_list:
                base_col = f'{feature}_{company_base}_{month}'
                if df.loc[i,base_col] != 0:
                    continue
                # A city_level outside city_level_list has no ratios; the value then stays 0
                level_ratio = ratio_dict[company_base].get(city_level, {})
                same_group = next((group for group in company_group_list if company_base in group), [])
                # First try the company's own group, then every company
                for company_houxuan_list in (same_group, company_list):
                    data_list = []
                    for company_houxuan in company_houxuan_list:  # houxuan = candidate
                        if company_houxuan == company_base:
                            continue
                        value = df.loc[i,f'{feature}_{company_houxuan}_{month}']
                        if not value > 0:
                            continue
                        ratio = level_ratio.get(company_houxuan, np.nan)
                        # Skip undefined ratios so a NaN is never written over the
                        # 0 placeholder and the wider fallback still gets a chance
                        if not np.isfinite(ratio):
                            continue
                        data_list.append(value * ratio)
                    if len(data_list) > 0:
                        df.loc[i,base_col] = sum(data_list)/len(data_list)
                        break

df.to_csv('../data/quan_cooperator_sum_result_20240924.csv',index=False)

sum_Ce
2024_01
2023_07
2023_01
average_operator_Ce
2024_01
2023_07
2023_01
average_package_Ce
2024_01
2023_07
2023_01
num_operator
2024_01
2023_07
2023_01
sum_distance
2024_01
2023_07
2023_01
average_operator_distance
2024_01
2023_07
2023_01
average_package_distance
2024_01
2023_07
2023_01


In [None]:
'''
Calculate the effect of emission reduction, multiple companies cooperate to distribute
'''
def cal_sum_Ce(df,month):
    """Return the total carbon emission of all companies for one month.

    Adds (or overwrites) the column ``sum_Ce_{month}`` on ``df`` in place; it
    holds the per-row sum of ``sum_Ce_{company}_{month}`` over all companies.

    A NaN in a company column is counted as 0 for that company. Previously one
    NaN made the whole row total NaN, and the final ``.sum()`` then silently
    dropped that row from the grand total.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``sum_Ce_{company}_{month}`` column for each of
        JD, SF, YT, YD, ZT and ST.
    month : str
        Month suffix used in the column names, e.g. ``'2024_01'``.

    Returns
    -------
    Sum of the ``sum_Ce_{month}`` column over all rows.

    Raises
    ------
    KeyError
        If one of the company columns is missing.
    """
    company_list = ['JD','SF','YT','YD','ZT','ST']
    total_col = f'sum_Ce_{month}'
    df[total_col] = 0
    for company in company_list:
        df[total_col] = df[total_col] + df[f'sum_Ce_{company}_{month}'].fillna(0)
    return df[total_col].sum()

# Compare the total emission of one month between the origin result file and
# the cooperator result file, and print the relative reduction.
month = '2024_01'

# Origin totals
df_origin = pd.read_csv('../data/quan_sum_result_20240911.csv')
sum_Ce_origin = cal_sum_Ce(df=df_origin, month=month)
print(sum_Ce_origin)

# Cooperator totals
df_cooperator = pd.read_csv('../data/quan_cooperator_sum_result_20240924.csv')
sum_Ce_cooperator = cal_sum_Ce(df=df_cooperator, month=month)
print(sum_Ce_cooperator)

# Share of the origin emission that is saved
des_ratio = (
    (sum_Ce_origin - sum_Ce_cooperator)
    / sum_Ce_origin
)
print(des_ratio)