In [None]:
import numpy as np
import pandas as pd
import os
import json
import re
import math
import warnings

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
# Suppress every warning of category FutureWarning for the rest of the session
# so that they do not clutter the printed statistics below.
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Count the number of couriers: one line per company, then the overall total.
df = pd.read_csv('../data/quan_sum_result_20240911.csv')
company_list = ['JD','SF','YT','YD','ZT','ST']

courier_totals = []
for company in company_list:
    # Column sum over all rows of this company's 2024_01 operator count.
    company_num_courier = df[f'num_operator_{company}_2024_01'].sum()
    courier_totals.append(company_num_courier)
    print(f'{company}:{company_num_courier}')

# Grand total across all companies.
num_courier = sum(courier_totals)
print(num_courier)

In [None]:
# Count the number of cities at each level
# (assumes one row per city in the CSV -- TODO confirm).
company_list = ['JD','SF','YT','YD','ZT','ST']
df = pd.read_csv('../data/quan_sum_result_20240911.csv')
city_level_list = ['First-tier cities', 'New first-tier cities', 'Second-tier cities', 'Third-tier cities', 'Fourth-tier cities', 'Fifth-tier cities']

level_of_row = df['city_level']
for city_level in city_level_list:
    # Summing the boolean mask counts the rows whose level matches.
    num = (level_of_row == city_level).sum()
    print(f'{city_level}:{num}')

In [None]:
# City-level carbon emission patterns for 2024_01: mean per city level,
# overall mean, and the cities with the smallest / largest total.
df = pd.read_csv('../data/quan_sum_result_20240911.csv')
company_list = ['JD','SF','YT','YD','ZT','ST']

# Add the per-company columns left to right, starting from 0, to get the
# all-company total for each row.
df['sum_Ce_2024_01'] = sum(df[f'sum_Ce_{company}_2024_01'] for company in company_list)
# Keep only rows with a strictly positive total.
df = df[df['sum_Ce_2024_01']>0]

city_level_list = ['First-tier cities', 'New first-tier cities', 'Second-tier cities', 'Third-tier cities', 'Fourth-tier cities', 'Fifth-tier cities']
emission = df['sum_Ce_2024_01']

for city_level in city_level_list:
    level_mean = emission[df['city_level']==city_level].mean()
    print(f'{city_level}:{level_mean}')

print(f'National average:{emission.mean()}')

lowest_value = emission.min()
lowest_city = df.loc[emission.idxmin(),'city']
print(f'min:{lowest_city}:{lowest_value}')

highest_value = emission.max()
highest_city = df.loc[emission.idxmax(),'city']
print(f'max:{highest_city}:{highest_value}')

In [None]:
def statistic_feature(df, feature, city_levels=None):
    """Print summary statistics of one column, per city level and overall.

    Printed lines, in order:
      * ``<city level>:<mean>`` for every entry of ``city_levels``
      * ``National average:<mean over all rows>``
      * ``min:<city>:<value>`` for the row holding the smallest value
      * ``max:<city>:<value>`` for the row holding the largest value

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'city_level'``, ``'city'`` and ``feature``.
    feature : str
        Name of the numeric column to summarise.
    city_levels : iterable of str, optional
        City-level labels to report, in print order. When omitted, the
        notebook-level ``city_level_list`` is used, so existing two-argument
        calls behave exactly as before.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    if city_levels is None:
        # Backward-compatible fallback to the notebook global; passing the
        # levels explicitly removes the hidden dependency on cell order.
        city_levels = city_level_list

    values = df[feature]
    for city_level in city_levels:
        level_mean = values[df['city_level'] == city_level].mean()
        print(f'{city_level}:{level_mean}')

    print(f'National average:{values.mean()}')

    # idxmin/idxmax return index labels, which are then used to look up the city name.
    lowest_city = df.loc[values.idxmin(), 'city']
    print(f'min:{lowest_city}:{values.min()}')

    highest_city = df.loc[values.idxmax(), 'city']
    print(f'max:{highest_city}:{values.max()}')

In [None]:
# Courier-level statistics: 2024_01 carbon emissions divided by the number of
# couriers (operators), summed over all companies.
df['num_operator_2024_01'] = 0
for company in company_list:
    df['num_operator_2024_01'] += df[f'num_operator_{company}_2024_01']

# A row with zero couriers would divide by zero and produce inf, which then
# dominates the mean and max printed below. Mask such denominators to NaN so
# pandas skips them in the statistics; rows are not dropped, so later cells
# still see the same df. This mirrors the `> 0` guard used for the
# package-level average.
courier_count = df['num_operator_2024_01'].where(df['num_operator_2024_01'] > 0)
df['average_operator_2024_01'] = df['sum_Ce_2024_01']/courier_count

feature = 'average_operator_2024_01'
statistic_feature(df,feature)

In [None]:
# Package-level statistics: 2024_01 carbon emissions divided by the number of
# packages, summed over all companies.
package_columns = [f'num_package_{company}_2024_01' for company in company_list]
df['num_package_2024_01'] = sum(df[column] for column in package_columns)

# Drop rows without packages before dividing, then append the ratio column.
df = df[df['num_package_2024_01']>0].assign(
    average_package_2024_01=lambda kept: kept['sum_Ce_2024_01']/kept['num_package_2024_01']
)

feature = 'average_package_2024_01'
statistic_feature(df,feature)

In [None]:
# CO2 evolving: package number
# Compares package volume across 2023_01, 2023_07 and 2024_01.
month_list = ['2023_01','2023_07','2024_01']
company_list = ['JD','SF','YT','YD','ZT','ST']
city_level_list = ['First-tier cities', 'New first-tier cities', 'Second-tier cities', 'Third-tier cities', 'Fourth-tier cities', 'Fifth-tier cities']
df = pd.read_csv('../data/quan_sum_result_20240911.csv')

for month in month_list:
    # All-company package total for this month, added left to right from 0.
    all_company_packages = sum(df[f'num_package_{company}_{month}'] for company in company_list)
    # NOTE(review): * 30 / 1e9 presumably turns a daily count into a 30-day
    # month expressed in billions of packages -- confirm the units.
    df[f'num_package_{month}'] = all_company_packages * 30 / 1000000000
df = df[df['num_package_2024_01'] > 0]

# One report line per city level, followed by a final line over all rows.
report_scopes = [(city_level, df[df['city_level']==city_level]) for city_level in city_level_list]
report_scopes.append(('sum', df))
for label, frame in report_scopes:
    packages_2023_01 = frame['num_package_2023_01'].sum()
    packages_2023_07 = frame['num_package_2023_07'].sum()
    packages_2024_01 = frame['num_package_2024_01'].sum()
    # Relative change from 2023_01 to 2024_01.
    ratio = (packages_2024_01-packages_2023_01)/packages_2023_01
    print(f'{label}: 2023_01:{packages_2023_01} 2023_07:{packages_2023_07} 2024_01:{packages_2024_01} ratio:{ratio}')

# Absolute change per row; rows with exactly zero change are excluded before
# looking for the extremes.
df = df.assign(rise=df['num_package_2024_01']-df['num_package_2023_01'])
df = df[df['rise'] != 0]
package_rise = df['rise']

smallest_rise = package_rise.min()
smallest_rise_city = df.loc[package_rise.idxmin(),'city']
print(f'min:{smallest_rise_city}:{smallest_rise}')

largest_rise = package_rise.max()
largest_rise_city = df.loc[package_rise.idxmax(),'city']
print(f'max:{largest_rise_city}:{largest_rise}')

In [None]:
# CO2 evolving: carbon emission change
# Compares total carbon emissions across 2023_01, 2023_07 and 2024_01.
month_list = ['2023_01','2023_07','2024_01']
company_list = ['JD','SF','YT','YD','ZT','ST']
city_level_list = ['First-tier cities', 'New first-tier cities', 'Second-tier cities', 'Third-tier cities', 'Fourth-tier cities', 'Fifth-tier cities']
df = pd.read_csv('../data/quan_sum_result_20240911.csv')

for month in month_list:
    # All-company emission total for this month, added left to right from 0.
    all_company_emission = sum(df[f'sum_Ce_{company}_{month}'] for company in company_list)
    # NOTE(review): * 30 / 1000 presumably turns a daily value into a 30-day
    # month with a 1000x unit change (e.g. kg -> t) -- confirm the units.
    df[f'sum_Ce_{month}'] = all_company_emission * 30 / 1000

# One report line per city level, followed by a final line over all rows.
report_scopes = [(city_level, df[df['city_level']==city_level]) for city_level in city_level_list]
report_scopes.append(('sum', df))
for label, frame in report_scopes:
    emission_2023_01 = frame['sum_Ce_2023_01'].sum()
    emission_2023_07 = frame['sum_Ce_2023_07'].sum()
    emission_2024_01 = frame['sum_Ce_2024_01'].sum()
    # Relative change from 2023_01 to 2024_01.
    ratio = (emission_2024_01-emission_2023_01)/emission_2023_01
    print(f'{label}: 2023_01:{emission_2023_01} 2023_07:{emission_2023_07} 2024_01:{emission_2024_01} ratio:{ratio}')

# Absolute change per row, then the rows with the smallest and largest change.
df['rise'] = df['sum_Ce_2024_01']-df['sum_Ce_2023_01']
emission_rise = df['rise']

smallest_rise = emission_rise.min()
smallest_rise_city = df.loc[emission_rise.idxmin(),'city']
print(f'min:{smallest_rise_city}:{smallest_rise}')

largest_rise = emission_rise.max()
largest_rise_city = df.loc[emission_rise.idxmax(),'city']
print(f'max:{largest_rise_city}:{largest_rise}')

In [None]:
# Total number of couriers across all companies (2024_01 columns).
company_list = ['JD','SF','YT','YD','ZT','ST']
df = pd.read_csv('../data/quan_sum_result_20240911.csv')

# Sum each company's operator column, then add the per-company totals
# left to right starting from 0.
sum_operator = sum(
    df[f'num_operator_{company}_2024_01'].sum()
    for company in company_list
)
print(sum_operator)