In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.font_manager as font_manager

# Register the Times New Roman face with matplotlib and make it the default family.
font_path = '/home/font/Times New Roman/times.ttf'
font_manager.fontManager.addfont(font_path)
# `prop` is created before the rcParams change below, so it does not pick up the
# new default size; later cells pass it explicitly via `fontproperties=prop`.
prop = font_manager.FontProperties(fname=font_path)
plt.rcParams.update({
    'font.size': 18,
    'font.family': prop.get_name(),
})

In [None]:
# Load the two input tables; the cells below merge them on the shared 'city' column.
df_result = pd.read_csv('../data/quan_three_grid_city_level_Ce.csv')
df_base = pd.read_csv('../data/waybill_base_20240911.csv')

In [None]:
# Preview the first rows of df_result.
display(df_result.head())

In [None]:
# Preview the first rows of df_base.
display(df_base.head())

In [None]:
# Row count before filtering.
print(len(df_result))

# Keep only the rows in which no column equals 0, then attach the base table by city.
rows_without_zero = (df_result != 0).all(axis=1)
df_result = df_result[rows_without_zero]
df = pd.merge(df_result, df_base, on='city', how='inner')

# The raw labels are misspelled ('Tire'); rewrite exact matches, leave anything else as is.
tier_spelling = {
    'Tire 1': 'Tier 1',
    'New Tire 1': 'New Tier 1',
    'Tire 2': 'Tier 2',
    'Tire 3': 'Tier 3',
    'Tire 4': 'Tier 4',
    'Tire 5': 'Tier 5',
}
for misspelled, corrected in tier_spelling.items():
    df.loc[df['city_level'] == misspelled, 'city_level'] = corrected

# Row count after the inner join, followed by a preview.
print(len(df))
display(df.head())

In [None]:
# NOTE(review): this cell repeats the preceding cell statement for statement;
# consider deleting one of the two copies.
print(len(df_result))

# Drop every row that contains a 0 in any column, then inner-join the base table on 'city'.
df_result = df_result[(df_result != 0).all(axis=1)]
df = pd.merge(df_result, df_base, on='city', how='inner')

# Fix the 'Tire' -> 'Tier' misspelling on exact label matches only.
for misspelled, corrected in (
    ('Tire 1', 'Tier 1'),
    ('New Tire 1', 'New Tier 1'),
    ('Tire 2', 'Tier 2'),
    ('Tire 3', 'Tier 3'),
    ('Tire 4', 'Tier 4'),
    ('Tire 5', 'Tier 5'),
):
    df.loc[df['city_level'] == misspelled, 'city_level'] = corrected

print(len(df))
display(df.head())

In [None]:
# One figure per emission feature: for every city tier draw a left half-violin,
# a box plot and the jittered raw points, after removing per-tier outliers.
feature_list = ['sum_Ce_day','average_operator_Ce_day','average_package_Ce_day']
city_levels = ['Tier 1', 'New Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
positions = np.arange(len(city_levels))
fs = 25

# y-axis label per feature; any feature not listed here uses the '(t)' label.
ylabel_by_feature = {
    'average_operator_Ce_day': 'GHG Emissions (kg)',
    'average_package_Ce_day': 'GHG Emissions (g)',
}

# Seeded generator so the horizontal jitter of the points is the same on every run.
jitter_rng = np.random.default_rng(0)

# Theme and palette do not depend on the feature, so set them once.
sns.set(style="whitegrid")
colors = sns.color_palette("pastel")

for feature in feature_list:
    plt.figure(figsize=(10, 8))

    # Per tier, keep only rows strictly inside (Q1 - 1.5*IQR, Q3 + 1.5*IQR).
    kept_frames = []
    for city in city_levels:
        city_data = df[df['city_level'] == city]
        q1, q3 = np.percentile(city_data[feature].to_numpy(), [25, 75])
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        inside = (city_data[feature] > lower_bound) & (city_data[feature] < upper_bound)
        kept_frames.append(city_data[inside])
    # Concatenate once instead of growing the frame inside the loop.
    df_temp = pd.concat(kept_frames, axis=0)
    print(len(df_temp))

    # Filtered values of the current feature, one Series per tier (same order as city_levels).
    values_by_level = [df_temp.loc[df_temp['city_level'] == city, feature] for city in city_levels]

    violin_parts = plt.violinplot(values_by_level, positions=positions - 0.15, widths=0.2,
                                  showmeans=False, showmedians=False, showextrema=False)
    for pc, color in zip(violin_parts['bodies'], colors):
        pc.set_facecolor(color)
        pc.set_edgecolor('black')
        pc.set_alpha(0.9)
        # Keep only the left half: clamp every vertex's x to the body's mean x.
        path = pc.get_paths()[0]
        vertices = path.vertices
        vertices[:, 0] = np.clip(vertices[:, 0], -np.inf, np.mean(vertices[:, 0]))
        path.vertices = vertices

    box_plot = plt.boxplot(values_by_level, positions=positions, widths=0.1,
                           patch_artist=True, medianprops=dict(color='black'))
    for box, color in zip(box_plot['boxes'], colors):
        box.set_facecolor(color)

    # Raw points, shifted to the right of each box by a random offset in [0.075, 0.3).
    for i, level_values in enumerate(values_by_level):
        x_values = jitter_rng.uniform(0.5, 2, len(level_values)) * 0.15 + positions[i]
        plt.scatter(x_values, level_values, color=colors[i], s=50, alpha=1.0,
                    edgecolor='white', linewidth=1.2, label='Scatter' if i == 0 else "")

    plt.xticks(ticks=positions, labels=city_levels)
    plt.xlabel('City Level', fontsize=fs, fontproperties=prop)
    plt.ylabel(ylabel_by_feature.get(feature, 'GHG Emissions (t)'), fontsize=fs, fontproperties=prop)

    sns.despine()
    ax = plt.gca()

    # Style the tick labels without overwriting their text. The previous
    # `ax.set_yticklabels(ax.get_yticks(), ...)` pinned raw float strings as labels
    # while the y locator stayed automatic, so labels could go stale or blank
    # once tight_layout() changed the tick positions.
    for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
        tick_label.set_fontproperties(prop)
    ax.tick_params(axis='both', labelsize=fs)

    plt.tight_layout()
    plt.show()

In [None]:
# Stacked bar chart of total emissions per city tier for three months, with each
# segment annotated by the tier's percentage share of that month's total.
from matplotlib.ticker import FuncFormatter

df = pd.read_csv('../data/quan_sum_result_20240911.csv')
company_list = ['JD','SF','YT','YD','ZT','ST']
month_list = ['2023_01','2023_07','2024_01']
city_level_list = ['Tier 1', 'New Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
total_columns = [f'sum_Ce_{month}' for month in month_list]

# Build one total column per month by adding up the per-company columns.
columns_list = []
for month in month_list:
    month_total = 0
    for company in company_list:
        company_column = f'sum_Ce_{company}_{month}'
        columns_list.append(company_column)
        month_total = month_total + df[company_column]
    # x31: presumably daily value -> monthly total (all three months have 31 days) - TODO confirm.
    # /1000 twice: presumably a unit conversion matching the '(kt)' axis label - TODO confirm.
    df[f'sum_Ce_{month}'] = month_total * 31 / 1000 / 1000

# A city with a 0 in any company/month column gets all of its monthly totals zeroed.
df['has_zero'] = df[columns_list].eq(0).any(axis=1)
df.loc[df['has_zero'], total_columns] = 0

# Fix the 'Tire' -> 'Tier' misspelling on exact label matches only.
for misspelled in ['Tire 1', 'New Tire 1', 'Tire 2', 'Tire 3', 'Tire 4', 'Tire 5']:
    df.loc[df['city_level'] == misspelled, 'city_level'] = misspelled.replace('Tire', 'Tier')

print(len(df))
df = df[['city', 'city_level'] + total_columns]
display(df.head())

# Tiers[j][i] = summed emissions of tier j in month i (orders follow city_level_list / month_list).
Tiers = [
    [df.loc[df['city_level'] == city_level, f'sum_Ce_{month}'].sum() for month in month_list]
    for city_level in city_level_list
]
# Keep the per-tier names that used to be created through exec().
Tier_1, New_Tier_1, Tier_2, Tier_3, Tier_4, Tier_5 = Tiers
print(Tiers)

# Tiers_height[j][i] = integer percentage share of tier j in month i.
Tiers_height = [tier[:] for tier in Tiers]
sum_temps = []
for i in range(len(month_list)):
    sum_temp = sum(tier[i] for tier in Tiers)
    print(f'{i}:{sum_temp}')
    sum_temps.append(sum_temp)
    for j, tier in enumerate(Tiers):
        # Round after scaling to percent. The previous int(round(r, 2) * 100)
        # truncated values such as 0.29 * 100 == 28.999999999999996 down to 28.
        share = float(tier[i]) / float(sum_temp) if sum_temp else 0.0
        Tiers_height[j][i] = int(round(share * 100))

# Relative change of the overall total between the three months.
print((sum_temps[1]-sum_temps[0])/sum_temps[0])
print((sum_temps[2]-sum_temps[1])/sum_temps[1])
print((sum_temps[2]-sum_temps[0])/sum_temps[0])

sns.set(style="whitegrid")
# seaborn's theme call overrides rcParams (font family included), so the font is
# re-applied here, before any text is created. size=18 is what the legend uses.
font_path = '/home/font/Times New Roman/times.ttf'
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path, size=18)
plt.rcParams['font.family'] = prop.get_name()

time_points = ['2023.01', '2023.07', '2024.01']
x = np.arange(len(time_points))
colors = sns.color_palette("pastel")
fig, ax = plt.subplots(figsize=(7, 9))
bar_width = 0.4

# Stack the tiers: each tier's bars start where the previous tier's bars ended.
bottoms = np.zeros(len(time_points))
for i, city_level in enumerate(city_level_list):
    heights = np.array(Tiers[i])
    bars = ax.bar(x, heights, width=bar_width, bottom=bottoms, label=city_level, color=colors[i])
    bottoms = bottoms + heights
    for j, bar in enumerate(bars):
        # Percentage label centred inside the segment.
        ax.annotate(f'{Tiers_height[i][j]}%',
                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_y() + bar.get_height() / 2),
                    xytext=(0, 0),
                    textcoords="offset points",
                    ha='center', va='center', color='black', fontproperties=prop, fontsize=20)

ax.set_ylabel('GHG Emissions (kt)', fontsize=21, fontproperties=prop)
ax.set_xticks(x)
ax.set_xticklabels(time_points)
# Integer y tick text. This replaces the earlier set_yticklabels(get_yticks())
# calls, which pinned label text on an automatic locator and were then
# overridden by this formatter anyway.
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0f}'.format(y)))
for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
    tick_label.set_fontproperties(prop)
ax.tick_params(axis='both', labelsize=21)

ax.legend(title='', title_fontsize=18, fontsize=18, loc='upper left', bbox_to_anchor=(-0.03, 1.15), prop=prop,ncol=3, borderpad=0.5)
sns.despine()

plt.tight_layout()
plt.show()

In [None]:
# NOTE(review): this cell repeats the preceding stacked-bar cell; consider deleting one copy.
# Stacked bar chart of total emissions per city tier for three months, with each
# segment annotated by the tier's percentage share of that month's total.
from matplotlib.ticker import FuncFormatter

df = pd.read_csv('../data/quan_sum_result_20240911.csv')
company_list = ['JD','SF','YT','YD','ZT','ST']
month_list = ['2023_01','2023_07','2024_01']
city_level_list = ['Tier 1', 'New Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
total_columns = [f'sum_Ce_{month}' for month in month_list]

# Build one total column per month by adding up the per-company columns.
columns_list = []
for month in month_list:
    month_total = 0
    for company in company_list:
        company_column = f'sum_Ce_{company}_{month}'
        columns_list.append(company_column)
        month_total = month_total + df[company_column]
    # x31: presumably daily value -> monthly total (all three months have 31 days) - TODO confirm.
    # /1000 twice: presumably a unit conversion matching the '(kt)' axis label - TODO confirm.
    df[f'sum_Ce_{month}'] = month_total * 31 / 1000 / 1000

# A city with a 0 in any company/month column gets all of its monthly totals zeroed.
df['has_zero'] = df[columns_list].eq(0).any(axis=1)
df.loc[df['has_zero'], total_columns] = 0

# Fix the 'Tire' -> 'Tier' misspelling on exact label matches only.
for misspelled in ['Tire 1', 'New Tire 1', 'Tire 2', 'Tire 3', 'Tire 4', 'Tire 5']:
    df.loc[df['city_level'] == misspelled, 'city_level'] = misspelled.replace('Tire', 'Tier')

print(len(df))
df = df[['city', 'city_level'] + total_columns]
display(df.head())

# Tiers[j][i] = summed emissions of tier j in month i (orders follow city_level_list / month_list).
Tiers = [
    [df.loc[df['city_level'] == city_level, f'sum_Ce_{month}'].sum() for month in month_list]
    for city_level in city_level_list
]
# Keep the per-tier names that used to be created through exec().
Tier_1, New_Tier_1, Tier_2, Tier_3, Tier_4, Tier_5 = Tiers
print(Tiers)

# Tiers_height[j][i] = integer percentage share of tier j in month i.
Tiers_height = [tier[:] for tier in Tiers]
sum_temps = []
for i in range(len(month_list)):
    sum_temp = sum(tier[i] for tier in Tiers)
    print(f'{i}:{sum_temp}')
    sum_temps.append(sum_temp)
    for j, tier in enumerate(Tiers):
        # Round after scaling to percent. The previous int(round(r, 2) * 100)
        # truncated values such as 0.29 * 100 == 28.999999999999996 down to 28.
        share = float(tier[i]) / float(sum_temp) if sum_temp else 0.0
        Tiers_height[j][i] = int(round(share * 100))

# Relative change of the overall total between the three months.
print((sum_temps[1]-sum_temps[0])/sum_temps[0])
print((sum_temps[2]-sum_temps[1])/sum_temps[1])
print((sum_temps[2]-sum_temps[0])/sum_temps[0])

sns.set(style="whitegrid")
# seaborn's theme call overrides rcParams (font family included), so the font is
# re-applied here, before any text is created. size=18 is what the legend uses.
font_path = '/home/font/Times New Roman/times.ttf'
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path, size=18)
plt.rcParams['font.family'] = prop.get_name()

time_points = ['2023.01', '2023.07', '2024.01']
x = np.arange(len(time_points))
colors = sns.color_palette("pastel")
fig, ax = plt.subplots(figsize=(7, 9))
bar_width = 0.4

# Stack the tiers: each tier's bars start where the previous tier's bars ended.
bottoms = np.zeros(len(time_points))
for i, city_level in enumerate(city_level_list):
    heights = np.array(Tiers[i])
    bars = ax.bar(x, heights, width=bar_width, bottom=bottoms, label=city_level, color=colors[i])
    bottoms = bottoms + heights
    for j, bar in enumerate(bars):
        # Percentage label centred inside the segment.
        ax.annotate(f'{Tiers_height[i][j]}%',
                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_y() + bar.get_height() / 2),
                    xytext=(0, 0),
                    textcoords="offset points",
                    ha='center', va='center', color='black', fontproperties=prop, fontsize=20)

ax.set_ylabel('GHG Emissions (kt)', fontsize=21, fontproperties=prop)
ax.set_xticks(x)
ax.set_xticklabels(time_points)
# Integer y tick text. This replaces the earlier set_yticklabels(get_yticks())
# calls, which pinned label text on an automatic locator and were then
# overridden by this formatter anyway.
ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0f}'.format(y)))
for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
    tick_label.set_fontproperties(prop)
ax.tick_params(axis='both', labelsize=21)

ax.legend(title='', title_fontsize=18, fontsize=18, loc='upper left', bbox_to_anchor=(-0.03, 1.15), prop=prop,ncol=3, borderpad=0.5)
sns.despine()

plt.tight_layout()
plt.show()

In [None]:
from statsmodels.iolib.summary2 import summary_col
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# For each explanatory feature: scatter + linear fit against the January-2024
# emission total, with the OLS R² printed and written on the figure.
fs = 23
df = pd.read_csv('../data/quan_sum_result_20240911.csv')
company_list = ['JD','SF','YT','YD','ZT','ST']

# Add up the per-company January-2024 emission and distance columns, then divide by 1000.
df['sum_Ce_2024_01'] = 0
df['sum_distance_2024_01'] = 0
for company in company_list:
    df['sum_Ce_2024_01'] += df[f'sum_Ce_{company}_2024_01']
    df['sum_distance_2024_01'] += df[f'sum_distance_{company}_2024_01']
df['sum_Ce_2024_01'] = df['sum_Ce_2024_01']/1000
df['sum_distance_2024_01'] = df['sum_distance_2024_01']/1000
# Ratio -> percent, to match the '(%)' axis label.
df['elevator_ratio_2024_01'] = df['elevator_ratio_2024_01'] * 100

feature_list = ['sum_distance_2024_01','city_level_num','GDP_2022','people_2022','city_ratio','elevator_ratio_2024_01']
# Axis label per feature.
# NOTE(review): the 'elevator_ratio_2024_01' column is labelled as electric vehicles - confirm.
feature_dict = {'sum_distance_2024_01':'Traveling Mileage (10³km)','city_level_num':'City Level','GDP_2022':'GDP (100 Million RMB)','people_2022':'Population (10⁴)','city_ratio':'Urbanization Rate (%)','elevator_ratio_2024_01':'proportion of Electric Vehicles (%)'}

# Tick text for the numeric city level; tick positions not listed here are left blank.
# NOTE(review): only 1-5 are mapped - confirm how 'New Tier 1' is encoded in city_level_num.
tier_tick_labels = {1: 'Tier 1', 2: 'Tier 2', 3: 'Tier 3', 4: 'Tier 4', 5: 'Tier 5'}

# The theme does not depend on the feature, so apply it once.
sns.set_theme(style="white", context="notebook", palette="muted")

for feature in feature_list:
    fig, ax = plt.subplots(figsize=(10, 6))
    # Only rows with a strictly positive feature value take part in the plot and the fit.
    df_temp = df[df[feature]>0]
    sns.regplot(x=df_temp[feature], y=df_temp['sum_Ce_2024_01'], ax=ax,
                scatter_kws={'s': 100, 'color': 'dodgerblue', 'edgecolor': 'k', 'alpha': 0.7},
                line_kws={'color': 'red', 'linewidth': 2})

    # Single-regressor OLS with intercept, used for R²; Spearman correlation is printed alongside.
    X = sm.add_constant(df_temp[feature])
    Y = df_temp['sum_Ce_2024_01']
    model = sm.OLS(Y, X)
    results = model.fit()
    r2 = results.rsquared
    correlation_coefficient = df_temp[feature].corr(df_temp['sum_Ce_2024_01'], method='spearman')
    print(f'R2:{r2}, spearman coefficient:{correlation_coefficient}')

    ax.set_xlabel(f'{feature_dict[feature]}', fontsize=fs, fontproperties=prop)
    ax.set_ylabel('Carbon Emissions (t)', fontsize=fs, fontproperties=prop)
    ax.text(0.2, 0.95, f'R² = {r2:.2f}', transform=ax.transAxes, fontsize=fs,
            verticalalignment='top', fontproperties=prop)

    # Black, slightly thicker left/bottom spines; no top/right spines.
    for side in ('bottom', 'left'):
        ax.spines[side].set_color('black')
        ax.spines[side].set_linewidth(1.5)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    ax.set_ylim(bottom=0)

    if feature == 'city_level_num':
        # Pin the current tick positions and replace their text with tier names.
        xticks = ax.get_xticks()
        ax.set_xticks(xticks)
        ax.set_xticklabels([tier_tick_labels.get(tick_value, '') for tick_value in xticks])

    # Style the tick labels without freezing their text. The previous
    # set_xticklabels(get_xticklabels()) / set_yticklabels(get_yticklabels()) calls
    # fixed the label strings while the locators stayed automatic, so labels could
    # go stale or blank once tight_layout() moved the ticks.
    for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
        tick_label.set_fontproperties(prop)
    ax.tick_params(axis='both', direction='out', length=6, width=1.5, colors='black', labelsize=fs)
    ax.grid(False)

    plt.tight_layout()
    plt.show()