# 对各个国家地表温度变化分析

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns
import time
import warnings
warnings.filterwarnings('ignore')

global_temp_country = pd.read_csv('../input/temper5430/GlobalLandTemperaturesByCountry.csv')

In [2]:
pip install plotly


The following command must be run outside of the IPython shell:

    $ pip install plotly

The Python package manager (pip) can only be used from outside of IPython.
Please reissue the `pip` command in a separate terminal or command prompt.

See the Python documentation for more information on how to install packages:

    https://docs.python.org/3/installing/


## 1  绘制各国平均温度图

In [3]:
#移除重复的国家（殖民地不作为国家）和无温度信息的国家
global_temp_country_clear = global_temp_country[~global_temp_country['Country'].isin(
    ['Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
     'United Kingdom', 'Africa', 'South America'])]

global_temp_country_clear = global_temp_country_clear.replace(
   ['Denmark (Europe)', 'France (Europe)', 'Netherlands (Europe)', 'United Kingdom (Europe)'],
   ['Denmark', 'France', 'Netherlands', 'United Kingdom'])

#取各国家温度平均值
countries = np.unique(global_temp_country_clear['Country'])
mean_temp = []
for country in countries:
    mean_temp.append(global_temp_country_clear[global_temp_country_clear['Country'] == 
                                               country]['AverageTemperature'].mean())
data = [ dict(
        type = 'choropleth',
        locations = countries,
        z = mean_temp,
        locationmode = 'country names',
        text = countries,
        marker = dict(
            line = dict(color = 'rgb(0,0,0)', width = 1)),
            colorbar = dict(tickprefix = '', 
            title = '# Average\nTemperature,\n°C')
            )
       ]

layout = dict(
    title = 'Average land temperature in countries',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'rgb(0,255,255)',
        projection = dict(
        type = 'orthographic',
            rotation = dict(
                    lon = 60,
                    lat = 10),
        ),
        lonaxis =  dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
            ),
        lataxis = dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
                )
            ),
        )

fig = dict(data=data, layout=layout)
print(fig)
py.iplot(fig, validate=False, filename='worldmap')

{'data': [{'type': 'choropleth', 'locations': array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Anguilla', 'Antigua And Barbuda', 'Argentina',
       'Armenia', 'Aruba', 'Asia', 'Australia', 'Austria', 'Azerbaijan',
       'Bahamas', 'Bahrain', 'Baker Island', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros', 'Congo',
       'Congo (Democratic Republic Of The)', 'Costa Rica', 'Croatia',
       'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic', "Côte D'Ivoire",
       'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt'

## 2 按平均温度给国家分类，并绘制水平主图

In [4]:
mean_temp_bar, countries_bar = (list(x) for x in zip(*sorted(zip(mean_temp, countries), 
                                                             reverse = True)))
sns.set(font_scale=0.9) 
f, ax = plt.subplots(figsize=(4.5, 50))
colors_cw = sns.color_palette('coolwarm', len(countries))
sns.barplot(mean_temp_bar, countries_bar, palette = colors_cw[::-1])
Text = ax.set(xlabel='Average temperature', title='Average land temperature in countries')

### 是否存在全球变暖？

In [5]:
global_temp = pd.read_csv("/home/mw/input/temper5430/GlobalTemperatures.csv")

#从日期中抽取年
years = np.unique(global_temp['dt'].apply(lambda x: x[:4]))
mean_temp_world = []
mean_temp_world_uncertainty = []

for year in years:
    mean_temp_world.append(global_temp[global_temp['dt'].apply(
        lambda x: x[:4]) == year]['LandAverageTemperature'].mean())
    mean_temp_world_uncertainty.append(global_temp[global_temp['dt'].apply(
                lambda x: x[:4]) == year]['LandAverageTemperatureUncertainty'].mean())

trace0 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) + np.array(mean_temp_world_uncertainty),
    fill= None,
    mode='lines',
    name='Uncertainty top',
    line=dict(
        color='rgb(0, 255, 255)',
    )
)
trace1 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) - np.array(mean_temp_world_uncertainty),
    fill='tonexty',
    mode='lines',
    name='Uncertainty bot',
    line=dict(
        color='rgb(0, 255, 255)',
    )
)

trace2 = go.Scatter(
    x = years, 
    y = mean_temp_world,
    name='Average Temperature',
    line=dict(
        color='rgb(199, 121, 093)',
    )
)
data = [trace0, trace1, trace2]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature in world',
    showlegend = False)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

从表中可以看出，全球在变暖，地表平均温度在过去30年达到了顶峰，最快的温度攀升也发生在这30年！这很让人担心啊，我希望人类能有办法利用生态能源，减少二氧化碳排放，否则我们就完蛋了。这张图也显示了置信区间，标明温度测量在过去几年中越来越精确了。

# 对森林覆盖率进行分析  
导入相关库

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler,PolynomialFeatures

解决中文异常

In [2]:
# 解决中文异常
plt.rcParams['font.sans-serif'] = ['SimHei']
# 解决负号异常e
plt.rcParams['axes.unicode_minus'] = False

加载数据集

In [3]:
# 加载森林数据集
forests = pd.read_excel('/home/mw/input/forest6844/forest.xlsx')

In [6]:
forests.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 201 entries, 0 to 213
Data columns (total 40 columns):
Country Name                     201 non-null object
Country Code                     201 non-null object
Capital                          201 non-null object
Continent                        201 non-null object
Area (kmÂ²)                      201 non-null int64
Population Density (per kmÂ²)    201 non-null float64
Population Growth Rate           201 non-null float64
World Population Percentage      201 non-null float64
Population Rank                  201 non-null int64
Forest Area 1990                 201 non-null float64
Forest Area 1991                 201 non-null float64
Forest Area 1992                 201 non-null float64
Forest Area 1993                 201 non-null float64
Forest Area 1994                 201 non-null float64
Forest Area 1995                 201 non-null float64
Forest Area 1996                 201 non-null float64
Forest Area 1997                 201 no

2000年-2020年世界森林覆盖率变化情况

In [9]:
# 缺失值处理
forests=forests.dropna()
forests.head(5)
forests.isnull().any()

Country Name                     False
Country Code                     False
Capital                          False
Continent                        False
Area (kmÂ²)                      False
Population Density (per kmÂ²)    False
Population Growth Rate           False
World Population Percentage      False
Population Rank                  False
Forest Area 1990                 False
Forest Area 1991                 False
Forest Area 1992                 False
Forest Area 1993                 False
Forest Area 1994                 False
Forest Area 1995                 False
Forest Area 1996                 False
Forest Area 1997                 False
Forest Area 1998                 False
Forest Area 1999                 False
Forest Area 2000                 False
Forest Area 2001                 False
Forest Area 2002                 False
Forest Area 2003                 False
Forest Area 2004                 False
Forest Area 2005                 False
Forest Area 2006         

In [10]:
# 获取2000-2020年森林覆盖率
forests_areas=[]
years=[year for year in range(2000,2021)]
for year in range(2000,2021):
    forests_areas.append(((np.multiply(forests[f'Forest Area {year}']/100,forests['Area (kmÂ²)'])/(forests['Area (kmÂ²)'].sum())).sum())*100)
forests_areas

[32.119297787954146,
 32.079565875647525,
 32.04125746884719,
 32.0025712183868,
 31.9473853946705,
 31.908746800166597,
 31.873758400238007,
 31.83550963862277,
 31.801569850841393,
 31.76317261621645,
 31.724340705195885,
 31.69090041866129,
 31.659527471868167,
 31.62617598367984,
 31.592735434118936,
 31.558508229219,
 31.506983682961152,
 31.456036546898815,
 31.419411904612826,
 31.381754252260386,
 31.345385243401168]

In [11]:
forests_areas_max=np.max(forests_areas)
forests_areas_min=np.min(forests_areas)
forests_areas_mean=np.mean(forests_areas)
print('最大森林覆盖率：',forests_areas_max)
print('最小森林覆盖率：',forests_areas_min)
print('平均森林覆盖率：',forests_areas_mean)

最大森林覆盖率： 32.119297787954146
最小森林覆盖率： 31.345385243401168
平均森林覆盖率： 31.73021880592709


In [12]:
# 添加注释内容
mark_indexs=[0,5,10,15,20]
notes_x=[2000,2005,2010,2015,2020]
notes_y=[]
for i in mark_indexs:
        notes_y.append(forests_areas[i])

In [13]:
# 森林覆盖率变化情况可视化
plt.style.use('default')
plt.figure(figsize=(8,6))
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.plot(years,forests_areas,color='blue',markevery=mark_indexs,marker='o')
plt.title('Change in world forest cover 2000-2020')
plt.xlabel('Year')
plt.ylabel('Forest cover(%)')
plt.xticks([year for year in range(2000,2021,5)])
for x,y in zip(notes_x,notes_y):
    plt.text(x,y,s=f"{x,round(y,2)}",style='italic',color="black",fontsize=16)

findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.


In [14]:
# 森林覆盖率建立线性回归模型
X=np.array(years).reshape(-1,1)
y=forests_areas
model=LinearRegression()
lin_reg=model.fit(X,y)
y_predict=lin_reg.predict(X)
y_predict

array([32.11102387, 32.07294337, 32.03486286, 31.99678235, 31.95870185,
       31.92062134, 31.88254083, 31.84446033, 31.80637982, 31.76829931,
       31.73021881, 31.6921383 , 31.65405779, 31.61597729, 31.57789678,
       31.53981627, 31.50173577, 31.46365526, 31.42557475, 31.38749425,
       31.34941374])

In [15]:
# 森林覆盖率变化模型拟合结果与真实值对比图
plt.figure(figsize=(8,6))
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.plot(years,forests_areas,color='blue')
plt.plot(X,y_predict,color='red')
plt.xlabel('Year')
plt.ylabel('Forest cover')
plt.xticks([i for i in range(2000,2021,5)])
plt.title('Change in world forest cover 2000-2020')

Text(0.5, 1.0, 'Change in world forest cover 2000-2020')

In [16]:
# 取各个国家的平均森林覆盖面积

forest_area_cols = [f'Forest Area {i}' for i in range(1991, 2021)]
df_forest_area = forests[['Country Name'] + forest_area_cols].dropna()

# 计算每个国家的平均森林覆盖率
df_forest_area['Average Forest Area'] = df_forest_area[forest_area_cols].mean(axis=1)


# 打印结果
print(df_forest_area[['Country Name', 'Average Forest Area']])
# 转列表 
country_list = df_forest_area['Country Name'].tolist()
forest_area_list = df_forest_area['Average Forest Area'].tolist()
data = [ dict(
        type = 'choropleth',
        locations = country_list,
        z = forest_area_list,
        locationmode = 'country names',
        text = country_list,
        marker = dict(
            line = dict(color = 'rgb(0,0,0)', width = 1)),
            colorbar = dict(tickprefix = '', 
            title = '# Average\Forest Area,\kmÂ²')
            )
       ]

layout = dict(
    title = 'Average Forest Area in Contries',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'rgb(0,255,255)',
        projection = dict(
        type = 'orthographic',
            rotation = dict(
                    lon = 60,
                    lat = 10),
        ),
        lonaxis =  dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
            ),
        lataxis = dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
                )
            ),
        )

fig = dict(data=data, layout=layout)
print(fig)
py.iplot(fig, validate=False, filename='worldmap')

                       Country Name  Average Forest Area
0                             Aruba             2.333333
1                       Afghanistan             1.850994
2                            Angola            59.401866
3                           Albania            28.492056
4                           Andorra            34.042553
5              United Arab Emirates             4.278114
6                         Argentina            11.571741
7                           Armenia            11.643805
8                    American Samoa            87.855000
9               Antigua and Barbuda            20.639773
10                        Australia            17.185449
11                          Austria            46.611001
12                       Azerbaijan            12.301997
13                          Burundi             8.924234
14                          Belgium            22.740632
15                            Benin            34.499379
16                     Burkina 

# 对温室气体进行分析

In [17]:
# 加载温室气体数据集
gas = pd.read_csv('/home/mw/input/greenhousegas6025/Greenhouse_Gas.csv')

In [18]:
gas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1139 entries, 0 to 1138
Data columns (total 25 columns):
ObjectId2              1139 non-null int64
Country/Region         1139 non-null object
ISO2                   0 non-null float64
ISO3                   1139 non-null object
Indicator              1139 non-null object
Unit                   1139 non-null object
Source                 1139 non-null object
CTS_Code               1139 non-null object
CTS_Name               1139 non-null object
CTS_Full_Descriptor    1139 non-null object
Industry               1139 non-null object
Gas_Type               1139 non-null object
Scale                  1139 non-null object
F2010                  1139 non-null float64
F2011                  1139 non-null float64
F2012                  1139 non-null float64
F2013                  1139 non-null float64
F2014                  1139 non-null float64
F2015                  1139 non-null float64
F2016                  1139 non-null float64
F2017    

In [19]:
gas.head(5)

Unnamed: 0,ObjectId2,Country/Region,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,...,F2012,F2013,F2014,F2015,F2016,F2017,F2018,F2019,F2020,F2021
0,1,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Economic Activity...",...,191.474222,189.086091,189.617437,191.657071,193.568495,193.794399,192.37873,193.144276,185.830471,189.489504
1,2,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Economic Activity...",...,0.760913,0.686725,0.641152,0.694193,0.649161,0.629175,0.59431,0.580705,0.581687,0.599934
2,3,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Economic Activity...",...,1324.126567,1367.184515,1382.271817,1389.855475,1379.580179,1385.725479,1393.030334,1392.513388,1354.925652,1346.210276
3,4,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Economic Activity...",...,604.039665,600.311119,600.592452,606.028724,612.034928,617.563879,619.379502,613.195224,610.516451,591.93263
4,5,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Economic Activity...",...,527.851767,577.100579,591.420776,591.475487,573.327595,573.738026,580.677791,585.593183,557.997044,564.188209


In [20]:
gas_type = set(gas['Gas_Type'].tolist())
gas_type

{'Carbon dioxide',
 'Fluorinated gases',
 'Greenhouse gas',
 'Methane',
 'Nitrous oxide'}

In [21]:
# 统计出温室气体所占的比例
# 1、准备数据：根据年份统计温室气体的排放总量
greenhouse = []
years = ['F2010','F2011','F2012','F2013','F2014','F2015','F2016','F2017','F2018','F2019','F2020','F2021']
for year in years:
    greenhouse.append((gas.groupby('Gas_Type')[year]).sum())
# 将温室气体和“CO2,F,CO,CH4”分开存储
    # 注意这里greenhouse[0][0]中存储的是一个Series，要注意对他的访问
    # 有两种访问方式.loc[标签]   /  .iloc[位置索引]
print((greenhouse[0]))

Gas_Type
Carbon dioxide       347007.831056
Fluorinated gases      6330.292185
Greenhouse gas       460226.751891
Methane               80231.848917
Nitrous oxide         26656.779732
Name: F2010, dtype: float64


对温室气体逐年分析，生成折线图

In [22]:
plt.style.use('default')
mark_indexs = [0,2,4,6,8,10,11]
notes_x = [2010,2012,2014,2016,2018,2020,2021]
value = [i.iloc[2] for i in greenhouse]
print(value)
note_y = []
for i in mark_indexs:
    note_y.append(value[i])
X = [2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021]
plt.figure(figsize=(10,8))
plt.plot(X,value,color='blue',markevery=mark_indexs,marker='o')
plt.title('Greenhouse gas emissions 2010-2021')
plt.xlabel('Year')
plt.ylabel('Greenhouse gas emivaluessions')
plt.xticks([year for year in range(2010,2022,1)])
for x,y in zip(notes_x,note_y):
    plt.text(x,y,s=f"{x,round(y,2)}",style='italic',color="black",fontsize=16)

[460226.7518905162, 472463.3973619642, 478822.70805663435, 484935.3045685764, 489198.9842551398, 486997.3947152817, 487056.6380496531, 494767.9791580441, 504517.10869881295, 505484.11703959387, 487911.359853397, 509379.58510433]


In [23]:
# 对气体排放量按照年份进行可视化——饼图
# 颜色对照表：https://finthon.com/matplotlib-color-list/
"""
Carbon dioxide:二氧化碳
Fluorinated gases:氟气
Greenhouse gas：温室气体
Methane：甲烷
Nitrous oxide：一氧化碳
"""
# 2、准备画布
fig, axes = plt.subplots(4, 3, figsize=(20, 20))
number = 0
# labels = list(gas_type)
# labels.remove('Greenhouse gas')
labels = ['Carbon dioxide', 'Fluorinated gases', 'Methane', 'Nitrous oxide']
colors = ['cyan', 'lightgreen', 'darkorange', 'blue']
explode = [0.05, 0.05, 0.05, 0.05]
for year, year_value in zip(years, greenhouse):
    value = list()
    value.append(year_value.iloc[0]/year_value.iloc[2]*100)
    value.append(year_value.iloc[1]/year_value.iloc[2]*100)
    value.append(year_value.iloc[3]/year_value.iloc[2]*100)
    value.append(year_value.iloc[4]/year_value.iloc[2]*100)
    print(value)
    axes[number//3][number % 3].pie(value,
                                    colors=colors,
                                    labels=labels,
                                    explode=explode,
                                    autopct='%0.2f%%',
                                    shadow=False,
                                    startangle=90,
                                    pctdistance=0.5,
                                    labeldistance=1.05)
    axes[number//3][number % 3].axis('equal')
    axes[number//3][number % 3].set_title(str(year)+" Gas emissions")
    number += 1
axes[0][0].legend()


[75.39931775605821, 1.3754724510779672, 17.433112827043455, 5.792096965877246]
[75.55099335296238, 1.4377681093919312, 17.281896518720743, 5.729342021112345]
[75.64685326790685, 1.545731423181079, 17.195547815660696, 5.6118674934639365]
[75.64832480916913, 1.6271475312427286, 16.9938881809147, 5.730639478914267]
[75.61081886277641, 1.7478026936244946, 16.929939848390198, 5.7114385944830275]
[75.52246740842264, 1.6621133731484008, 17.046811885003077, 5.768607338061282]
[75.39046741792433, 1.7530673211671837, 17.096471999381528, 5.759993262866072]
[75.43926420191995, 1.8400085843841507, 17.01237293948483, 5.708354279640089]
[75.543304929309, 1.9190103755150967, 16.895094367626456, 5.642590325958154]
[75.43316118725505, 1.993143281777899, 16.949459165604814, 5.624236366419035]
[74.2130153803657, 2.1762208322854795, 17.84499226609015, 5.765771517270841]
[74.95387300134723, 2.2003457123263757, 17.202967451576637, 5.642813835077008]


<matplotlib.legend.Legend at 0x7f2ec7f09f60>

In [24]:
# 显示温室气体的水平柱状图
t1 = [] # 二氧化碳
t2 = [] # 氟气
t3 = [] # 甲烷
t4 = [] # 一氧化碳
t5 = [] # 温室气体
years = range(2010, 2022)
for t in greenhouse:
    t1.append(t.iloc[0])
    t2.append(t.iloc[1])
    t3.append(t.iloc[3])
    t4.append(t.iloc[4])
    t5.append(t.iloc[2])
width = 0.45
ind = np.arange(len(years))
fig, ax = plt.subplots(figsize=(12, 8))
p1 = ax.barh(ind, t3, height=width, color='#d62728')
p2 = ax.barh(ind, t1, height=width, left=t3)
p3 = ax.barh(ind, t2, height=width, left=np.add(t1, t3))
p4 = ax.barh(ind, t4, height=width, left=np.add(np.add(t1, t2), t3))
plt.xlabel('Gas emissions')
plt.ylabel('Year')
plt.yticks(ind, years)
# Carbon dioxide:二氧化碳
# Fluorinated gases:氟气
# Greenhouse gas：温室气体
#  ：甲烷
# Nitrous oxide：一氧化碳
plt.legend((p1[0], p2[0], p3[0], p4[0]), ('Methane', 'Carbon dioxide', 'Fluorinated gases', 'Nitrous oxide'),
            loc='upper right', 
            bbox_to_anchor=(1.25, 1.05))
plt.show()

In [25]:
# 氟气和一氧化碳的折线图观察大体趋势
fig, ax1 = plt.subplots(figsize=(12, 8))

ax1.plot(years, t2, color='orangered', marker='o', linestyle='-', label='Fluorinated gases')
ax1.set_ylabel('Fluorinated gases')

ax2 = ax1.twinx()
ax2.plot(years, t4, color='blueviolet', marker='D', linestyle='-.', label='Nitrous oxide')
ax2.set_ylabel('Nitrous oxide')

plt.xticks(years, rotation=45)
plt.title('Emissions of Fluorinated gases and Nitrous oxide from 2010 to 2020')

# 在图例中添加两条折线的标签
handles, labels = [], []
for ax in fig.axes:
    for h, l in zip(*ax.get_legend_handles_labels()):
        handles.append(h)
        labels.append(l)

plt.legend(handles, labels, loc='upper left')
plt.show()


In [26]:
# 绘制温室气体以及四种气体的折线图，下填颜色
plt.style.use('default')
plt.figure(figsize=(8, 6))
plt.plot(t5,color='pink')
plt.plot(t1,color='blue')
plt.plot(t3,color='green')
plt.plot(t4,color='yellow')
plt.plot(t2,color='purple')

plt.legend(['greenhouse', 'CO2','CH4','CO','F'])
plt.xticks(range(12),years,rotation=45)
plt.ylabel("Gas emissions")
plt.xlabel("Years")
plt.title("Greenhouse_Gas 2010~2021")

plt.fill_between(x=range(12),y1=t1, y2=t5, facecolor='pink', alpha=0.3)
plt.fill_between(x=range(12),y1=t3, y2=t1, facecolor='blue', alpha=0.4)
plt.fill_between(x=range(12),y1=t4, y2=t3, facecolor='green', alpha=0.5)
plt.fill_between(x=range(12),y1=t2, y2=t4, facecolor='yellow', alpha=0.6)
plt.fill_between(x=range(12),y1=0, y2=t2, facecolor='purple', alpha=0.7)
plt.show()

## 对世界CO2气体排放量进行分析

In [27]:
gas_CO2 = pd.read_csv('/home/mw/input/CO29187/Atmospheric_CO2_Concentrations.csv')

In [28]:
gas_CO2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1548 entries, 0 to 1547
Data columns (total 12 columns):
ObjectId               1548 non-null int64
Country                1548 non-null object
ISO2                   0 non-null float64
ISO3                   1548 non-null object
Indicator              1548 non-null object
Unit                   1548 non-null object
Source                 1548 non-null object
CTS_Code               1548 non-null object
CTS_Name               1548 non-null object
CTS_Full_Descriptor    1548 non-null object
Date                   1548 non-null object
Value                  1548 non-null float64
dtypes: float64(2), int64(1), object(9)
memory usage: 145.2+ KB


In [29]:
# 获取数据：Date,Value
# 因为数据是58年3月开始的，然后后面都是完整的一年十二个月数据，并且每一个月的数据应该有两个对应的有效值，一个是二氧化碳的含量（Parts Per Million），一个是比例（Percent）
# 先取最远和最近一个月的数据看个大概趋势
test1 = '1559M'
test2 = '2022M'
value = []
rate = []
month = ['01','02','03','04','05','06','07','08','09','10','11','12']
# 获取2022年的数据,偶数列是具体的量，奇数列是比例
for i in month:
    row = (gas_CO2.loc[gas_CO2['Date'] == test2+i])
    for j, row in row.iterrows():
        if j % 2 == 0:
            value.append(row['Value'])
        else:
            rate.append(row['Value'])
value,rate
# print(type(gas_CO2.loc[gas_CO2['Date'] == test2+month[0],['Value']]))

([418.19,
  419.28,
  418.81,
  420.23,
  420.99,
  420.99,
  418.9,
  417.19,
  415.95,
  415.78,
  417.51,
  418.95],
 [0.64, 0.61, 0.28, 0.28, 0.44, 0.49, 0.47, 0.66, 0.64, 0.45, 0.6, 0.54])

In [30]:
# 数据可视化
fig, ax = plt.subplots()
ax.plot(month, value, color='black', marker='o', linestyle='-', label='Value')

# 绘制散点图
colors = value
sizes = value
cmap = plt.cm.get_cmap('cool') # 使用 'cool' 色图
size = value
sc = ax.scatter(month, value, c=colors, cmap=cmap, s=value, label='Scatter')

# 添加图例
handles, labels = [], []
for ax in fig.axes:
    for h, l in zip(*ax.get_legend_handles_labels()):
        handles.append(h)
        labels.append(l)

plt.legend(handles, labels, loc='upper left')

# 显示颜色条
plt.colorbar(sc)

plt.title("Scatter Plot with Color Map and Line Plot")
plt.xlabel("Month")
plt.ylabel("Value")

plt.show()


## 对甲烷进行分析  
因为有报告显示，畜牧业生产造成了14.5%的人为温室气体排放，其中65%来自养牛业。但相比运输、能源等其他高排放部门，气候政策对畜牧业的关注很少.  
对于中国本土来说，主要来自农业。

In [31]:
data = pd.read_csv('/home/mw/input/Methane7092/全球甲烷排放量_千吨二氧化碳当量（1960-2020年）.csv')

In [32]:
years = range(1970,2019)
# value = ['Country Name']
data.loc[data['Country Name'] == 'China']


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
90,China,CHN,Methane emissions (kt of CO2 equivalent),EN.ATM.METH.KT.CE,,,,,,,...,1106190.0,1151100.0,1178020.0,1206210.0,1237520.0,1242150.0,1239130.0,1238630.0,,


In [33]:
# 1970之前的年份都是空值，1970~2018有值
value = []
for i in years:
    value.append(data.loc[90,str(i)])

In [34]:
# 绘制时间序列图
plt.plot(value)
plt.xlabel('Year')
plt.ylabel('Methane Emission (kt)')
plt.title('China Methane Emission from 1970-2018')
plt.show()

为了使用ARIMA模型进行预测，我们需要先对数据进行平稳性检验。由于ARIMA模型要求时间序列数据是平稳的，如果数据不平稳，则需要进行差分操作来使其平稳。我们可以使用单位根检验（ADF）来检验时间序列数据是否是平稳的。

In [35]:
from statsmodels.tsa.stattools import adfuller
df = pd.DataFrame(value, columns=['value'])
# ADF检验函数
def perform_adf_test(data):
    result = adfuller(data)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print('Critical Values:')
    for key, value in result[4].items():
        print('\t%s: %.3f' % (key, value))

# 对原始数据执行ADF检验
print('Results of ADF Test on Original Data:')
perform_adf_test(df['value'])

Results of ADF Test on Original Data:
ADF Statistic: 0.008655
p-value: 0.959242
Critical Values:
	1%: -3.575
	5%: -2.924
	10%: -2.600


根据上述结果，原始数据的p值很高（大于0.05），因此我们不能拒绝原假设——即该时间序列数据是非平稳的。接下来，我们需要对数据进行一些处理以使其变得平稳。  

针对这个问题，我们可以使用差分操作，对时间序列数据进行一阶差分。一阶差分就是将每一个数据点减去前一个数据点的值，从而消除趋势和季节性变化。以下代码演示如何对数据进行一阶差分：

In [36]:
# 对原始数据进行一阶差分
df_diff = df.diff().dropna()
print(df_diff)
# 绘制差分后的时间序列图
plt.plot(df_diff)
plt.xlabel('Year')
plt.ylabel('Methane Emission (kt)')
plt.title('China Methane Emission from 1970-2018 (First Difference)')
plt.show()

# 对差分后的数据执行ADF检验
print('Results of ADF Test on First Difference Data:')
perform_adf_test(df_diff['value'])

       value
1    32726.0
2    11957.0
3     1220.0
4     3842.0
5    23779.0
6     7173.0
7     9817.0
8     5608.0
9    -2854.0
10   -5220.0
11   -7991.0
12    9397.0
13   20026.0
14   18202.0
15    -157.0
16   16981.0
17   15099.0
18   20858.0
19   29991.0
20 -261202.0
21   12730.0
22    8590.0
23    5900.0
24   16090.0
25   24800.0
26   25410.0
27  -32130.0
28   22100.0
29   11860.0
30    3920.0
31   -5440.0
32   -4280.0
33   -3980.0
34   13790.0
35    7970.0
36   45690.0
37   38200.0
38   52020.0
39   45330.0
40   44920.0
41   42360.0
42   44910.0
43   26920.0
44   28190.0
45   31310.0
46    4630.0
47   -3020.0
48    -500.0


Results of ADF Test on First Difference Data:
ADF Statistic: -6.529296
p-value: 0.000000
Critical Values:
	1%: -3.578
	5%: -2.925
	10%: -2.601


据上述结果，差分后的数据的p值小于0.05，因此我们可以拒绝原假设，即该时间序列数据是非平稳的。接下来，我们可以使用这些差分后的数据来构建ARIMA模型。  

以下代码演示如何使用Python中的statsmodels库来训练ARIMA模型并进行预测：

In [37]:
from statsmodels.tsa.arima_model import ARIMA

# 拟合ARIMA模型
model = ARIMA(df_diff, order=(1, 1, 1))
result = model.fit()

# 进行预测
forecast = result.forecast(steps=10)

# 输出预测结果
print('Forecasted Methane Emissions:')
print(forecast)

forecast_list = list(forecast)

# 将预测结果转换为DataFrame格式，并添加日期索引
forecast_list[0] = np.insert(forecast_list[0], 0, -500.0)
print(forecast_list[0])

forecast_df = pd.DataFrame(forecast_list[0], columns=['forecast'])
forecast_dates = pd.date_range(start='2019-01-01', periods=11, freq='Y')
forecast_dates_str = forecast_dates.strftime('%Y-%m-%d')
forecast_df.set_index(forecast_dates_str, inplace=True)


# 绘制预测结果图表
plt.plot(df_diff)
plt.plot([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], forecast_df['forecast'], color='red')
plt.xlabel('Year')
plt.ylabel('Methane Emission (kt)')
plt.title('China Methane Emission from 1970-2022 (ARIMA Forecast)')
plt.legend(['Actual', 'Forecast'])
plt.show()


Forecasted Methane Emissions:
(array([22081.24232004, 22973.29351804, 23488.8170791 , 23997.80406345,
       24506.67757185, 25015.54911028, 25524.42061452, 26033.29211816,
       26542.16362179, 27051.03512543]), array([42947.83564416, 42954.30709626, 42954.30905063, 42954.30905129,
       42954.3090513 , 42954.3090513 , 42954.3090513 , 42954.3090513 ,
       42954.30905131, 42954.30905131]), array([[-62094.96875646, 106257.45339654],
       [-61215.6013715 , 107162.18840758],
       [-60700.08164094, 107677.71579913],
       [-60191.09465789, 108186.70278479],
       [-59682.2211495 , 108695.57629319],
       [-59173.34961107, 109204.44783163],
       [-58664.47810684, 109713.31933587],
       [-58155.6066032 , 110222.19083952],
       [-57646.73509957, 110731.06234316],
       [-57137.86359594, 111239.93384679]]))
[ -500.         22081.24232004 22973.29351804 23488.8170791
 23997.80406345 24506.67757185 25015.54911028 25524.42061452
 26033.29211816 26542.16362179 27051.03512543]


尝试了很多方法，但是不能显示中文，就只能在编译器中运行，生成标签云

In [6]:
from wordcloud import WordCloud

# 读取文本文件内容
with open("/home/mw/input/word_cloud1749/词云.txt", "r", encoding='utf-8') as file:
    text = file.read()

import matplotlib.font_manager as fm

my_font = fm.FontProperties(fname="simsun.ttc")

# 使用WordCloud库生成标签云
cloud = WordCloud(width=800, height=800,
                      background_color='white',
                      min_font_size=10).generate(text)

# 显示标签云
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(cloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.savefig("tag_cloud.png")

words_dict = cloud.process_text(text)
for word, frequency in words_dict.items():
    print(word, frequency)


气候变化是全人类的共同挑战 1
应对气候变化 1
事关中华民族永续发展 1
关乎人类前途命运 1
中国高度重视应对气候变化 1
作为世界上最大的发展中国家 1
中国克服自身经济 1
社会等方面困难 1
实施一系列应对气候变化战略 1
措施和行动 1
参与全球气候治理 1
应对气候变化取得了积极成效 1
中共十八大以来 1
在习近平生态文明思想指引下 1
中国贯彻新发展理念 1
将应对气候变化摆在国家治理更加突出的位置 1
不断提高碳排放强度削减幅度 1
不断强化自主贡献目标 2
以最大努力提高应对气候变化力度 1
推动经济社会发展全面绿色转型 1
建设人与自然和谐共生的现代化 1
2020年9月22日 1
中国国家主席习近平在第七十五届联合国大会一般性辩论上郑重宣示 1
中国将提高国家自主贡献力度 1
采取更加有力的政策和措施 1
二氧化碳排放力争于2030年前达到峰值 1
努力争取2060年前实现碳中和 2
中国正在为实现这一目标而付诸行动 1
作为负责任的国家 1
中国积极推动共建公平合理 1
合作共赢的全球气候治理体系 3
为应对气候变化贡献中国智慧中国力量 1
面对气候变化严峻挑战 1
中国愿与国际社会共同努力 1
并肩前行 1
助力 1
巴黎协定 12
行稳致远 1
为全球应对气候变化作出更大贡献 1
为介绍中国应对气候变化进展 1
分享中国应对气候变化实践和经验 1
增进国际社会了解 1
特发布本白皮书 1
中国应对气候变化新理念 1
中国把应对气候变化作为推进生态文明建设 1
实现高质量发展的重要抓手 1
基于中国实现可持续发展的内在要求和推动构建人类命运共同体的责任担当 1
形成应对气候变化新理念 1
以中国智慧为全球气候治理贡献力量 1
牢固树立共同体意识 1
坚持共建人类命运共同体 1
地球是人类唯一赖以生存的家园 1
面对全球气候挑战 1
人类是一荣俱荣 1
一损俱损的命运共同体 1
没有哪个国家能独善其身 2
世界各国应该加强团结 1
推进合作 1
携手共建人类命运共同体 1
这是各国人民的共同期待 1
也是中国为人类发展提供的新方案 1
坚持共建人与自然生命共同体 1
中华文明历来崇尚天人合一 1
道法自然 1
但人类进入工业文明时代以来 1
在创造巨大物质财富的同时 1
人与自然深层次矛盾日益凸显 1
当前的新冠肺炎疫情更是触发了对人与自