In [126]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import io
import numpy as np
import re
import matplotlib.pyplot as plt

In [127]:
# PBOC statistics front page

def get_html(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    r=requests.get(url, headers=headers)
    return r

url='http://www.pbc.gov.cn/diaochatongjisi/116219/index.html'
r=get_html(url)
soup=BeautifulSoup(r.content, 'html.parser')
domain='http://www.pbc.gov.cn'
data_link_by_year={re.search(r'[0-9]{4}', i.get_text())[0]: domain+i.get('href') for i in 
                   soup.find('table', {'id': '11854'}).find_next_sibling('table').find_all('a')}


In [128]:
def get_year_data_cat_list(url):
    r=get_html(url)
    soup=BeautifulSoup(r.content, 'html.parser')
    data_link={i.get_text(): domain+i.get('href') for i in 
            soup.find('div', {'name': '右侧内容'}).find_all('a')}
    return data_link

In [147]:
def get_data_list(url):
    r=get_html(url)
    soup=BeautifulSoup(r.content, 'html.parser')

    data={}
    for table in soup.find_all('table', {'class': 'a2015'}):
        lst=[]
        for i, item in enumerate(table.find_all('td')):
            if i ==0:
                title=item.get_text(strip=True)
            else:
                if item.find('a') is None:
                    lst.append({item.get_text(): None})
                else:
                    link=item.find('a').get('href')
                    if link is None:
                        lst.append({item.get_text(): None})
                    else:
                        lst.append({item.get_text(): domain+link})
        data[title]=lst
    return data

In [140]:
result=[]
for key, item in data_link_by_year.items():
    dict_data={}
    dict_data['year']=key
    dict_data['cat_link']=get_year_data_cat_list(item)
    result.append(dict_data)

In [148]:
for i, year in enumerate(result):
    lst_result=[]
    for key, link in year['cat_link'].items():
        dict_data={}
        dict_data['name']=key
        dict_data['result']=get_data_list(link)
        lst_result.append(dict_data)

    result[i]['cat_result']=lst_result

In [230]:
# 社会融资规模存量统计表Aggregate Financing to the Real Economy (Stock)
key='社会融资规模'
table='社会融资规模增量统计表Aggregate Financing to the Real Economy (Flow)'
dict_social_finance_link={}
for year in result:
    for ind in year['cat_result']:
        if key in ind['name']:
            if ind['result'].get(table) is not None:
                for i in ind['result'].get(table):
                    file_format='htm'
                    if i.get(file_format) is not None:
                        dict_social_finance_link[year['year']]=i.get(file_format)
dict_social_finance_link

{'2024': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2024/07/2024071217593034505.htm',
 '2023': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2024/02/2024021916002638137.htm',
 '2022': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2023/05/2023051517115479366.htm',
 '2021': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2022/03/2022031616071576422.htm',
 '2020': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2021/02/2021022218250352920.htm',
 '2019': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2020/02/2020022816310196340.htm',
 '2018': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2019/01/2019011618533643491.htm',
 '2017': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118102745057.htm',
 '2016': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118065466576.htm',
 '2015': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118034695174.htm'}

In [369]:
def format_social_finance_df(df):
    df=df.dropna(how='all', axis=1)
    # col=['月份', '社會融資規模增量', '人民幣貸款', '外幣貸款', '委托貸幣', '信托貸款', '未貼現銀行承兌匯票', '企業債券', '非金融企業境內股票融資']
    # df.columns=col
    col=df.iloc[5].str.strip()
    col[0]='月份'
    df.columns=col
    df=df[df['月份'].astype(str).str.contains(r'[0-9]{4}\.[0-9]{2}', regex=True)]
    df=df.set_index('月份')
    df.index=pd.to_datetime(df.index, format='%Y.%m')
    df=df.apply(pd.to_numeric)
    return df

In [320]:
dfs=[]
for key, url in dict_social_finance_link.items():
    data=get_html(url)
    df=pd.read_html(data.content)[0]
    df=format_social_finance_df(df)
    dfs.append(df)

In [368]:
new_df=dfs[5].copy()
new_df=new_df[new_df['社会融资规模增量']!='100.0']
new_df

5,社会融资规模增量,人民币贷款,外币贷款（折合人民币）,委托贷款,信托贷款,未贴现银行承兑汇票,企业债券,政府债券,非金融企业境内股票融资,存款类金融机构资产支持证券,贷款核销
月份,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019.01,46791,35668,343,-699,345,3787,4829,1700,289,-466,249
2019.02,9665,7641,-105,-508,-37,-3103,875,4347,119,-14,201
2019.03,29602,19584,3,-1070,528,1365,3546,3412,122,261,1227
2019.04,16710,8733,-330,-1197,129,-357,3949,4433,262,243,316
2019.05,17124,11855,191,-631,-52,-768,1033,3857,259,383,392
2019.06,26243,16737,-4,-827,15,-1311,1439,6867,153,607,1806
2019.07,12872,8086,-221,-987,-676,-4562,2944,6427,593,286,244
2019.08,21956,13045,-247,-513,-658,157,3384,5059,256,269,351
2019.09,25142,17612,-440,-22,-672,-431,2431,3777,289,284,1692
2019.1,8680,5470,-10,-667,-624,-1053,2032,1871,180,623,416


In [346]:

df=pd.concat(dfs)
# df=df.sort_index()
df

5,社会融资规模增量,人民币贷款,外币贷款（折合人民币）,委托贷款,信托贷款,未贴现银行承兑汇票,企业债券,政府债券,非金融企业境内股票融资,存款类金融机构资产支持证券,贷款核销,地方政府专项债券
月份,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-01-01,64734.0,48401.0,989.0,-359.0,732.0,5636.0,4320.0,2947.0,422.0,-203.0,474.0,
2024-02-01,14959.0,9773.0,-9.0,-172.0,571.0,-3686.0,1423.0,6011.0,114.0,-210.0,488.0,
2024-03-01,48335.0,32920.0,543.0,-465.0,681.0,3552.0,4237.0,4626.0,227.0,-588.0,1587.0,
2024-04-01,-658.0,3349.0,-310.0,89.0,142.0,-4490.0,1707.0,-937.0,186.0,-1967.0,520.0,
2024-05-01,20623.0,8197.0,-487.0,-9.0,224.0,-1331.0,285.0,12266.0,111.0,-426.0,734.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
2015-08-01,11097.0,7756.0,-620.0,1198.0,317.0,-1577.0,3121.0,,479.0,,,
2015-09-01,13571.0,10417.0,-2344.0,2422.0,-159.0,-1279.0,3805.0,,349.0,,,
2015-10-01,5593.0,5574.0,-1317.0,1390.0,-201.0,-3697.0,3331.0,,121.0,,,
2015-11-01,10255.0,8873.0,-1142.0,910.0,-301.0,-2545.0,3378.0,,568.0,,,


In [354]:
df=pd.read_html(get_html(dict_social_finance_link['2019']).content)[0]
df=format_social_finance_df(df)
df

5,社会融资规模增量,人民币贷款,外币贷款（折合人民币）,委托贷款,信托贷款,未贴现银行承兑汇票,企业债券,政府债券,非金融企业境内股票融资,存款类金融机构资产支持证券,贷款核销
月份,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019.01,46791,35668,343,-699,345,3787,4829,1700,289,-466,249
2019.02,9665,7641,-105,-508,-37,-3103,875,4347,119,-14,201
2019.03,29602,19584,3,-1070,528,1365,3546,3412,122,261,1227
2019.04,16710,8733,-330,-1197,129,-357,3949,4433,262,243,316
2019.05,17124,11855,191,-631,-52,-768,1033,3857,259,383,392
...,...,...,...,...,...,...,...,...,...,...,...
2019.08,100.0,59.4,-1.1,-2.3,-3.0,0.7,15.4,23.0,1.2,1.2,1.6
2019.09,100.0,70.1,-1.8,-0.1,-2.7,-1.7,9.7,15.0,1.2,1.1,6.7
2019.10,100.0,63.0,-0.1,-7.7,-7.2,-12.1,23.4,21.6,2.1,7.2,4.8
2019.11,100.0,68.4,-1.3,-4.8,-3.4,2.9,16.7,8.6,2.6,3.5,3.2


In [347]:
dict_social_finance_link

{'2024': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2024/07/2024071217593034505.htm',
 '2023': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2024/02/2024021916002638137.htm',
 '2022': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2023/05/2023051517115479366.htm',
 '2021': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2022/03/2022031616071576422.htm',
 '2020': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2021/02/2021022218250352920.htm',
 '2019': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2020/02/2020022816310196340.htm',
 '2018': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2019/01/2019011618533643491.htm',
 '2017': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118102745057.htm',
 '2016': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118065466576.htm',
 '2015': 'http://www.pbc.gov.cn/diaochatongjisi/resource/cms/2018/04/2018041118034695174.htm'}