# 最简单的JSON处理
1. 从网络端接收到JSON的字符串

In [1]:
import json  # python中自带的处理json数据格式的包

In [2]:
# A simple JSON string (as it might arrive from the network).
# Task: parse this string with your program.
x = """
{
     "firstName": "四 (Si)",
     "lastName": "李 (Lee)",
     "address": {
         "streetAddress": "21 2nd Street",
         "city": "New York",
         "state": "NY",
         "postalCode": 10021
     },
     "phoneNumbers": [
         "212 555-1234",
         "646 555-4567"
     ]
 }
"""

In [3]:
# From the network:
# text ==> parse ==> JSON object
py_json = json.loads(x)

# The result is a Python dictionary, so fields are read by key:
print(py_json["firstName"])

四 (Si)


In [4]:
# Show the whole parsed object: a dict containing a nested dict and a list.
display(py_json)

{'firstName': '四 (Si)',
 'lastName': '李 (Lee)',
 'address': {'streetAddress': '21 2nd Street',
  'city': 'New York',
  'state': 'NY',
  'postalCode': 10021},
 'phoneNumbers': ['212 555-1234', '646 555-4567']}

In [5]:
# Parsing turns the JSON text into native Python data structures;
# only dicts, lists and basic scalar types appear in the result.
# One print call, one type per line: the whole object, a nested array, a string field.
print(
    *(type(part) for part in (py_json, py_json["phoneNumbers"], py_json["firstName"])),
    sep="\n",
)

<class 'dict'>
<class 'list'>
<class 'str'>


In [6]:
# JSON object ==> stringify ==> string
# Convert the object back into a string so the data can be saved / transmitted.
json_str = json.dumps(py_json)
print(type(json_str))

<class 'str'>


In [7]:
print(json_str)
# Keep ASCII, Unicode and the UTF encodings apart:
# the Chinese characters are written out here as their Unicode escape
# sequences (\uXXXX), so the printed string contains ASCII characters only.

{"firstName": "\u56db (Si)", "lastName": "\u674e (Lee)", "address": {"streetAddress": "21 2nd Street", "city": "New York", "state": "NY", "postalCode": 10021}, "phoneNumbers": ["212 555-1234", "646 555-4567"]}


In [8]:
# Persist the serialized JSON text to disk.
# Mode 'w' is sufficient because nothing is read back through this handle,
# and the encoding is pinned so the file does not depend on the platform's
# default text encoding.
with open('simple.json', 'w', encoding='utf-8') as f:
    f.write(json_str)

TODO：添加一个关于JSON资料的提示(hints)

# 一个真实的网络API使用JSON的例子

In [9]:
from datetime import datetime
from datetime import date

In [10]:
# Reference: https://www.yahoofinanceapi.com/tutorial
# https://www.yahoofinanceapi.com/ (the API can be tried out on that page)
import os

import requests

# Yahoo Finance API: ask for the share price over the last five trading days.
url = "https://yfapi.net/v8/finance/chart/AAPL"


# The stock being queried (AAPL: Apple).
# The parameters are a Python dict; requests encodes them into the URL query string.
querystring = {"symbols":"AAPL",
               "interval": "1d", # one data point per day
               "range": "5d"  # the most recent five days of data
              }

# The API key is a secret, so it is read from the environment instead of
# being committed inside the notebook. You can apply for your own free key
# and export it as YFAPI_KEY before starting Jupyter.
api_key = os.environ.get("YFAPI_KEY")
if not api_key:
    raise RuntimeError(
        "Set the YFAPI_KEY environment variable to your yfapi.net API key."
    )
headers = {'x-api-key': api_key}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
# Fail here, with the HTTP status, rather than later with a KeyError while
# digging through an error payload.
response.raise_for_status()

print(response.text)

{"chart":{"result":[{"meta":{"currency":"USD","symbol":"AAPL","exchangeName":"NMS","instrumentType":"EQUITY","firstTradeDate":345479400,"regularMarketTime":1649267210,"gmtoffset":-14400,"timezone":"EDT","exchangeTimezoneName":"America/New_York","regularMarketPrice":172.14,"chartPreviousClose":177.77,"priceHint":2,"currentTradingPeriod":{"pre":{"timezone":"EDT","start":1649232000,"end":1649251800,"gmtoffset":-14400},"regular":{"timezone":"EDT","start":1649251800,"end":1649275200,"gmtoffset":-14400},"post":{"timezone":"EDT","start":1649275200,"end":1649289600,"gmtoffset":-14400}},"dataGranularity":"1d","range":"5d","validRanges":["1d","5d","1mo","3mo","6mo","1y","2y","5y","10y","ytd","max"]},"timestamp":[1648733400,1648819800,1649079000,1649165400,1649267210],"indicators":{"quote":[{"open":[177.83999633789062,174.02999877929688,174.57000732421875,177.5,172.36000061035156],"high":[178.02999877929688,174.8800048828125,178.49000549316406,178.3000030517578,173.24000549316406],"close":[174.61

In [11]:
# # Save the JSON data that was received
# with open('appl.json', 'w+') as f:
#     f.write(response.text)

所有的空格都被省略了，可以打开在线的JSON工具网站

验证JSON，或者增加缩进让人更容易理解其格式(pretty print)

In [12]:
# Parse the raw response body into Python objects, then serialize it again
# with a 4-space indent so the nesting is readable (pretty print).
parsed = json.loads(response.text)
print(json.dumps(parsed, indent=4))

{
    "chart": {
        "result": [
            {
                "meta": {
                    "currency": "USD",
                    "symbol": "AAPL",
                    "exchangeName": "NMS",
                    "instrumentType": "EQUITY",
                    "firstTradeDate": 345479400,
                    "regularMarketTime": 1649267210,
                    "gmtoffset": -14400,
                    "timezone": "EDT",
                    "exchangeTimezoneName": "America/New_York",
                    "regularMarketPrice": 172.14,
                    "chartPreviousClose": 177.77,
                    "priceHint": 2,
                    "currentTradingPeriod": {
                        "pre": {
                            "timezone": "EDT",
                            "start": 1649232000,
                            "end": 1649251800,
                            "gmtoffset": -14400
                        },
                        "regular": {
                            "timezone":

In [13]:
# Task: turn the data above into a table with pandas (this cell is data wrangling only).
import pandas as pd

# Everything needed lives under the first (and only requested) chart result.
chart_result = parsed["chart"]["result"][0]
time_ser = chart_result["timestamp"]
cols = chart_result["indicators"]["quote"][0]
adj_close = chart_result["indicators"]["adjclose"][0]["adjclose"]

# Epoch seconds -> 'YYYY-MM-DD' strings; the Series name becomes the index name.
date_labels = (
    pd.to_datetime(pd.Series(time_ser, name="Date"), unit="s")
    .dt.strftime("%Y-%m-%d")
)

appl_df = (
    pd.DataFrame(cols)
    # Added as a Series so it lines up with the quote rows by position.
    .assign(**{"Adj Close": pd.Series(adj_close)})
    .set_index(date_labels)
    .loc[:, ["open", "high", "low", "close", "Adj Close", "volume"]]
    .rename(
        columns={
            "open": "Open",
            "high": "High",
            "low": "Low",
            "close": "Close",
            "volume": "Volume",
        }
    )
)
display(appl_df)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-31,177.839996,178.029999,174.399994,174.610001,174.610001,103049300
2022-04-01,174.029999,174.880005,171.940002,174.309998,174.309998,78699800
2022-04-04,174.570007,178.490005,174.440002,178.440002,178.440002,76468400
2022-04-05,177.5,178.300003,174.419998,175.059998,175.059998,73311300
2022-04-06,172.360001,173.240005,170.130005,172.139999,172.139999,48978885


In [14]:
# # Save the data
# appl_df.to_csv("appl.csv")

# 使用现成的包下载同样的数据

In [15]:
# Of course, you can also use an off-the-shelf package to do this work,
# for example: https://github.com/ranaroussi/yfinance
# In essence it also calls the Yahoo API to fetch the data for you.
# In programming, a tool/structure like this is called a "wrapper".
import yfinance as yf

# NOTE(review): the output below stops at 2022-04-06, so `end` looks exclusive — confirm in the yfinance docs.
data = yf.download("AAPL", start="2022-03-31", end="2022-04-07")

[*********************100%***********************]  1 of 1 completed


In [16]:
# Show the table returned by yfinance for comparison with the hand-built one.
# NOTE(review): the last row differs slightly from the manual table; presumably
# the raw API call was made while the market was still open — confirm.
display(data)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-31,177.839996,178.029999,174.399994,174.610001,174.610001,103049300
2022-04-01,174.029999,174.880005,171.940002,174.309998,174.309998,78699800
2022-04-04,174.570007,178.490005,174.440002,178.440002,178.440002,76468400
2022-04-05,177.5,178.300003,174.419998,175.059998,175.059998,73311300
2022-04-06,172.360001,173.240005,170.130005,172.130005,172.130005,48987850
