In [35]:
import csv
import sys
import os.path
import json
import matplotlib
matplotlib.use('TkAgg')
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

In [36]:
def datajsons(datadir=None):
    """Generator over the decoded profile JSON of every crawled user.

    The data directory is scanned for ``.csv`` files whose header row is
    exactly ``user_url_token, user_data_json, user_following_list``. Matching
    files are then read in sorted path order, and for every data row the
    ``user_data_json`` column is parsed with ``json.loads`` and yielded.

    Args:
        datadir: Directory containing the CSV data files. When ``None``
            (the default), the ``datafile`` folder located one level above
            ``sys.path[0]`` is used; if ``sys.path[0]`` is empty the current
            working directory takes its place.

    Yields:
        The value decoded from each row's ``user_data_json`` column.

    Raises:
        ValueError: propagated from ``json.loads`` when a row holds
            malformed JSON.
    """
    SUFFIX = '.csv'
    TABLEHEADER = ['user_url_token', 'user_data_json', 'user_following_list']

    if datadir is None:
        base = sys.path[0] or os.getcwd()
        # The "datafile" folder lives one level above the script directory.
        datadir = os.path.join(os.path.dirname(base), 'datafile')

    # Nothing to iterate when the data directory is missing.
    if not os.path.isdir(datadir):
        return

    # Collect the full path of every CSV file carrying the expected header.
    # The directory is joined exactly once so that relative directories work.
    csvfilelist = []
    for entry in os.listdir(datadir):
        if os.path.splitext(entry)[1] != SUFFIX:
            continue
        fullpath = os.path.join(datadir, entry)
        if not os.path.isfile(fullpath):
            continue
        # newline='' lets the csv module handle line endings itself, which
        # matters for quoted fields that contain line breaks.
        with open(fullpath, 'r', encoding='utf-8', newline='') as csvfile:
            if csv.DictReader(csvfile).fieldnames == TABLEHEADER:
                csvfilelist.append(fullpath)
    csvfilelist.sort()

    # Stream the rows file by file so only one record is decoded at a time.
    for fullpath in csvfilelist:
        with open(fullpath, 'r', encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                yield json.loads(row[TABLEHEADER[1]])

In [37]:
# Walk every user once and collect the counters used by the plots below.
# A record contributes to ALL lists or to NONE of them, so the lists always
# have the same length and index i refers to the same user in each of them.
COUNT_FIELDS = (
    'voteupCount',             # upvotes received
    'thankedCount',            # thanks received
    'followingCount',          # users this user follows
    'followerCount',           # users following this user
    'favoriteCount',           # favorites made
    'favoritedCount',          # times favorited by others
    'answerCount',             # answers written
    'articlesCount',           # articles written
    'questionCount',           # questions asked
    'followingColumnsCount',   # columns followed
    'followingFavlistsCount',  # favorite lists followed
    'followingTopicCount',     # topics followed
    'followingQuestionCount',  # questions followed
)

countsByField = {field: list() for field in COUNT_FIELDS}
skippedUserCount = 0  # records dropped because a field could not be read

jsons = datajsons()
for user in jsons:
    try:
        # Read every field before touching any list: a missing key must not
        # leave a half-appended record behind.
        values = [user[field] for field in COUNT_FIELDS]
    except (KeyError, TypeError):
        # KeyError: a counter is absent; TypeError: the decoded JSON value
        # does not support string-key lookup (e.g. it is null or a list).
        skippedUserCount += 1
        continue
    for field, value in zip(COUNT_FIELDS, values):
        countsByField[field].append(value)

if skippedUserCount:
    print('skipped %d user record(s) with missing or unreadable counters'
          % skippedUserCount)

voteupCountList = countsByField['voteupCount']
thankedCountList = countsByField['thankedCount']
followingCountList = countsByField['followingCount']
followerCountList = countsByField['followerCount']
favoriteCountList = countsByField['favoriteCount']
favoritedCountList = countsByField['favoritedCount']
answerCountList = countsByField['answerCount']
articlesCountList = countsByField['articlesCount']
questionCountList = countsByField['questionCount']
followingColumnsCountList = countsByField['followingColumnsCount']
followingFavlistsCountList = countsByField['followingFavlistsCount']
followingTopicCountList = countsByField['followingTopicCount']
followingQuestionCountList = countsByField['followingQuestionCount']

In [39]:
# Shared colour palette (hex RGB strings) used by the figures.
# Neutrals
black, gray = '#212121', '#727272'
# Reds, darker first
red1, red2 = '#D32F2F', '#F44336'
# Oranges, from the most saturated to the palest
orange1, orange2, orange3, orange4 = '#FF9500', '#FFb44A', '#ffd191', '#FFF1DE'

In [44]:
# Stacked histograms of upvotes and thanks received, one panel per value range.
fig, axes = plt.subplots(3,2)
fig.set_size_inches(18,10)
fig.suptitle('72万知乎用户获得赞同数和感谢数分布', fontsize=16, color=red1)
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.2, hspace=0.5)

# Flatten the 3x2 grid of axes into a simple list, row by row.
subplotlist = list(axes.flat)

# Half-open value range [low, high) covered by each panel.
edge = [[0,100],
       [100,1000],
       [1000,10000],
       [10000,100000],
       [100000,1000000],
       [1000000,4000000]
       ]
# Bin width used in each panel.
widthlist = [1, 5, 50, 500, 5000, 50000]

# Convert once; the per-panel selections below are then cheap boolean masks.
allVoteupCounts = np.asarray(voteupCountList)
allThankedCounts = np.asarray(thankedCountList)

for ax, (low, high), width in zip(subplotlist, edge, widthlist):
    voteupCountArray = allVoteupCounts[(allVoteupCounts >= low) & (allVoteupCounts < high)]
    thankedCountArray = allThankedCounts[(allThankedCounts >= low) & (allThankedCounts < high)]
    # range=(low, high) pins the bin edges to the panel interval, so every bin
    # really is `width` wide as the title states; without it the bins would
    # stretch between the smallest and largest value present in the data.
    # The obsolete `normed=0` argument is omitted: raw counts are the default.
    ax.hist([voteupCountArray, thankedCountArray], histtype='barstacked',
            bins=(high - low) // width, range=(low, high),
            color=[red1, 'k'], alpha=0.5)
    ax.set_xlim(low, high)
    ax.set_title('获得%d-%d赞同/感谢用户分布（组距：%d）'%(low, high, width), color=red1)
    ax.set_xlabel('获得赞同/感谢（次）',color=red1)
    ax.set_ylabel('用户数量（人）',color=red1)
    ax.set_facecolor(orange4)
    ax.grid(True, linestyle='--')

# Fine-tuning: cap the y axis of the first panel.
axes[0,0].set_ylim(0,300000)
fig.show()

In [43]:
# Demo: two series of bars with random heights drawn on a 3D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# A locally seeded generator keeps the figure identical on every run and
# leaves numpy's global random state untouched.
rng = np.random.RandomState(0)
x = np.arange(20)
h1 = np.abs(rng.randn(20))
h2 = np.abs(rng.randn(20))
# With zdir='y', `zs` is the position of each series along the y axis.
ax.bar(x, h1, zs=2, zdir='y', color=red1, alpha=0.6)
ax.bar(x, h2, zs=1, zdir='y', color=orange1, alpha=0.8)
ax.set_ylim(0,3)
fig.show()

In [45]:
"""
Demonstrates using custom hillshading in a 3D surface plot.
"""
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cbook
from matplotlib import cm
from matplotlib.colors import LightSource
import matplotlib.pyplot as plt
import numpy as np

# Load the sample elevation data and build coordinate grids from the extents
# stored alongside it.
filename = cbook.get_sample_data('jacksboro_fault_dem.npz', asfileobj=False)
with np.load(filename) as dem:
    z = dem['elevation']
    nrows, ncols = z.shape
    x, y = np.meshgrid(
        np.linspace(dem['xmin'], dem['xmax'], ncols),
        np.linspace(dem['ymin'], dem['ymax'], nrows),
    )

# Restrict all three grids to the same 45x45 window.
region = np.s_[5:50, 5:50]
x = x[region]
y = y[region]
z = z[region]

fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

# Custom hillshading: compute the shaded RGB colours with LightSource.shade
# and hand them to plot_surface as facecolors, with its built-in shading
# switched off.
ls = LightSource(azdeg=270, altdeg=45)
rgb = ls.shade(z, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
surf = ax.plot_surface(
    x, y, z,
    rstride=1, cstride=1,
    facecolors=rgb,
    linewidth=0, antialiased=False, shade=False,
)

plt.show()

KeyboardInterrupt: 