In [13]:
# Import the data-processing package
import pandas as pd

# The candidate summary file ships without a header row, so the column
# names are supplied explicitly.
candidate_columns = [
    'CAND_ID', 'CAND_NAME', 'CAND_ICI', 'PTY_CD', 'CAND_PTY_AFFILIATION',
    'TTL_RECEIPTS', 'TRANS_FROM_AUTH', 'TTL_DISB', 'TRANS_TO_AUTH',
    'COH_BOP', 'COH_COP', 'CAND_CONTRIB', 'CAND_LOANS', 'OTHER_LOANS',
    'CAND_LOAN_REPAY', 'OTHER_LOAN_REPAY', 'DEBTS_OWED_BY',
    'TTL_INDIV_CONTRIB', 'CAND_OFFICE_ST', 'CAND_OFFICE_DISTRICT',
    'SPEC_ELECTION', 'PRIM_ELECTION', 'RUN_ELECTION', 'GEN_ELECTION',
    'GEN_ELECTION_PRECENT', 'OTHER_POL_CMTE_CONTRIB', 'POL_PTY_CONTRIB',
    'CVG_END_DT', 'INDIV_REFUNDS', 'CMTE_REFUNDS',
]
candidates = pd.read_csv("weball20.txt", sep='|', names=candidate_columns)

# Candidate <-> committee linkage file (also header-less)
linkage_columns = [
    'CAND_ID', 'CAND_ELECTION_YR', 'FEC_ELECTION_YR',
    'CMTE_ID', 'CMTE_TP', 'CMTE_DSGN', 'LINKAGE_ID',
]
ccl = pd.read_csv("ccl.txt", sep='|', names=linkage_columns)

# Join the two tables on the column name they share
ccl = ccl.merge(candidates)

# Keep only the columns needed downstream
linkage_keep = ['CMTE_ID', 'CAND_ID', 'CAND_NAME', 'CAND_PTY_AFFILIATION']
ccl = pd.DataFrame(ccl, columns=linkage_keep)

In [14]:
# Load the individual-contribution records. The raw file has no header row,
# so the column names are supplied explicitly.
# Hint: reading this file takes roughly 5-10 s.
# NOTE(review): the leftover cell output suggests pandas emitted a warning while
# reading this file (possibly mixed column dtypes) - confirm and pin dtypes if so.
itcont_columns = [
    'CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI',
    'IMAGE_NUM', 'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY',
    'STATE', 'ZIP_CODE', 'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT',
    'TRANSACTION_AMT', 'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD',
    'MEMO_TEXT', 'SUB_ID',
]
itcont = pd.read_csv(
    'itcont_2020_20200722_20200820.txt',
    sep='|',
    names=itcont_columns,
)

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
# Attach candidate information to every individual contribution by joining the
# linkage table (ccl) with the contribution table (itcont) on their shared
# column name(s).
c_itcont = ccl.merge(itcont)

# Keep only the columns the analysis uses
contribution_keep = [
    'CAND_NAME', 'NAME', 'STATE', 'EMPLOYER', 'OCCUPATION',
    'TRANSACTION_AMT', 'TRANSACTION_DT', 'CAND_PTY_AFFILIATION',
]
c_itcont = pd.DataFrame(c_itcont, columns=contribution_keep)

In [16]:
# Missing-value handling: fill the text columns with the literal 'NOT PROVIDED'.
# The result is assigned back to the frame instead of calling
# `c_itcont[col].fillna(..., inplace=True)`: that form is chained assignment,
# which warns on recent pandas and leaves the frame unchanged under Copy-on-Write.
text_columns = ['STATE', 'EMPLOYER', 'OCCUPATION']
c_itcont[text_columns] = c_itcont[text_columns].fillna('NOT PROVIDED')

In [17]:
# Normalise TRANSACTION_DT from MMDDYYYY to a 'YYYYMMDD' string.
# The column may have been parsed as a number, so the leading zero of the month
# can be missing (7242020) and a float rendering can carry a trailing '.0'.
# Padding to 8 digits first keeps two-digit months (10-12) correct, which the
# previous fixed-position slicing did not.
# Note: like the original, this cell is not idempotent - run it once per load.
date_digits = (
    c_itcont['TRANSACTION_DT']
    .astype(str)
    .str.replace(r'\.0$', '', regex=True)
    .str.zfill(8)
)
c_itcont['TRANSACTION_DT'] = (
    date_digits.str[4:8] + date_digits.str[0:2] + date_digits.str[2:4]
)

In [18]:
### Build the state x date -> donation-total DataFrame
# Only TRANSACTION_AMT is summed. Aggregating every column with 'sum' would
# concatenate the text columns on recent pandas (slow and memory hungry) and
# relied on them being silently dropped on older versions.
# The result keeps a (STATE, TRANSACTION_DT) MultiIndex and a single
# 'TRANSACTION_AMT' column.
df_1 = c_itcont.groupby(['STATE', 'TRANSACTION_DT'])[['TRANSACTION_AMT']].sum()

In [19]:
import pandas as pd

# Time axis for the animation: one entry per calendar day of the data window.
datetime_index = pd.date_range(start="20200722", end="20200820")

# Convert to Unix epoch *seconds*. The int64 view of a DatetimeIndex is in
# nanoseconds, so divide explicitly instead of relying on string truncation
# to 10 characters to drop the extra digits.
dt_index_epochs = datetime_index.astype("int64") // 10**9

# String form of the epoch seconds, used as the per-day index of the style frames.
dt_index = dt_index_epochs.astype(str)
n_periods = len(datetime_index)

In [20]:
# US geographic data (state boundaries)
import json
import folium
import requests


url = (
    "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data"
)
us_states = f"{url}/us-states.json"

# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors instead of a confusing JSON decode
# failure on an error page.
us_states_response = requests.get(us_states, timeout=30)
us_states_response.raise_for_status()
geo_json_data = us_states_response.json()  # state boundaries

In [21]:
## Build, for every state, the per-day values that drive colour and opacity
import geopandas as gpd
gdf = gpd.read_file(us_states)
import numpy as np
import datetime

# Lookup keys matching the 'YYYYMMDD' strings in df_1's index. '%Y%m%d' is the
# portable spelling; width flags such as '%2m' are not accepted on every platform.
day_keys = [datetime.datetime.strftime(day, '%Y%m%d') for day in datetime_index]

styledata = {}
for row_label in gdf.index:
    state_id = gdf.loc[row_label, "id"]
    amt = [0] * n_periods
    # Visit every day, including the last one (the previous range stopped one short).
    for i, day_key in enumerate(day_keys):
        try:
            amt[i] = df_1.loc[(state_id, day_key), "TRANSACTION_AMT"]
        except KeyError:
            # No donations recorded for this state on this day: keep 0.
            pass
    df = pd.DataFrame(
        {
            "color": amt,    # value later mapped to a colour
            "opacity": amt,  # value later normalised to an opacity
        },
        index=dt_index,  # indexed by the per-day epoch strings
    )

    # Running total, so each day shows the donations accumulated so far.
    df = df.cumsum()
    styledata[row_label] = df

In [22]:
## Determine the global value range and build the colour map
from branca.colormap import linear

max_color, min_color, max_opacity, min_opacity = 0, 0, 0, 0

# Each bound is compared against its own running value. The earlier version
# compared against max_color throughout and never updated min_opacity, so
# min_color ended up depending only on the last state visited.
for country, data in styledata.items():
    max_color = max(max_color, data["color"].max())
    min_color = min(min_color, data["color"].min())
    max_opacity = max(max_opacity, data["opacity"].max())
    min_opacity = min(min_opacity, data["opacity"].min())

# Alternative palettes: linear.PuRd_09, linear.PuBuGn_09
cmap = linear.Reds_09.scale(min_color, max_color)

def norm(x):
    """Min-max scale ``x`` into the range [0, 1].

    Parameters
    ----------
    x : pandas.Series or numpy.ndarray
        Values to rescale.

    Returns
    -------
    Same container type as ``x`` with float values. When ``x`` has no spread
    (all values equal, or no valid values) the result is all zeros rather than
    the NaNs a 0/0 division would produce.
    """
    lowest = x.min()
    span = x.max() - lowest
    shifted = (x - lowest).astype(float)
    # `not span > 0` also catches a NaN span (e.g. empty input).
    if not span > 0:
        return shifted
    return shifted / span


# Convert each state's cumulative totals into a colour (via cmap) and an
# opacity scaled with norm().
for style_frame in styledata.values():
    style_frame["color"] = style_frame["color"].apply(cmap)
    style_frame["opacity"] = norm(style_frame["opacity"])

In [23]:
# Nested mapping: str(row label) -> {epoch string -> {"color": ..., "opacity": ...}}
styledict = dict(
    (str(region_label), style_frame.to_dict(orient="index"))
    for region_label, style_frame in styledata.items()
)

In [24]:
# Draw the base map of the United States
# NOTE(review): the "stamentoner" tile name may not be available in newer
# folium releases - confirm against the installed version.
m = folium.Map(
    tiles="stamentoner",   # tile style; "stamentonerbackground" is an alternative
    zoom_start=4,
    location=[43, -100],   # initial map centre
)

# State boundaries
state_outline_layer = folium.GeoJson(geo_json_data)
state_outline_layer.add_to(m)

# Dedicated pane so the state-name labels can sit on top (step 1 of 2)
label_pane = folium.map.CustomPane("labels")
label_pane.add_to(m)

m


In [25]:
# Time-slider choropleth. Colours may take a moment to load, and the first few
# days look pale.
from folium.plugins import TimeSliderChoropleth

g = TimeSliderChoropleth(gdf.to_json(), styledict=styledict)
g.add_to(m)

# Label tiles drawn in the "labels" pane so state names stay on top (step 2 of 2)
label_tiles = folium.TileLayer("stamentonerlabels", pane="labels")
label_tiles.add_to(m)

m