-
Notifications
You must be signed in to change notification settings - Fork 0
/
proj.py
56 lines (42 loc) · 1.86 KB
/
proj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
"""
Created on Sun May 5 11:29:54 2019
@author: Soundar Balakumaran, Ph.D.
"""
import pandas as pd
import time
import matplotlib.pyplot as plt
def normalize_fips(codes):
    """Return county FIPS codes as strings left-padded with zeros to width 5.

    Parameters
    ----------
    codes : pandas.Series
        FIPS codes as read from the county table (numbers or strings).

    Returns
    -------
    pandas.Series
        String codes, same index, e.g. 1001 -> '01001'.
    """
    # Vectorised replacement for a per-row '{0:0>5}'.format(x) lambda.
    return codes.astype(str).str.zfill(5)


def parse_population(raw):
    """Split fixed-width population records into typed columns.

    Each record is sliced by character offset:
    [0:4] year, [4:6] state, [6:11] FIPS, [11:13] reg, [13:14] race,
    [14:15] hisp, [15:16] sex, [16:18] age, [18:] pop.

    Parameters
    ----------
    raw : pandas.Series
        One string per record.

    Returns
    -------
    pandas.DataFrame
        Columns year, state, FIPS, reg, race, hisp, sex, age, pop (in that
        order) on the same index as ``raw``; year, age and pop are integers,
        the remaining columns stay strings.
    """
    # Building a fresh frame (instead of adding columns to a slice of the
    # raw frame) avoids pandas' SettingWithCopyWarning.
    return pd.DataFrame({
        # A plain integer cast replaces the slower to_datetime(...).dt.year.
        'year': raw.str[:4].astype(int),
        'state': raw.str[4:6],
        'FIPS': raw.str[6:11],
        'reg': raw.str[11:13],
        'race': raw.str[13:14],
        'hisp': raw.str[14:15],
        'sex': raw.str[15:16],
        'age': raw.str[16:18].astype(int),
        'pop': raw.str[18:].astype(int),
    })


def select_records(records, year=2017, excluded_states=('AK', 'HI')):
    """Keep the rows of ``records`` for one year, minus the excluded states.

    Parameters
    ----------
    records : pandas.DataFrame
        Frame with 'year' and 'state' columns.
    year : int
        Year to keep.
    excluded_states : iterable of str
        Values of the 'state' column to drop.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``records``.
    """
    keep = (records['year'] == year) & ~records['state'].isin(list(excluded_states))
    return records[keep]


def county_population(records, counties):
    """Sum 'pop' per FIPS code and attach the county attributes.

    Parameters
    ----------
    records : pandas.DataFrame
        Frame with 'FIPS' and 'pop' columns.
    counties : pandas.DataFrame
        County table with a 'FIPS' column plus the attributes to attach.

    Returns
    -------
    pandas.DataFrame
        One row per FIPS code present in both inputs: 'FIPS', summed 'pop',
        then the remaining columns of ``counties``.
    """
    # Grouping by column *name* guarantees 'FIPS' is kept as a column for the
    # merge below; grouping by a Series object with as_index=False does not.
    totals = records.groupby('FIPS', as_index=False)['pop'].sum()
    return pd.merge(totals, counties, on='FIPS')


# The statements below stay at module scope (inside the guard) so that running
# the file still leaves dt, ct, merged, ... available as globals.
if __name__ == "__main__":
    t1 = time.perf_counter()
    raw = pd.read_csv("us.2017.singleages.adjusted.txt", names=['Data'])
    ct = pd.read_csv("county.csv")
    ct['FIPS'] = normalize_fips(ct['FIPS'])  # adding preceding zeroes
    # NOTE(review): the first record is discarded, as in the original script;
    # presumably the file starts with a header line - confirm against the data.
    dt = parse_population(raw['Data'].iloc[1:])
    t2 = time.perf_counter()
    print("parsed in...", round(t2-t1,3),"sec")

    # Filter before joining so only the selected year's rows are merged.
    merged = pd.merge(select_records(dt), ct, on="FIPS")
    mergedsum = county_population(merged, ct)
    old = merged[merged['age'] > 70]
    oldsum = county_population(old, ct)

    fig, ax1 = plt.subplots(1, 1, figsize=(20, 16))
    # Third positional argument of scatter is the marker size.
    ax1.scatter(mergedsum['Longitude'], mergedsum['Latitude'], mergedsum['pop']/1000,
                alpha=0.2, marker='s', edgecolors="black")
    ax1.scatter(oldsum['Longitude'], oldsum['Latitude'], oldsum['pop']/1000,
                alpha=0.5, marker='s', edgecolors="black")
    plt.axis('equal')
    plt.show()
    # NOTE(review): t3 is taken after plt.show() returns, so this figure covers
    # merging/aggregation and however long show() takes - confirm intent.
    t3 = time.perf_counter()
    print("plotted in...", round(t3-t2,3),"sec")