In [None]:
import matplotlib.pyplot as plt
import matplotlib
import geopandas
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import warnings
# Install a blanket 'ignore' filter: every warning category is silenced from here on.
# NOTE(review): this also hides deprecation notices from the plotting/data libraries
# used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
import cartopy.crs as ccrs
import numpy as np

def _nice_scale_length(raw_km):
    """
    Snap a raw length in km down to a 1-2-5 value of the same order of
    magnitude (e.g. 80 -> 50, 37 -> 20, 0.4 -> 0.2).

    Works for lengths below 1 km as well; whole-km results are returned
    as int so the label has no decimal point.
    """
    #Power of ten of the leading digit
    exponent = int(np.floor(np.log10(raw_km)))
    step = 10.0 ** exponent
    #Leading digit after rounding to one significant figure (may reach 10)
    leading = int(round(raw_km / step))
    #Largest of 1, 2, 5, 10 that does not exceed the leading digit
    nice_digit = max(d for d in (1, 2, 5, 10) if d <= leading)
    #Rounding strips float noise from sub-km products such as 2 * 0.1
    nice = round(nice_digit * step, max(-exponent, 0))
    return int(nice) if nice >= 1 else nice


def scale_bar(ax, length=None, location=(0.5, 0.05), linewidth=3):
    """
    Draw a horizontal scale bar with a "<length> km" label on a map axes.

    ax is the axes to draw the scalebar on; it must provide
    get_extent(crs), plot and text (e.g. a cartopy GeoAxes).
    length is the length of the scalebar in km. When omitted (or falsy)
    a 1-2-5 value close to one fifth of the map width is chosen.
    location is center of the scalebar in axis coordinates.
    (ie. 0.5 is the middle of the plot)
    linewidth is the thickness of the scalebar.
    """
    #Get the limits of the axis in lat long
    llx0, llx1, lly0, lly1 = ax.get_extent(ccrs.PlateCarree())
    #Make tmc horizontally centred on the middle of the map,
    #vertically at scale bar location
    sbllx = (llx1 + llx0) / 2
    sblly = lly0 + (lly1 - lly0) * location[1]
    tmc = ccrs.TransverseMercator(sbllx, sblly)
    #Get the extent of the plotted area in coordinates in metres
    x0, x1, y0, y1 = ax.get_extent(tmc)
    #Turn the specified scalebar location into coordinates in metres
    sbx = x0 + (x1 - x0) * location[0]
    sby = y0 + (y1 - y0) * location[1]

    #Calculate a scale bar length if none has been given:
    #one fifth of the map width (metres -> km), snapped to a 1-2-5 value
    if not length:
        length = _nice_scale_length((x1 - x0) / 5000)

    #Generate the x coordinate for the ends of the scalebar
    #(length is in km, so half the bar is length * 500 metres)
    bar_xs = [sbx - length * 500, sbx + length * 500]
    #Plot the scalebar
    ax.plot(bar_xs, [sby, sby], transform=tmc, color='k', linewidth=linewidth)
    #Plot the scalebar label
    ax.text(sbx, sby, str(length) + ' km', transform=tmc,
            horizontalalignment='center', verticalalignment='bottom')

In [None]:
# Load the municipality shapefile; later cells read its NOME, LATITUDESE and
# LONGITUDES columns.
municipios = geopandas.read_file('deps/municipios/Municipios_IBGE.shp')

In [None]:
# Preview the first rows of the municipality table.
municipios.head()

In [None]:
# Read the 2020 revenue spreadsheet (first column becomes the index) and
# treat empty cells as zero revenue.
producao = (
    pd.read_excel("deps/Arrecadacao_2020.xlsx", index_col=0)
    .fillna(0)
)

In [None]:
# Maps a detailed substance column name (key) to the aggregated group column
# (value) it is folded into by the aggregation loop.
# NOTE(review): 'AREIA FLUVIA' looks like a misspelled variant of 'AREIA FLUVIAL';
# presumably it matches a column spelled that way in the spreadsheet — confirm.
mapa_arrecadacao = {
'AREIA FLUVIAL':'AREIA',
'AREIA QUARTZOSA':'AREIA',
'AREIA FLUVIA':'AREIA',
'BASALTO P/ BRITA':'BASALTO',
'BASALTO P/ REVESTIMENTO':'BASALTO',
'CALCÁRIO CALCÍTICO':'CALCÁRIO',
'CALCÁRIO DOLOMÍTICO':'CALCÁRIO',
'CALCÁRIO INDUSTRIAL':'CALCÁRIO',
'CARVÃO MINERAL':'CARVÃO',
}

In [None]:
# Fold each detailed substance column into its aggregated group column.
# The source column's values are added to the group column (which is created
# if the spreadsheet does not already have it) and the source column is then
# removed. Columns already folded are skipped, so re-running the cell is safe.
for source_col, group_col in mapa_arrecadacao.items():
    if source_col not in producao.columns:
        continue
    if group_col in producao.columns:
        producao[group_col] = producao[group_col] + producao[source_col]
    else:
        producao[group_col] = producao[source_col]
    producao = producao.drop(columns=source_col)

In [None]:
# Preview the revenue table after the substance columns were aggregated.
producao.head()

In [None]:
# Row-wise total across all substance columns. An existing TOTAL column is
# excluded from the sum so that re-running this cell does not double the totals.
producao['TOTAL'] = producao.drop(columns='TOTAL', errors='ignore').sum(axis=1)

In [None]:
# List every index entry whose total is non-zero, printing the total divided by 1000.
nonzero_totals = producao.loc[producao["TOTAL"] != 0, "TOTAL"]
for name, total in nonzero_totals.items():
    print(name, total/1000)

In [None]:
# Attach each municipality's total revenue by matching the upper-cased index of
# `producao` against the NOME column. Municipalities without a match get 0.
totals_by_name = producao['TOTAL'].copy()
totals_by_name.index = totals_by_name.index.str.upper()
# Drop unlabeled rows; when two rows upper-case to the same name the last one
# wins, which is what the previous row-by-row assignment did.
keep = totals_by_name.index.notna() & ~totals_by_name.index.duplicated(keep='last')
totals_by_name = totals_by_name[keep]
# A single vectorised lookup replaces the nested iterrows loops and yields a
# float column from the start (no int -> float upcast on assignment).
municipios['arrecadacao'] = municipios['NOME'].map(totals_by_name).fillna(0.0)

In [None]:
# Histogram of revenue per municipality (50 bins); the return value is bound to
# `_` so the cell shows only the figure.
_ = plt.hist(municipios['arrecadacao'], cumulative=False, bins=50)

In [None]:
# Largest revenue value among all municipalities.
np.max(municipios['arrecadacao'])

In [None]:
# Create the colour-class column with NaN everywhere; the classification step
# fills it in.
municipios['ccode'] = float('nan')

In [None]:
# Revenue classes (R$) used to colour the map:
#   0 -> exactly zero     1 -> (0, 1000)          2 -> [1000, 2000)
#   3 -> [2000, 4000)     4 -> [4000, 10000)      5 -> [10000, 50000)
#   6 -> [50000, 1e6)     7 -> [1e6, 6e6)         8 -> 6e6 and above
bounds = [1000, 2000, 4000, 10000, 50000, 1000000, 6000000]
revenue = municipios['arrecadacao'].to_numpy(dtype=float)
# np.digitize counts how many boundaries are <= the value, so adding 1 yields
# classes 1..8 for positive revenue.
positive_class = np.digitize(revenue, bounds) + 1
# The whole column is assigned by position, so the result does not depend on
# the frame having a 0..n-1 index. Negative or missing revenue stays NaN.
municipios['ccode'] = np.select(
    [revenue == 0, revenue > 0],
    [0, positive_class],
    default=np.nan,
)

In [None]:
#for i in municipios['ccode']:
#    print(i)

In [None]:
# Colorbar tick labels, one per revenue class (ccode 0 to 8), in class order.
# NOTE(review): the upper bound of the last label (9.626.218) is hard-coded —
# presumably the maximum revenue in the 2020 data; update it if the data changes.
legenda = [
    'R$0',
    'entre 0 e 1.000',
    'entre 1.000 e 2.000',
    'entre 2.000 e 4.000',
    'entre 4.000 e 10.000',
    'entre 10.000 e 50.000',
    'entre 50.000 e 1.000.000',
    'entre 1.000.000 e 6.000.000',
    'entre 6.000.000 e 9.626.218',
]

In [None]:
lista_municipio = ['Caxias do Sul', 'Santa Maria', 'Porto Alegre', 'Pelotas']
def plot_municipios(municipios, lista_municipio, ax, c='black', delta=0.05):
    """
    Mark the selected municipalities on the map and label each marker.

    municipios is a table with NOME (upper-case name), LATITUDESE and
    LONGITUDES columns.
    lista_municipio is the list of names to mark; matching against NOME is
    done on the upper-cased name, and names not found are skipped.
    ax is the axes to draw on.
    c is the colour of both the markers and the labels.
    delta is the offset added to both coordinates to place each label.
    """
    # Upper-cased name -> name as written by the caller (used as label text)
    labels_by_nome = {m.upper(): m for m in lista_municipio}
    selected = municipios[municipios['NOME'].isin(list(labels_by_nome))]
    x = selected['LONGITUDES'].tolist()
    y = selected['LATITUDESE'].tolist()
    # Each label is taken from the same row as its coordinates, so text and
    # marker always agree regardless of the row order in the table.
    labels = selected['NOME'].map(labels_by_nome).tolist()
    ax.scatter(x=x, y=y, marker='x', c=c)
    for xi, yi, txt in zip(x, y, labels):
        ax.annotate(txt, (xi+delta, yi+delta), c=c)

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import colors, cm

# Discrete palette with 9 colours, one per revenue class (ccode 0 to 8).
# plt.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
# matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('Spectral_r', 9)
colorscale = cmap

fig, ax = plt.subplots(figsize=(12,12), subplot_kw={'projection': ccrs.PlateCarree()})
municipios.plot(ax=ax, column='ccode', edgecolor=None, legend=False, cmap=colorscale)

# Reserve a narrow plain (non-map) axes to the right of the map for the colorbar.
divider = make_axes_locatable(ax)
ax_cb = divider.new_horizontal(size="5%", pad=0.5, axes_class=plt.Axes)
# Series.max/min skip NaN, so an unclassified row cannot turn the colour range into NaN.
vmax = municipios['ccode'].max()
vmin = municipios['ccode'].min()
norm = matplotlib.colors.Normalize(vmin=vmin,vmax=vmax)
mappable = plt.cm.ScalarMappable(cmap=colorscale, norm=norm)
fig.add_axes(ax_cb)
# Hand-placed tick positions, one per entry of `legenda`.
# NOTE(review): they sit near the middle of each colour band only when ccode
# spans the full 0-8 range — confirm if the class set changes.
labels = [0.45,1.3,2.2,3.1,4,4.9,5.75,6.65,7.5]
cbar = fig.colorbar(mappable, cax=ax_cb, ticks=labels)
cbar.ax.set_yticklabels(legenda)

gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, linewidth=1, color='gray', alpha=0.5, linestyle='--')

#x, y, arrow_length = 0.04, 0.12, 0.1
#ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
#arrowprops=dict(facecolor='black', width=5, headwidth=15),
#ha='center', va='center', fontsize=12,
#xycoords=ax.transAxes)

#scale_bar(ax, 100, location=(0.5,0.01))

plot_municipios(municipios, lista_municipio, ax, c='black', delta=0.05)

# Title spelling corrected (ARRECADAÇÃO, with a tilde).
ax.set_title('ARRECADAÇÃO CFEM em R$', loc='center', fontsize='14')
plt.savefig('imagens/arrecadacao.png', bbox_inches='tight', facecolor='white', dpi = 300, bbox_extra_artists=([ax]))

In [None]:
# Load the HDI table (semicolon-separated, Latin-1 encoded); later cells use its
# 'municipio' and 'idh' columns.
idh = pd.read_csv('deps/idh_rs.csv', encoding='latin1', delimiter=';')

In [None]:
# Create the CFEM revenue column with NaN everywhere; matched municipalities are
# filled in by the join step.
idh['cfem'] = float('nan')

In [None]:
# Replace the '-' placeholder in the HDI column with NaN; other values are kept as they are.
idh['idh'] = np.where(idh['idh']=='-', float('nan'), idh['idh'])

In [None]:
# Convert the HDI column to a numeric dtype (raises if a value cannot be parsed).
idh['idh'] = pd.to_numeric(idh['idh'])

In [None]:
# Preview the HDI table after cleaning the 'idh' column.
idh.head()

In [None]:
# Preview the revenue table again to compare its index with the HDI 'municipio' values.
producao.head()

In [None]:
# Attach the total CFEM revenue to each HDI row by exact match between the index
# of `producao` and the 'municipio' column. Rows without a match get NaN.
cfem_by_name = producao['TOTAL']
# Drop unlabeled rows; for a repeated index label the last row wins, which is
# what the previous row-by-row assignment did.
keep = cfem_by_name.index.notna() & ~cfem_by_name.index.duplicated(keep='last')
cfem_by_name = cfem_by_name[keep]
# One vectorised lookup replaces the nested iterrows loops.
idh['cfem'] = idh['municipio'].map(cfem_by_name)

In [None]:
# Keep only rows with no missing value in any column (so rows lacking either
# 'idh' or 'cfem' are dropped). Note that this rebinds `idh` to the filtered frame.
idh = idh.dropna()

In [None]:
# Preview the joined HDI/CFEM table after incomplete rows were removed.
idh.head()

In [None]:
# Smallest and largest HDI values (NaN ignored); used to place the x ticks.
mi, ma = np.nanmin(idh['idh']), np.nanmax(idh['idh'])

In [None]:
# Ten evenly spaced tick positions from the minimum to the maximum HDI.
xt = np.linspace(mi,ma, 10)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit of CFEM revenue (target) on HDI (single feature).
# Both are reshaped to column vectors before fitting.
idh_column = idh['idh'].values.reshape(-1, 1)
cfem_column = idh['cfem'].values.reshape(-1, 1)

model = LinearRegression()
model.fit(idh_column, cfem_column)

# R^2 is evaluated on the same data used for fitting.
r_sq = model.score(idh_column, cfem_column)
print('coefficient of determination:', r_sq)

print('intercept:', model.intercept_)
print('slope:', model.coef_)

In [None]:
def yval(x, a, b):
    """Evaluate the straight line with slope a and intercept b at x."""
    scaled = a*x
    return scaled+b

In [None]:
# Scatter of HDI (x) against CFEM revenue (y), drawn through the explicit
# Axes/Figure handles and saved to disk.
fig, ax = plt.subplots(figsize=(10,5))
ax.scatter(idh['idh'], idh['cfem'])
#plt.plot([mi,yval(mi,model.coef_ ,model.intercept_)], [ma,yval(ma,model.coef_ ,model.intercept_)], c='red')
ax.set_title('Relação CFEMxIDH')
ax.set_ylabel('CFEM')
#plt.ylim([0,0.2*1e7])
ax.set_xlabel('IDH')
ax.set_xticks(xt)
ax.grid(linestyle='--')
#plt.show()
fig.savefig('imagens/cfem_idh.png', bbox_inches='tight', facecolor='white', dpi = 300)