### Importações

In [30]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
from geobr import read_state
from geobr import read_municipality
import numpy as np

# Tratamento

O objetivo aqui é chegar a um DataFrame com colunas identificando o local, o ano e a razão entre idosas e idosos, preenchido com uma linha para cada ano e para cada cidade.

### Lendo o CSV

In [3]:
df = pd.read_csv('pop.csv')

### Obtendo intervalos de idade a partir dos 60 anos

In [4]:
ageRanges = df.columns[17:-2]

In [5]:
ageRanges

Index(['60-65', '65-70', '70-75', '75-80', '80-85', '85-90'], dtype='object')

In [6]:
df["old"] = ""

In [7]:
df["Ano"].unique()

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype=int64)

In [8]:
print(df["5-10"][1])

811.744905732241


## Calculando a projeção do total de idosos

In [9]:
for i in range(len(df)):
    total = 0
    for j in ageRanges:
        total += df[j][i]
    df["old"][i] = total

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["old"][i] = total


## Descartando intervalos de idade

In [10]:
df.columns

Index(['Ano', 'ARmaior', 'Sexo', 'Armenor', 'NomeMunic', '0-5', '5-10',
       '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '45-50',
       '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90',
       '90+', 'Total', 'old'],
      dtype='object')

In [11]:
ageRanges = df.columns[5:-1]

In [12]:
ageRanges

Index(['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40',
       '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80',
       '80-85', '85-90', '90+', 'Total'],
      dtype='object')

In [13]:
for i in ageRanges:
    df = df.drop(columns = i)

## Agregando números de idosos e idosas

In [14]:
df[["f", "m", "ratio"]] = ""

Aqui, o valor de uma linha ímpar sempre será a quantidade de homens. Assim, esse iterador parte de cada linha par e calcula a coluna "m" e a razão com base na próxima.

In [15]:
for i in range(len(df)):
    if i % 2 == 0:
        nextValue = df["old"][i + 1]
        currentValue = df["old"][i]
        df["f"][i] = currentValue
        df["m"][i] = nextValue
        if nextValue > 0: # Para contornar os NaN
            df["ratio"][i] = (currentValue)/(nextValue)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["f"][i] = currentValue
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["m"][i] = nextValue
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ratio"][i] = (currentValue)/(nextValue)


In [16]:
df

Unnamed: 0,Ano,ARmaior,Sexo,Armenor,NomeMunic,old,f,m,ratio
0,2010,ACRE,f,1200013,Acrelândia - AC,338.301,338.301,519.566,0.651123
1,2010,ACRE,m,1200013,Acrelândia - AC,519.566,,,
2,2010,ACRE,f,1200054,Assis Brasil - AC,169.161,169.161,207.609,0.814806
3,2010,ACRE,m,1200054,Assis Brasil - AC,207.609,,,
4,2010,ACRE,f,1200104,Brasiléia - AC,728.267,728.267,862.886,0.84399
...,...,...,...,...,...,...,...,...,...
122535,2020,TOCANTINS,m,1721307,Tupiratins - TO,194.828,,,
122536,2020,TOCANTINS,f,1722081,Wanderlândia - TO,678.635,678.635,738.011,0.919547
122537,2020,TOCANTINS,m,1722081,Wanderlândia - TO,738.011,,,
122538,2020,TOCANTINS,f,1722107,Xambioá - TO,653.902,653.902,601.947,1.08631


## Descartando linhas e coluns redundantes

Agora que cada linha do DataFrame já apresenta informações relativas a ambos os sexos, podemos descartar colunas e linhas redundantes para manter o DataFrame limpo.

In [17]:
df = df.drop(df[df.Sexo == "m"].index)

In [18]:
df = df.drop(columns = ["Sexo", "old"])

In [19]:
df

Unnamed: 0,Ano,ARmaior,Armenor,NomeMunic,f,m,ratio
0,2010,ACRE,1200013,Acrelândia - AC,338.301,519.566,0.651123
2,2010,ACRE,1200054,Assis Brasil - AC,169.161,207.609,0.814806
4,2010,ACRE,1200104,Brasiléia - AC,728.267,862.886,0.84399
6,2010,ACRE,1200138,Bujari - AC,234.997,388.902,0.60426
8,2010,ACRE,1200179,Capixaba - AC,250.178,361.568,0.691925
...,...,...,...,...,...,...,...
122530,2020,TOCANTINS,1721208,Tocantinópolis - TO,1423.12,1186.91,1.19901
122532,2020,TOCANTINS,1721257,Tupirama - TO,97.7162,109.617,0.89143
122534,2020,TOCANTINS,1721307,Tupiratins - TO,148.155,194.828,0.760437
122536,2020,TOCANTINS,1722081,Wanderlândia - TO,678.635,738.011,0.919547


Temos o DataFrame planejado no início!

In [2]:
# Gerando shape do brasil dividido em estados com a biblioteca GEOBR
shape_br = read_state(code_state='all')
# shape_br.plot()

shape_counties = read_municipality(code_muni='all', year=2018)

In [20]:
shape_counties

Unnamed: 0,code_muni,name_muni,code_state,abbrev_state,geometry
0,1100015.0,Alta Floresta D'oeste,11,RO,"MULTIPOLYGON (((-62.23224 -11.90804, -62.20670..."
1,1100023.0,Ariquemes,11,RO,"MULTIPOLYGON (((-63.57327 -9.78326, -63.57016 ..."
2,1100031.0,Cabixi,11,RO,"MULTIPOLYGON (((-60.71834 -13.39058, -60.70904..."
3,1100049.0,Cacoal,11,RO,"MULTIPOLYGON (((-61.27873 -11.50596, -61.28097..."
4,1100056.0,Cerejeiras,11,RO,"MULTIPOLYGON (((-61.41347 -13.23417, -61.42603..."
...,...,...,...,...,...
5567,5222005.0,Vianópolis,52,GO,"POLYGON ((-48.53842 -16.75003, -48.54051 -16.7..."
5568,5222054.0,Vicentinópolis,52,GO,"POLYGON ((-50.00189 -17.78179, -50.01420 -17.7..."
5569,5222203.0,Vila Boa,52,GO,"POLYGON ((-47.07742 -15.06330, -47.07851 -15.0..."
5570,5222302.0,Vila Propício,52,GO,"POLYGON ((-48.91463 -15.20939, -48.91532 -15.1..."


In [26]:
# Cria a coluna com o poligono representante de cada cidade e adiciona ela ao data_city
shape_counties = shape_counties[['code_muni', 'geometry']]
df = df.merge(shape_counties,how='inner', left_on='Armenor', right_on='code_muni')
df

Unnamed: 0,Ano,ARmaior,Armenor,NomeMunic,f,m,ratio,code_muni,geometry
0,2010,ACRE,1200013,Acrelândia - AC,338.301,519.566,0.651123,1200013.0,"MULTIPOLYGON (((-67.09001 -9.95789, -67.10019 ..."
1,2011,ACRE,1200013,Acrelândia - AC,362.209,546.267,0.663062,1200013.0,"MULTIPOLYGON (((-67.09001 -9.95789, -67.10019 ..."
2,2012,ACRE,1200013,Acrelândia - AC,388.085,575.244,0.674644,1200013.0,"MULTIPOLYGON (((-67.09001 -9.95789, -67.10019 ..."
3,2013,ACRE,1200013,Acrelândia - AC,416.073,606.522,0.685998,1200013.0,"MULTIPOLYGON (((-67.09001 -9.95789, -67.10019 ..."
4,2014,ACRE,1200013,Acrelândia - AC,446.185,640.099,0.697055,1200013.0,"MULTIPOLYGON (((-67.09001 -9.95789, -67.10019 ..."
...,...,...,...,...,...,...,...,...,...
61265,2016,TOCANTINS,1722107,Xambioá - TO,595.447,596.111,0.998886,1722107.0,"MULTIPOLYGON (((-48.38646 -6.38016, -48.38268 ..."
61266,2017,TOCANTINS,1722107,Xambioá - TO,610.009,597.621,1.02073,1722107.0,"MULTIPOLYGON (((-48.38646 -6.38016, -48.38268 ..."
61267,2018,TOCANTINS,1722107,Xambioá - TO,624.716,599.008,1.04292,1722107.0,"MULTIPOLYGON (((-48.38646 -6.38016, -48.38268 ..."
61268,2019,TOCANTINS,1722107,Xambioá - TO,639.414,600.431,1.06493,1722107.0,"MULTIPOLYGON (((-48.38646 -6.38016, -48.38268 ..."


Limpando linhas com a coluna "ratio" em branco

In [43]:
desired_type = type(df.ratio[1])

In [46]:
for i in range(len(df.ratio)):
    if type(df.ratio[i]) != desired_type:
        df = df.drop([i])

In [None]:
#Faz a plotagem dos mapas por semana epidemiológica 
for ano in range(2010,2011):
    ano_x = df[df.Ano == ano]
    
    #Fazendo o typecasting para GeoDataFrame
    ano_x = gpd.GeoDataFrame(ano_x)
    copy = gpd.GeoDataFrame(df)
    copy2 = shape_br
    copy2['coords'] = copy2['geometry'].apply(lambda x: x.representative_point().coords[:])
    copy2['coords'] = [coords[0] for coords in copy2['coords']]
    
    fig, ax = plt.subplots(figsize=(20, 15))
    ano_x.plot(column='ratio', cmap='bwr_r', vmin=0.0, legend=True,vmax=df['ratio'].max(), legend_kwds={'label': f"(Em %)", 'orientation': "vertical"}, ax=ax)
    
    ax.annotate(f'Ano {ano}', xy=(0.1,.225),xycoords='figure fraction',horizontalalignment='left', verticalalignment='top',fontsize=35)
    copy2.geometry.boundary.plot(color=None,edgecolor='#888888',linewidth = 0.3,ax=ax)
    ax.set_title(f"Razão entre idosas e idosos por Município (em %)", fontdict={'fontsize':25})
    ax.axis('off')
    
    for i, j in copy2.iterrows():
        plt.annotate(s=j[1], xy=j['coords'], horizontalalignment='center', verticalalignment='center', fontsize=20)
    plt.show()
    # Salva cada mapa como um chart
    chart = ax.get_figure()
    chart.savefig(f'Ano_{ano}_idosos.png', dpi=120)