**Total Population**

Population figures taken from IBGE's historical census data page: https://memoria.ibge.gov.br/sinteses-historicas/historicos-dos-censos/dados-historicos-dos-censos-demograficos.html


In [1]:
#Import the packages we'll need later

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Lift pandas' display limits so rendered tables are not truncated:
# None means "no limit" for both the column count and the row count.
pd.options.display.max_columns = None
pd.options.display.max_rows = None 


In [2]:
# Population figures keyed in by hand as (year, total population) pairs.
census_points = [
    (1900, 17438434),
    (1920, 30635605),
    (1940, 41165289),
    (1950, 51944397),
    (1960, 70199071),
    (1970, 93139037),
    (1980, 119002706),
    (1991, 146825475),
    (2000, 169799170),
    (2010, 192755799),
    (2017, 208164397),
]

# Split the pairs back into the two parallel lists, then into numpy arrays.
years, totalpop = (list(column) for column in zip(*census_points))
yeararray = np.array(years)
poparray = np.array(totalpop)

# Assemble both arrays into one dataframe; `columns=` pins the column order
# to Year first, Population second.
raw_pop_df = pd.DataFrame(
    {'Year': yeararray, 'Population': poparray},
    columns=['Year', 'Population'],
)
raw_pop_df.head()

Unnamed: 0,Year,Population
0,1900,17438434
1,1920,30635605
2,1940,41165289
3,1950,51944397
4,1960,70199071


In [3]:
# Render floats with three decimal places in every table shown from here on.
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Clean dataframe and impute missing values.
# Start from one row per calendar year, 1900 through 2017 inclusive.
raw_years_df = pd.DataFrame({'Year': list(range(1900, 2018))})

# The outer join leaves Population empty for every year that raw_pop_df does
# not list; interpolate() (linear by default) fills those gaps, and the result
# is rounded so it can be stored as whole numbers.
pop_df = (
    raw_years_df
    .merge(raw_pop_df, on='Year', how='outer')
    .interpolate()
    .round(decimals=0)
)
pop_df['Population'] = pop_df['Population'].astype(int)

# Optional CSV export, left disabled:
#pop_df.to_csv('Processed Data/Brazilian Pop 1900 to 2017.csv')

# Load the raw migration table and keep only the Brazil row(s), without the
# indicator/country-code bookkeeping columns.
migration_data_raw = pd.read_csv('Raw Data/Migrant Stock/Cleaned Migrant.csv', encoding='latin-1')
is_brazil = migration_data_raw['Country Name'] == 'Brazil'
migration_data = migration_data_raw.loc[is_brazil].drop(
    ['Indicator Name', 'Country Code', 'Indicator Code'],
    axis=1,
)

# Reshape from wide (one column per year) to long: a Year column and a
# Migrant Stock column. Year labels arrive as column names, so they are cast
# to int to be mergeable with the population table.
migrant_stock = pd.melt(
    migration_data,
    id_vars=["Country Name"],
    var_name="Year",
    value_name="Migrant Stock",
)
migrant_stock = migrant_stock.drop(['Country Name'], axis=1)
migrant_stock['Year'] = migrant_stock['Year'].astype(int)

# Attach migrant stock to the yearly population table (left join keeps every
# population year) and interpolate the years between migrant-stock entries.
total_population = pd.merge(pop_df, migrant_stock, on='Year', how='left')
total_population['Migrant Stock'] = total_population['Migrant Stock'].interpolate()

# Both "_toscale" columns are divided by the same peak population value and
# multiplied by 10, so the two series share one scale.
peak_population = total_population['Population'].max()
total_population['Migrant Stock_toscale'] = total_population['Migrant Stock'] / peak_population * 10
total_population['Population_toscale'] = total_population['Population'] / peak_population * 10

# Whatever is still missing after interpolation becomes 0.
total_population = total_population.fillna(0)
total_population['Year'] = total_population['Year'].astype(int)

# Serialise the scaled population series as space-separated numbers.
# .tolist() yields plain Python floats. The previous str(list(ndarray)) form
# relied on the repr of numpy scalars, which under NumPy >= 2 is
# "np.float64(...)" and survived the bracket/comma stripping, corrupting the
# exported text.
pop_data = ' '.join(
    repr(value) for value in total_population['Population_toscale'].tolist()
)

#Save population data to a text file
# The context manager closes the file even if the write fails.
with open('Processed Data/Brazilian Pop 1900 to 2017.txt', 'w') as textfile:
    textfile.write(pop_data)

# Serialise the scaled migrant-stock series as space-separated numbers.
# NOTE: from here on `migrant_stock` is this string, no longer the melted
# dataframe; the name is kept because a later cell displays it.
# .tolist() yields plain Python floats. The previous str(list(ndarray)) form
# relied on the repr of numpy scalars, which under NumPy >= 2 is
# "np.float64(...)" and survived the bracket/comma stripping, corrupting the
# exported text.
migrant_stock = ' '.join(
    repr(value) for value in total_population['Migrant Stock_toscale'].tolist()
)

#Save migrant stock data to a text file
# The context manager closes the file even if the write fails.
with open('Processed Data/Migrant Stock in Brazil 1900 to 2017.txt', 'w') as textfile:
    textfile.write(migrant_stock)

In [8]:
# Show the combined table: population, migrant stock, and both scaled columns.
total_population

Unnamed: 0,Year,Population,Migrant Stock,Migrant Stock_toscale,Population_toscale
0,1900,17438434,0.0,0.0,0.838
1,1901,18098293,0.0,0.0,0.869
2,1902,18758151,0.0,0.0,0.901
3,1903,19418010,0.0,0.0,0.933
4,1904,20077868,0.0,0.0,0.965
5,1905,20737727,0.0,0.0,0.996
6,1906,21397585,0.0,0.0,1.028
7,1907,22057444,0.0,0.0,1.06
8,1908,22717302,0.0,0.0,1.091
9,1909,23377161,0.0,0.0,1.123


In [6]:
# Show the space-separated migrant-stock string (by this point the name holds
# the exported text, not the melted dataframe).
migrant_stock

'0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.067116136098912246 0.066326404509989292 0.065536672921066325 0.064746941332143371 0.063957209743220389 0.063167478154297435 0.062369186023679149 0.061570893893060884 0.060772601762442591 0.059974309631824319 0.059176017501206032 0.05858705031100972 0.057998083120813407 0.057409115930617087 0.056820148740420774 0.056231181550224461 0.055676821622863776 0.055122461695503097 0.054568101768142425 0.05401374184078174 0.053459381913421047 0.051842659722450044 0.050225937531479026 0.048609215340508015 0.046992493149537004 0.045375770958565986 0.043972601136014623 0.042569431313463274 0.041166261490911918 0.039763091668360562 0.038359921845809206 0.037812662075926456 0.037265402306043721 0.036718142536160972 0.03617088276627823 0.035623622996395488 0.0350

In [7]:
# Show the space-separated scaled-population string that was written to disk.
pop_data

'0.83772413781209676 0.86942307430218246 0.90112196275331358 0.9328208992433995 0.96451978769453062 0.99621872418461643 1.0279176126357477 1.0596165491258334 1.0913154375769647 1.1230143740670504 1.1547133105571363 1.1864121990082674 1.2181111354983534 1.2498100239494845 1.2815089604395702 1.3132078488907015 1.3449067853807872 1.3766056738319186 1.4083046103220043 1.4400034987731356 1.4717024352632213 1.4969941761943086 1.5222859171253957 1.5475777060954377 1.572869447026525 1.5981611879576121 1.6234529288886996 1.6487446698197865 1.6740364587898284 1.6993281997209158 1.7246199406520031 1.7499116815830904 1.7752034225141775 1.8004952114842192 1.8257869524153068 1.8510786933463939 1.8763704342774812 1.9016621752085685 1.9269539641786102 1.9522457051096975 1.9775374460407849 2.0293191635455319 2.0811008810502787 2.1328825505160713 2.1846642680208181 2.2364459855255654 2.2882277030303122 2.340009420535059 2.3917910900008517 2.4435728075055985 2.4953545250103457 2.5830480511996488 2.670741

In [None]:
#NOPE jk

#Inflation data comes from the "Inflacao - IGP-DI" yearly table on the IPEA-DATA website: http://www.ipeadata.gov.br/Default.aspx
#IGP-DI looks like a sound index to use; background: https://www.investing.com/economic-calendar/brazilian-igp-di-inflation-index-1560

#We're using it manually, via the CSV that the next cell reads.

In [10]:
# Read every column as text; the following cell converts Year and
# Inflation Rate to numeric types.
inflation_raw = pd.read_csv(
    'Raw Data/IPEA-Data/Inflation Rate.csv',
    dtype=str,
)

In [11]:
# inflation_raw was loaded as text, so give the two columns used below
# numeric types.
# NOTE(review): the float cast presumably fails if a rate still contains ","
# separators - confirm the CSV is clean.
inflation_raw = inflation_raw.astype({'Year': int, 'Inflation Rate': float})

# Align the rates with the full 1900-2017 year grid; years without a rate
# are treated as 0.
inflation_data = raw_years_df.merge(inflation_raw, on='Year', how='outer')
inflation_data['Inflation Rate'] = inflation_data['Inflation Rate'].fillna(0)

# Rescale so the largest rate maps to 10, rounded to two decimals.
peak_inflation = inflation_data['Inflation Rate'].max()
inflation_data['Inflation Rate_toscale'] = (
    inflation_data['Inflation Rate'] / peak_inflation * 10
).round(decimals=2)
inflation_data

Unnamed: 0,Year,Inflation Rate,Inflation Rate_toscale
0,1900,0.0,0.0
1,1901,0.0,0.0
2,1902,0.0,0.0
3,1903,0.0,0.0
4,1904,0.0,0.0
5,1905,0.0,0.0
6,1906,0.0,0.0
7,1907,0.0,0.0
8,1908,0.0,0.0
9,1909,0.0,0.0


In [12]:
# Serialise the scaled inflation series as space-separated numbers.
# .tolist() yields plain Python values. The previous str(list(ndarray)) form
# relied on the repr of numpy scalars, which under NumPy >= 2 is
# "np.float64(...)" and survived the bracket/comma/quote stripping,
# corrupting the exported text.
inflation_numbers = ' '.join(
    str(value) for value in inflation_data['Inflation Rate_toscale'].tolist()
)

# Save the inflation series to a text file; the context manager closes the
# file even if the write fails.
with open('Processed Data/Inflation in Brazil 1900 to 2017.txt', 'w') as textfile:
    textfile.write(inflation_numbers)

In [23]:
# Load the hand-built unrest scale. Column types are inferred by pandas;
# a dtype=str override was tried here and left disabled.
unrest_data = pd.read_csv(
    'Raw Data/Scale for Unrest.csv',
)

In [24]:
# Drop the final row of the table.
# NOTE(review): this rebinds unrest_data to a shorter copy of itself, so every
# re-run of this cell removes one more row; re-run the read_csv cell first.
unrest_data = unrest_data.iloc[:-1]
unrest_data

Unnamed: 0,Year,Event,Scale
0,1900,,2.0
1,1901,,2.0
2,1902,,2.0
3,1903,,2.0
4,1904,,2.0
5,1905,,2.0
6,1906,,2.0
7,1907,,2.0
8,1908,,2.0
9,1909,,2.0


In [26]:
# Serialise the unrest scale as space-separated numbers.
# .tolist() yields plain Python values. The previous str(list(ndarray)) form
# relied on the repr of numpy scalars, which under NumPy >= 2 is
# "np.float64(...)" and survived the bracket/comma/quote stripping,
# corrupting the exported text.
unrest_scale_text = ' '.join(
    str(value) for value in unrest_data['Scale'].tolist()
)

# Save the scale to a text file; the context manager closes the file even if
# the write fails.
with open('Processed Data/Unrest Scale.txt', 'w') as textfile:
    textfile.write(unrest_scale_text)

unrest_scale_text

'2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 3.0 2.0 3.0 3.0 2.0 4.0 3.0 3.0 4.0 3.0 2.0 3.0 3.0 3.0 4.0 3.0 6.5 8.0 8.0 8.0 6.5 6.0 7.0 8.0 7.0 9.0 8.0 6.0 5.0 4.0 4.0 5.0 6.0 8.0 7.0 6.0 5.0 4.0 3.0 4.0 4.0 5.0 6.0 5.0 4.0 3.0 3.0 4.0 7.0 6.0 6.0 8.0 9.0 8.0 7.0 4.0 3.0 3.0 4.0 3.0 4.0 5.0 5.0 4.0 3.0 4.0 3.0 3.0 4.0 5.0 4.0 4.0 5.0 7.0 7.0 6.0 7.0 8.0 9.0 8.0 6.0 4.0 4.0 5.0 4.0 3.0 4.0 4.0 7.0 4.0 2.0 3.0 6.0 4.0 5.0 3.0 4.0 3.0 3.0 6.0 6.0 4.0 3.0 5.0 5.0 4.0 3.0 3.0 2.0'