**Total Population**

Source: IBGE historical census data, https://memoria.ibge.gov.br/sinteses-historicas/historicos-dos-censos/dados-historicos-dos-censos-demograficos.html


In [1]:
#Import the packages we'll need later

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
#Lift the display limits so dataframes are shown with every column and every row
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [2]:
#Population totals entered by hand: totalpop[i] is the figure for years[i]
years = [1900, 1920, 1940, 1950, 1960, 1970, 1980, 1991, 2000, 2010, 2017]
totalpop = [17438434, 30635605, 41165289, 51944397, 70199071, 93139037, 119002706, 146825475, 169799170, 192755799, 208164397]

#Array versions of the two lists
yeararray, poparray = (np.asarray(values) for values in (years, totalpop))

#Combine both arrays into a single dataframe, Year column first
raw_pop_df = pd.DataFrame(
    {'Year': yeararray, 'Population': poparray},
    columns=['Year', 'Population'],
)
raw_pop_df.head()

Unnamed: 0,Year,Population
0,1900,17438434
1,1920,30635605
2,1940,41165289
3,1950,51944397
4,1960,70199071


In [3]:
#Show floats with three decimal places
pd.set_option('display.float_format', lambda x: '%.3f' % x)

#One row for every year from 1900 through 2017
raw_years_df = pd.DataFrame({'Year': list(range(1900, 2018))})

#Line the known population figures up against the full year range, fill the
#years in between by interpolation, and store the result as whole numbers
pop_df = (
    raw_years_df
    .merge(raw_pop_df, on='Year', how='outer')
    .interpolate()
    .round(decimals=0)
    .astype({'Population': int})
)

pop_df.to_csv('Processed Data/Brazilian Pop 1900 to 2017.csv')
pop_df.head()

Unnamed: 0,Year,Population
0,1900,17438434
1,1901,18098293
2,1902,18758151
3,1903,19418010
4,1904,20077868


In [4]:
#Sanity check: (rows, columns) of the year range, the raw figures and the filled-in result
tuple(frame.shape for frame in (raw_years_df, raw_pop_df, pop_df))

((118, 1), (11, 2), (118, 2))

In [5]:
#Flatten the population column into one line of space-separated integers.
#.tolist() converts the values to plain Python ints first, so the text does
#not depend on how NumPy prints its scalar types inside a list (NumPy 2
#renders them as "np.int64(...)", which would otherwise end up in the file).
pop_data = ' '.join(str(value) for value in pop_df['Population'].tolist())

#Save population data to a text file; the with-block closes the file even if the write fails
with open('Processed Data/Brazilian Pop 1900 to 2017.txt', 'w') as textfile:
    textfile.write(pop_data)

In [6]:
#Import migrant stock

#Load the raw migration table, keep only the rows for Brazil and drop the
#indicator/country-code columns that are not needed
migration_data_raw = pd.read_csv('Raw Data/Migrant Stock/Cleaned Migrant.csv', encoding='latin-1')
migration_data = (
    migration_data_raw
    .loc[migration_data_raw['Country Name'] == 'Brazil']
    .drop(columns=['Indicator Name', 'Country Code', 'Indicator Code'])
)

#Reshape to long format: every column other than Country Name becomes a row
#with its header in Year (converted to int) and its value in Migrant Stock
migrant_stock = (
    migration_data
    .melt(id_vars=['Country Name'], var_name='Year', value_name='Migrant Stock')
    .drop(columns=['Country Name'])
    .astype({'Year': int})
)

#Left-join onto the full year range so every year in raw_years_df has a row,
#interpolate the gaps, set whatever is still missing to 0 and store whole numbers
brazil_migrant_stock = (
    raw_years_df
    .merge(migrant_stock, on='Year', how='left')
    .interpolate()
    .fillna(0)
    .round(decimals=0)
    .astype(int)
)

brazil_migrant_stock.to_csv('Processed Data/Migrant Stock in Brazil 1900 to 2017.csv')

brazil_migrant_stock.head()

Unnamed: 0,Year,Migrant Stock
0,1900,0
1,1901,0
2,1902,0
3,1903,0
4,1904,0


In [7]:
#Make a text file of migrant data
#The text is built under its own name so that brazil_migrant_stock stays a
#dataframe and this cell can be re-run without failing.
#.tolist() converts the values to plain Python ints first, so the text does
#not depend on how NumPy prints its scalar types inside a list (NumPy 2
#renders them as "np.int64(...)", which would otherwise end up in the file).
brazil_migrant_stock_text = ' '.join(
    str(value) for value in brazil_migrant_stock['Migrant Stock'].tolist()
)

#Save migrant stock data to a text file; the with-block closes the file even if the write fails
with open('Processed Data/Migrant Stock in Brazil 1900 to 2017.txt', 'w') as textfile:
    textfile.write(brazil_migrant_stock_text)

In [8]:
#Read in raw economic data
#economics_raw = pd.read_csv('Raw Data/World Bank Indicators/World Bank Indicators.csv', dtype=str)
#economics_raw

In [9]:
#economics_raw

In [10]:
#Change of plan: the World Bank indicators read above stays commented out.

#Inflation data instead comes from the IPEA-DATA website (http://www.ipeadata.gov.br/Default.aspx), yearly "Inflacao - IGP-DI" table.
#Background on the IGP-DI inflation index: https://www.investing.com/economic-calendar/brazilian-igp-di-inflation-index-1560

#The table is used manually: it is read from a local CSV file in the next cell.

In [11]:
#Read the inflation table; dtype=str keeps every column as text at this point
inflation_raw = pd.read_csv(
    'Raw Data/IPEA-Data/Inflation Rate.csv',
    dtype=str,
)

In [12]:
#Convert Year to int so it can be merged against the integer Year column of raw_years_df
inflation_raw = inflation_raw.astype({'Year': int})

#Outer-join the inflation table with the full year range, then put 0 wherever
#a year has no inflation value
#NOTE(review): 'Inflation Rate' is not converted to a number here, so if it is
#still text the filled-in 0 leaves the column with mixed types - confirm this is intended
inflation_data = (
    raw_years_df
    .merge(inflation_raw, on='Year', how='outer')
    .assign(**{'Inflation Rate': lambda frame: frame['Inflation Rate'].fillna(0)})
)
inflation_data

Unnamed: 0,Year,Inflation Rate
0,1900,0.0
1,1901,0.0
2,1902,0.0
3,1903,0.0
4,1904,0.0
5,1905,0.0
6,1906,0.0
7,1907,0.0
8,1908,0.0
9,1909,0.0


In [14]:
#Characters that must not appear in the exported text: list brackets, commas and single quotes
strip_chars = str.maketrans('', '', "[],'")

#One line of space-separated inflation values.
#Each value is converted to text on its own (via .tolist(), which yields plain
#Python objects) instead of printing the whole list, so the result does not
#depend on how NumPy prints its scalar types inside a list (NumPy 2 renders
#them as "np.float64(...)"). The same characters as before are still removed
#from every value.
inflation_numbers = ' '.join(
    str(value).translate(strip_chars)
    for value in inflation_data['Inflation Rate'].tolist()
)

#Save inflation data to a text file; the with-block closes the file even if the write fails
with open('Processed Data/Inflation in Brazil 1900 to 2017.txt', 'w') as textfile:
    textfile.write(inflation_numbers)