# Line Diagrams from excel
This script summarizes the regions and the countries stored in Excel files and then creates line diagrams for various groups.

In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import openpyxl
import matplotlib.pyplot as plt
import random 
%matplotlib inline

In [2]:
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from variables import base_dir, ancillary_POPdata_folder_path, ancillary_data_folder_path, city, country_Orig
from mainFunctions import createFolder, dfTOxls, generate_colors, unique, lineDiagram
# "__file__" is a literal string here, so abspath() resolves it against the
# current working directory; its dirname is therefore the working directory.
placeholder_file = os.path.abspath("__file__")
current_path = os.path.dirname(placeholder_file)
# Folder the diagrams are saved to.
image_path = f"{current_path}/IMAGES"
# Every second year from 1992 up to and including 2018.
years_list = list(range(1992, 2019, 2))

### Create a dataframe with the sums over all countries and save it to Excel

In [None]:
# Region columns that are summed per year and written to the output file.
select = [
    'Oceania',
    'EuropeNotEU',
    'EuropeEUnoLocal',
    'Central_Asia',
    'Eastern_Asia',
    'Southern-Eastern_Asia',
    'Southern_Asia',
    'Western_Asia',
    'Northern_America',
    'Latin_America_and_the_Caribbean',
    'Northern_Africa',
    'Sub-Saharan_Africa',
    'Others',
    'Colonies',
]

In [None]:
# Year labels as strings: '1992', '1994', ..., '2018'.
year = [str(y) for y in range(1992, 2019, 2)]
# Empty table with one column per region, then one row per year label.
frame = pd.DataFrame(columns=select)
frame['Year'] = year

In [None]:
# Fill a Year-indexed table with, for every year, the column total of each
# region found in that year's workbook.
nframe = frame.set_index('Year')
for year in years_list:
    workbook_path = f"{ancillary_POPdata_folder_path}/{year}/{year}.xlsx"
    yearly = pd.read_excel(workbook_path, header=0)
    row_label = str(year)  # the index holds the years as strings
    for region in select:
        # A region column missing from this year's workbook counts as 0.
        nframe.at[row_label, region] = yearly[region].sum() if region in yearly.columns else 0

# Hand the table to the project helper (presumably writes 00SumsByRegion.xlsx
# into the EXCEL folder -- confirm against mainFunctions.dfTOxls).
dfTOxls(ancillary_POPdata_folder_path + "/EXCEL/", '00SumsByRegion', nframe)

In [None]:
# Load the per-region sums from Excel and build a Year-indexed copy, printing
# the first rows of both the raw and the indexed table.
sums_workbook = ancillary_POPdata_folder_path + "/EXCEL/00SumsByRegion.xlsx"
df = pd.read_excel(sums_workbook, header=0)
ndf = df.set_index('Year')
for preview in (df, ndf):
    print(preview.head())

In [None]:
# Region columns that are drawn in the line diagram.
L_select = [
    'EuropeEUnoLocal',
    'Eastern_Asia',
    'Southern-Eastern_Asia',
    'Southern_Asia',
    'Western_Asia',
    'Northern_America',
    'Latin_America_and_the_Caribbean',
    'Northern_Africa',
    'Sub-Saharan_Africa',
    'Others',
    'Colonies',
]

In [None]:
# Keep only the region columns that are plotted, then display the result.
lframe = ndf[L_select]
lframe

In [None]:
# Draw one line per selected region and save the figure as
# PopChangeByGeogRegion.png in the image folder.
ax = plt.gca()
# Shrink the current axis to 80% of its width so the legend fits beside it.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# One colour per plotted region.
hex_values = generate_colors(len(L_select))
for i, country in enumerate(lframe):
    # Plot from lframe, the frame being iterated, so this cell does not depend
    # on a table left in memory by another cell; draw on the resized axis.
    lframe[country].plot.line(ax=ax, color=str(hex_values[i]))
# Anchor the legend to the right of the plotting area.
plt.legend(bbox_to_anchor=(1.3,0.5), loc='center', borderaxespad=0., fontsize=7)
plt.savefig(image_path + '/PopChangeByGeogRegion.png', dpi=300)
plt.show()

### Get the 15 largest migrant populations

In [3]:
# Load the cleaned per-country sums, skip the first two columns and index the
# remaining table by Year.
clean_workbook = ancillary_POPdata_folder_path + "/EXCEL/00SumClean.xlsx"
country_df = pd.read_excel(clean_workbook, header=0)
without_leading_columns = country_df.iloc[:, 2:]
country_ndf = without_leading_columns.set_index('Year')
#print(country_ndf.head())

In [None]:
# Column codes left out of the diagram (L10_NLD is plotted later as the
# native population).
excluded = ['L10_NLD']
# Rank the columns from position 23 onwards by their value in the last row and
# take enough of them that 15 remain once the excluded codes are removed.
gdf = country_ndf.iloc[-1, 23:].nlargest(15 + len(excluded))
selectList = gdf.index.to_list()
# drop() returns a new frame, so nothing is mutated in place; it raises
# KeyError if an excluded code is not among the selected columns.
top15 = country_ndf.loc[:, selectList].drop(columns=excluded)

In [None]:
# Show the first three rows of the selection.
top15.iloc[:3]

In [None]:
# Draw the diagram for the selected groups with the project helper.
largest_title = "Population Change for 15 Largest Migrant Groups in 2018"
lineDiagram(top15, "15LargestCountries", 15, largest_title, image_path)

### Get the 15 largest groups with population <20.000

In [6]:
# Column codes left out of this diagram; presumably the groups whose 2018
# population is above the 20.000 threshold named in the title -- TODO confirm.
excluded = ['L10_NLD', 'L10_SUR', 'L10_TUR', 'L10_MAR']
# Rank the columns from position 23 onwards by their value in the last row and
# take enough of them that 15 remain once the excluded codes are removed.
gdf = country_ndf.iloc[-1, 23:].nlargest(15 + len(excluded))
selectList = gdf.index.to_list()
# drop() returns a new frame, so nothing is mutated in place; it raises
# KeyError if an excluded code is not among the selected columns.
top15 = country_ndf.loc[:, selectList].drop(columns=excluded)
lineDiagram(top15, "15RestLargestCountries", 15, "Population Change for 15 Largest Migrant Groups \n with Population < 20.000 in 2018", image_path)

      L10_GBR  L40_DEU  L40_OTH  L10_USA  L10_ITA  L10_GHA  L10_ANT  L10_FRA  \
Year                                                                           
1992     6452     8286     1835     3080     2135     3188     8489     1957   
1994     6349     8072     2008     3350     2154     3779     8108     2100   
1996     5274     7143     2313     3132     2009     3953     8095     1999   

      L10_IND  L40_CHN  L40_ESP  L40_YUG  L10_BRA  L10_BGR  L10_POL  
Year                                                                 
1992     1582     1586     2239     2746      801       93      996  
1994     1647     1573     2250     3307      866      103      940  
1996     1668     1524     2149     3742      891      108      861  


In [9]:
## Native versus migrant population
# Migrant total = total population minus the L10_NLD column.
total_population = country_ndf['L1_TOTALPOP']
native_population = country_ndf['L10_NLD']
country_ndf['totalMig'] = total_population.sub(native_population)

selectList = ['L10_NLD', 'totalMig']
nativeMig = country_ndf[selectList]

native_title = "Native and Foreign Population Change, Amsterdam"
lineDiagram(nativeMig, "NativeandForeignPopulationChange", 2, native_title, image_path)

      L10_NLD  totalMig
Year                   
1992   523766    196157
1994   520288    202062
1996   516745    198318
