### Abbildung 2 - Wortvektoren der Ländernamen differenziert nach UN-Regionalgruppen (1996-1999)

In [1]:
import pandas as pd
import umap
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

%matplotlib inline
# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling this installation
# actually provides instead of failing on newer versions.
_white_style = (
    "seaborn-v0_8-white"
    if "seaborn-v0_8-white" in matplotlib.style.available
    else "seaborn-white"
)
matplotlib.style.use(_white_style)

In [3]:
# Column labels applied to Country_Groups.csv: the country name first, then
# one column per country grouping.
group_column_names = [
    "Land",
    "UN_AfricanGroup",
    "UN_AsiaGroup",
    "UN_EastEUGroup",
    "UN_LatinAmerica",
    "UN_WestEuropeGroup",
    "ArabischeLiga",
    "ASEAN",
    "Benelux",
    "BRICS",
    "G4",
    "G8",
    "G8_5",
    "G20",
    "Next11",
    "OPAC",
    "Mercosur",
    "P5",
    "Visegrad",
]

# Because `names=` is given without `header=`, the first line of the file is
# read as an ordinary data row. After indexing by "Land", the row(s) labelled
# "Land" are dropped again.
# NOTE(review): presumably that row is the file's own header line; if so the
# membership columns are likely parsed as text, not numbers -- verify against
# the `== 1` comparisons in later cells.
country_groups_df = (
    pd.read_csv(
        "Country_Groups.csv",
        sep=";",
        engine="python",
        encoding="utf-8",
        names=group_column_names,
    )
    .set_index("Land")
    .drop("Land", axis=0)
)

In [5]:
# Load the 1996-1999 country vectors, indexed by the "Country" column.
# The path uses a forward slash: it resolves on Windows as well as on
# Linux/macOS, whereas the previous backslash separator only worked on Windows.
vector_df = pd.read_csv(
    "Country_Vectors/year1996-1999_fixedModel_CountryVectors.csv"
).set_index("Country")

In [6]:
# Project the rows of vector_df to two dimensions with UMAP (cosine metric,
# fixed random_state).
#
# The coordinate columns written below are excluded from the input. On a fresh
# run they do not exist yet, so errors="ignore" makes the drop a no-op; on a
# re-run of this cell they are removed so the projection is not silently
# fitted on its own previous output.
# NOTE: columns joined onto vector_df by later cells are not filtered here;
# reload vector_df before re-running this cell once those have been added.
umap_input = vector_df.drop(columns=["X_UMAP", "Y_UMAP"], errors="ignore")
embedding = umap.UMAP(random_state=0, metric="cosine").fit_transform(umap_input)

vector_df["X_UMAP"] = list(embedding[:, 0])
vector_df["Y_UMAP"] = list(embedding[:, 1])

In [7]:
# Copy every column of country_groups_df onto vector_df, matching rows by
# country name. Underscores in the vector index are turned into spaces before
# the lookup.
lookup_names = [name.replace("_", " ") for name in vector_df.index]

# Report each country that has no row in the group table exactly once
# (previously the name was printed again for every group column).
known_countries = set(country_groups_df.index)
for country_name in lookup_names:
    if country_name not in known_countries:
        print(country_name)

# Keep only the first row per country label, which is what the former
# `.values[0]` lookup selected when a label occurred more than once.
first_rows = country_groups_df[~country_groups_df.index.duplicated(keep="first")]

for column in country_groups_df.columns:
    # One dict lookup per country instead of a full boolean mask per cell.
    membership_by_country = first_rows[column].to_dict()
    # Countries without a row get None, as before.
    vector_df[column] = [membership_by_country.get(name) for name in lookup_names]

In [8]:
# Display label for each UN regional group column.
un_groups = {
    "UN_AfricanGroup": "Afrika",
    "UN_AsiaGroup": "Asien",
    "UN_EastEUGroup": "Osteuropa",
    "UN_LatinAmerica": "Lateinamerika",
    "UN_WestEuropeGroup": "Westeuropa",
}

# Build exactly ONE label per row so the new column always lines up with
# vector_df. The previous loop appended one entry per matching group, so a
# country with no group (or several) made the list length differ from the row
# count -- or, worse, shifted labels onto the wrong countries.
is_member = vector_df[list(un_groups)].eq(1)

groups_list = []
for country, flags in zip(vector_df.index, is_member.to_numpy()):
    labels = [
        un_groups[column]
        for column, flag in zip(is_member.columns, flags)
        if flag
    ]
    if len(labels) != 1:
        # Surface data problems instead of hiding them behind a default.
        print(f"{country}: expected exactly one UN regional group, found {labels}")
    # First matching group in un_groups order; None when there is no match.
    groups_list.append(labels[0] if labels else None)

vector_df["UN-Regionalgruppen"] = groups_list

In [13]:
# Column names of the five UN regional groups (not read again in this cell).
un_groups = [
    "UN_AfricanGroup",
    "UN_AsiaGroup",
    "UN_EastEUGroup",
    "UN_LatinAmerica",
    "UN_WestEuropeGroup",
]

# One sub-frame per regional group: the rows whose membership flag equals 1.
vector_df_africa = vector_df.loc[vector_df["UN_AfricanGroup"] == 1]
vector_df_asia = vector_df.loc[vector_df["UN_AsiaGroup"] == 1]
vector_df_easteu = vector_df.loc[vector_df["UN_EastEUGroup"] == 1]
vector_df_latinamerica = vector_df.loc[vector_df["UN_LatinAmerica"] == 1]
vector_df_westeu = vector_df.loc[vector_df["UN_WestEuropeGroup"] == 1]

# Legend label and sub-frame for every scatter trace, in drawing order.
trace_sources = [
    ("Westeuropa", vector_df_westeu),
    ("Afrika", vector_df_africa),
    ("Osteuropa", vector_df_easteu),
    ("Asien", vector_df_asia),
    ("Lateinamerika", vector_df_latinamerica),
]

# Plotly figure as a plain dict: one marker trace per group, with the frame
# index as the per-point text, plotted on the two UMAP coordinates.
fig = {
    "data": [
        {
            "x": subset.X_UMAP,
            "y": subset.Y_UMAP,
            "text": subset.index,
            "mode": "markers",
            "name": label,
            "textposition": "middle right",
        }
        for label, subset in trace_sources
    ],
    "layout": {
        "xaxis": {"title": "X_UMAP"},
        "yaxis": {"title": "Y_UMAP"},
        "plot_bgcolor": "rgb(255, 255, 255)",
        # "width": 2000,
        # "height": 1300,  # or more
        "autosize": True,
    },
}

plotly.offline.iplot(fig, filename="basic-scatter")

### Abbildung 9 - Wortvektoren der Ländernamen differenziert nach UN-Regionalgruppen (2013-2016)

In [14]:
import pandas as pd
import umap
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

%matplotlib inline
# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling this installation
# actually provides instead of failing on newer versions.
_white_style = (
    "seaborn-v0_8-white"
    if "seaborn-v0_8-white" in matplotlib.style.available
    else "seaborn-white"
)
matplotlib.style.use(_white_style)

In [15]:
# Column labels applied to Country_Groups.csv: the country name first, then
# one column per country grouping.
group_column_names = [
    "Land",
    "UN_AfricanGroup",
    "UN_AsiaGroup",
    "UN_EastEUGroup",
    "UN_LatinAmerica",
    "UN_WestEuropeGroup",
    "ArabischeLiga",
    "ASEAN",
    "Benelux",
    "BRICS",
    "G4",
    "G8",
    "G8_5",
    "G20",
    "Next11",
    "OPAC",
    "Mercosur",
    "P5",
    "Visegrad",
]

# Because `names=` is given without `header=`, the first line of the file is
# read as an ordinary data row. After indexing by "Land", the row(s) labelled
# "Land" are dropped again.
# NOTE(review): presumably that row is the file's own header line; if so the
# membership columns are likely parsed as text, not numbers -- verify against
# the `== 1` comparisons in later cells.
country_groups_df = (
    pd.read_csv(
        "Country_Groups.csv",
        sep=";",
        engine="python",
        encoding="utf-8",
        names=group_column_names,
    )
    .set_index("Land")
    .drop("Land", axis=0)
)

In [16]:
# Load the 2013-2016 country vectors, indexed by the "Country" column.
# The path uses a forward slash: it resolves on Windows as well as on
# Linux/macOS, whereas the previous backslash separator only worked on Windows.
vector_df = pd.read_csv(
    "Country_Vectors/year2013-2016_fixedModel_CountryVectors.csv"
).set_index("Country")

In [17]:
# Project the rows of vector_df to two dimensions with UMAP (cosine metric,
# fixed random_state).
#
# The coordinate columns written below are excluded from the input. On a fresh
# run they do not exist yet, so errors="ignore" makes the drop a no-op; on a
# re-run of this cell they are removed so the projection is not silently
# fitted on its own previous output.
# NOTE: columns joined onto vector_df by later cells are not filtered here;
# reload vector_df before re-running this cell once those have been added.
umap_input = vector_df.drop(columns=["X_UMAP", "Y_UMAP"], errors="ignore")
embedding = umap.UMAP(random_state=0, metric="cosine").fit_transform(umap_input)

vector_df["X_UMAP"] = list(embedding[:, 0])
vector_df["Y_UMAP"] = list(embedding[:, 1])

In [18]:
# Copy every column of country_groups_df onto vector_df, matching rows by
# country name. Underscores in the vector index are turned into spaces before
# the lookup.
lookup_names = [name.replace("_", " ") for name in vector_df.index]

# Report each country that has no row in the group table exactly once
# (previously the name was printed again for every group column).
known_countries = set(country_groups_df.index)
for country_name in lookup_names:
    if country_name not in known_countries:
        print(country_name)

# Keep only the first row per country label, which is what the former
# `.values[0]` lookup selected when a label occurred more than once.
first_rows = country_groups_df[~country_groups_df.index.duplicated(keep="first")]

for column in country_groups_df.columns:
    # One dict lookup per country instead of a full boolean mask per cell.
    membership_by_country = first_rows[column].to_dict()
    # Countries without a row get None, as before.
    vector_df[column] = [membership_by_country.get(name) for name in lookup_names]

In [19]:
# Display label for each UN regional group column.
un_groups = {
    "UN_AfricanGroup": "Afrika",
    "UN_AsiaGroup": "Asien",
    "UN_EastEUGroup": "Osteuropa",
    "UN_LatinAmerica": "Lateinamerika",
    "UN_WestEuropeGroup": "Westeuropa",
}

# Build exactly ONE label per row so the new column always lines up with
# vector_df. The previous loop appended one entry per matching group, so a
# country with no group (or several) made the list length differ from the row
# count -- or, worse, shifted labels onto the wrong countries.
is_member = vector_df[list(un_groups)].eq(1)

groups_list = []
for country, flags in zip(vector_df.index, is_member.to_numpy()):
    labels = [
        un_groups[column]
        for column, flag in zip(is_member.columns, flags)
        if flag
    ]
    if len(labels) != 1:
        # Surface data problems instead of hiding them behind a default.
        print(f"{country}: expected exactly one UN regional group, found {labels}")
    # First matching group in un_groups order; None when there is no match.
    groups_list.append(labels[0] if labels else None)

vector_df["UN-Regionalgruppen"] = groups_list

In [20]:
# Column names of the five UN regional groups (not read again in this cell).
un_groups = [
    "UN_AfricanGroup",
    "UN_AsiaGroup",
    "UN_EastEUGroup",
    "UN_LatinAmerica",
    "UN_WestEuropeGroup",
]

# One sub-frame per regional group: the rows whose membership flag equals 1.
vector_df_africa = vector_df.loc[vector_df["UN_AfricanGroup"] == 1]
vector_df_asia = vector_df.loc[vector_df["UN_AsiaGroup"] == 1]
vector_df_easteu = vector_df.loc[vector_df["UN_EastEUGroup"] == 1]
vector_df_latinamerica = vector_df.loc[vector_df["UN_LatinAmerica"] == 1]
vector_df_westeu = vector_df.loc[vector_df["UN_WestEuropeGroup"] == 1]

# Legend label and sub-frame for every scatter trace, in drawing order.
trace_sources = [
    ("Westeuropa", vector_df_westeu),
    ("Afrika", vector_df_africa),
    ("Osteuropa", vector_df_easteu),
    ("Asien", vector_df_asia),
    ("Lateinamerika", vector_df_latinamerica),
]

# Plotly figure as a plain dict: one marker trace per group, with the frame
# index as the per-point text, plotted on the two UMAP coordinates.
fig = {
    "data": [
        {
            "x": subset.X_UMAP,
            "y": subset.Y_UMAP,
            "text": subset.index,
            "mode": "markers",
            "name": label,
            "textposition": "middle right",
        }
        for label, subset in trace_sources
    ],
    "layout": {
        "xaxis": {"title": "X_UMAP"},
        "yaxis": {"title": "Y_UMAP"},
        "plot_bgcolor": "rgb(255, 255, 255)",
        # "width": 2000,
        # "height": 1300,  # or more
        "autosize": True,
    },
}

plotly.offline.iplot(fig, filename="basic-scatter")