In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [27]:
# Read the symmetry-group table; the first spreadsheet column becomes the index.
symm_raw = pd.read_excel("./symm_groups.xlsx", index_col=0)
# Reorder the columns by position (original columns 0, 3, 1, 2).
column_order = [0, 3, 1, 2]
df = symm_raw.iloc[:, column_order]
df.columns

Unnamed: 0,Moslem Arabs,Seljuks,Eastern Roman Empire,Armenia
p1,0.0,0.27,14.04,5.6
p1m1,0.0,0.0,9.65,0.0
p1g1,0.0,0.0,0.88,0.0
c1m1,0.0,0.55,4.39,2.4
p211,0.0,1.1,2.63,4.0
p2mm,5.74,8.79,9.65,1.6
p2mg,0.36,0.82,5.26,0.0
p2gg,0.0,0.55,0.88,6.4
c2mm,7.66,8.52,3.51,1.6
p3,0.36,0.82,0.0,0.8


# Histograms

In [20]:
%matplotlib notebook
fig, ax = plt.subplots(4,1,figsize=(6,12))
cols = df.columns.to_list() 
for i in [0,1,2,3]:
    ax[i].bar(range(0,17),df[cols[i]],color='k')
    ax[i].spines['right'].set_visible(False)
    ax[i].spines['top'].set_visible(False)
    ax[i].set_xticklabels(df.index.to_list(),rotation=45,fontsize=8)
    ax[i].set_xticks(np.arange(df.shape[0]))
    for p in ax[i].patches:
        if np.ceil(p.get_height()) != 0:
            ax[i].annotate(np.round(p.get_height(),decimals=2),(p.get_x()+p.get_width()/2., p.get_height()),
                    ha='center',
                    va='center',
                    xytext=(0, 10),
                    textcoords='offset points',fontsize=8)
    ax[i].set_title(cols[i])
plt.subplots_adjust(hspace=.5)

<IPython.core.display.Javascript object>

# Correlation

In [96]:
# Pairwise correlation between the columns of df.
corr = df.corr()
# Put each word of a column name on its own line so the tick labels stay narrow.
tick_labels = corr.columns.str.replace(' ','\n')

# Draw the annotated heatmap on a square-aspect axes.
fig = plt.figure()
ax = fig.add_subplot(111, aspect=1)
heatmap_options = dict(
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    cmap='cool',
    ax=ax,
    annot=corr.values,
)
sns.heatmap(corr, **heatmap_options)
ax.set_title('Similarity')
plt.tight_layout()
# The colour map turned out a bit wild, but let it stay for now.

<IPython.core.display.Javascript object>

In [None]:
# Armenia is completely different from everyone else.
# The Seljuks and the Arabs are very similar.
# Byzantium also turned out quite similar to the Arabs; no trace of Armenia there.

# Geographical Distance

In [133]:
# These cities were taken as the capitals of the civilisations.
capitals = ['Mecca','Konya','Istanbul','Yerevan']
# Alternative city list, left disabled:
# capitals = ['Paris','Madrid','Rome','Berlin']

In [134]:
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# NOTE(review): "bla" is a placeholder user agent; the Nominatim service asks
# for an identifying application name — consider replacing it.
geolocator = Nominatim(user_agent="bla")

# Geocode every city exactly once. The previous version geocoded inside the
# nested loop, issuing 2 * N**2 network requests for N cities.
locations = []
for city in capitals:
    location = geolocator.geocode(city)
    if location is None:
        # geocode() returns None when nothing is found; fail with a clear
        # message instead of an AttributeError on `.point` further down.
        raise ValueError(f"Could not geocode {city!r}")
    locations.append(location)

# Pairwise geodesic distances in kilometres. Only the upper triangle is
# computed and then mirrored, so the matrix is exactly symmetric and the
# diagonal stays at zero.
n_capitals = len(capitals)
Distance = np.zeros((n_capitals, n_capitals))
for i in range(n_capitals):
    for j in range(i + 1, n_capitals):
        d = geodesic(locations[i].point, locations[j].point).km
        Distance[i, j] = d
        Distance[j, i] = d

In [135]:
# Display the pairwise distance matrix (rows/columns in the order of `capitals`).
Distance

array([[   0.        , 1955.49920069, 2401.19531386, 2126.4947987 ],
       [1955.49920069,    0.        ,  461.57571281, 1071.64166835],
       [2401.19531386,  461.57571281,    0.        , 1317.5762036 ],
       [2126.4947987 , 1071.64166835, 1317.5762036 ,    0.        ]])

In [136]:
# Heatmap of the geographical distance matrix.
# `Distance` is used directly rather than through a `corr` alias, so the name
# `corr` keeps meaning "correlation matrix" throughout the notebook.
fig = plt.figure()
ax = fig.add_subplot(111, aspect=1)
# The colours are driven by the NEGATED distances while the annotations show
# the actual distances, so larger distances map to the low end of the
# colour map.
sns.heatmap(-Distance,
        xticklabels=capitals,
        yticklabels=capitals,
        cmap='cool',
        ax=ax,
        annot=Distance,
        # seaborn's default fmt is '.2g', which prints kilometre values in
        # scientific notation (e.g. 2e+03); show whole kilometres instead.
        fmt='.0f')
ax.set_title('Geographical Distances')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Geographical Distances')

# MDS Analysis

## MDS analysis takes as input a distance (dissimilarity) matrix (for example, one derived from a correlation matrix) and aims to recover the coordinates that could have generated these dissimilarities.


## The first MDS analysis uses the geographical distance matrix as a sanity check. If the algorithm works correctly, MDS should recover the geographical positions of these cities in 2-D, as on a map.

In [144]:
# MDS on geographical distances
from sklearn.datasets import load_digits
from sklearn import manifold

mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=1, dissimilarity="precomputed", n_jobs=1)

pos = mds.fit(Distance).embedding_

fig = plt.figure()
ax = fig.add_subplot(111, aspect=1)
ax.plot(-pos[:,0],pos[:,1],'o')

for i, txt in enumerate(capitals):
    ax.annotate(txt, (-pos[i,0], pos[i,1]))
    
# I would say it works. Let's do the MDS with symmetri distances

<IPython.core.display.Javascript object>

In [145]:
# MDS on symmetry similarities.
corr = df.corr()
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=1, dissimilarity="precomputed", n_jobs=1)
pos = mds.fit(1-corr).embedding_
fig = plt.figure()
ax  = fig.add_subplot(111, aspect=1)
ax.plot(-pos[:,0],pos[:,1],'o')

for i, txt in enumerate(df.columns):
    ax.annotate(txt, (-pos[i,0], pos[i,1]))
    

<IPython.core.display.Javascript object>

## simdi yukarida gordugumuz noktalari bize MDS analizi veriyor. 

## Bizim yukaridaki correlation matrisini input olarak aliyor ve output olarak da 4 tane yeni koordinat veriyor. Bu koordinatları da öyle bir seciyor ki, noktalarin arasindaki uzakliklar correlation matrisindeki uzakliklara orantili oluyor. Yani onun icin Armenia nokta taaa en uzaklarda kaliyor. Araplarla, Selcuklarda birbirlerine iki tane yakin nokta olarak duruyorlar, cunku birbirlerine cok benziyorlar.

## Yani, MDS oyle yeni birsey vermiyor bize, sadece korrelasyon matrisinden zar zor okuyabilecegimiz bir bilgiyi, bize gorsel olarak hemen anlamamizi sagliyor, bilgileri yeni bir representasyon haline sokarak. Yok ben MDS'e gerek kalmadan direkt correlasyon matrisinden herseyi cok kolayca gorebiliyorum dersen, MDS'i cikartiriz makaleden. Yok kalsin dersen, ben yazmaya baslarim o bahsettigimiz iki paragrafi.

## Asil bu MDS metodunun faydasini 4 tane medeniyetle degil de, daha cok medeniyetimiz olsaydi daha iyi anlardik. Cunku o zaman, mesela 12 x 12 'lik bir korelasyon matrisine bakmak iskence haline donusuyor (en azindan). MDS sayesinde kim kimin yaninda, kim uzakta, kimler kumelenmis bunlar hemen goruluyor.

## Onun icin bence istersen, biz 4 tane medeniyetle baslayalim paper'a, ama ilerleyen chapterlarda daha cok medeniyet ekleyebiliriz. Yani bu 4 medeniyetin sub-grouplarina bakabiliriz.