In [218]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA
from IPython.display import Markdown

# Load data

In [219]:
excel_file = "data_formatted.xlsx"
years = ["2009", "2014", "2019"]

# Open the workbook a single time and read every yearly sheet ("data" + year)
# from that one handle, instead of re-opening and re-parsing the file for each
# year. The context manager closes the file as soon as the sheets are loaded.
# `data` maps each year string to the DataFrame read from its sheet.
with pd.ExcelFile(excel_file) as workbook:
    data = {y: pd.read_excel(workbook, sheet_name="data" + y) for y in years}

In [233]:
def scale_data(df, n_id_cols=2):
    """Standardise the feature columns of ``df`` and display the intermediate tables.

    The first ``n_id_cols`` columns are passed through untouched; every
    remaining column is transformed with ``preprocessing.StandardScaler``.
    Along the way the function displays the fitted means / standard
    deviations, the scaled frame and its ``describe()`` summary.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. Columns from position ``n_id_cols`` onwards must be
        numeric, since they are handed to the scaler.
    n_id_cols : int, default 2
        Number of leading columns to leave unscaled.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and column order as ``df`` in which
        the feature columns hold the scaled values. ``df`` itself is not
        modified.
    """
    id_part = df.iloc[:, :n_id_cols]
    features = df.iloc[:, n_id_cols:]

    scaler = preprocessing.StandardScaler().fit(features)

    # Square root of the fitted variance, i.e. the standard deviation the
    # scaler divides by.
    scaler_stats = pd.DataFrame(
        {"Mean values": scaler.mean_, "Std values": scaler.var_**0.5},
        index=features.columns,
    )
    display(scaler_stats.transpose())

    # Assemble the result from a fresh float block instead of writing into a
    # copy with ``.iloc[:, n:] = ...``: that in-place assignment has to upcast
    # when a feature column was read as integers, which newer pandas versions
    # warn about or reject.
    scaled = pd.DataFrame(
        scaler.transform(features), index=df.index, columns=features.columns
    )
    df_norm = pd.concat([id_part, scaled], axis=1)

    display(Markdown("### Scaled data"))
    display(df_norm)
    display(Markdown("### Statistics of scaled data"))
    display(df_norm.describe())
    return df_norm
    

In [248]:
def pca_data(df, n_id_cols=2):
    """Fit a PCA on the feature columns of ``df`` and display its summary tables.

    The first ``n_id_cols`` columns are ignored; a ``PCA()`` with default
    settings is fitted on the remaining columns. Three tables are displayed,
    each preceded by a Markdown heading:

    * statistics: eigenvalues (``explained_variance_``), share of variance
      (``explained_variance_ratio_``) and its cumulative sum, per factor;
    * eigenvectors: ``components_`` with variables as rows, factors as columns;
    * factor loadings: each eigenvector multiplied by the square root of its
      eigenvalue.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; columns from position ``n_id_cols`` onwards must be
        numeric. The caller in this notebook passes already-scaled data.
    n_id_cols : int, default 2
        Number of leading columns to skip.

    Returns
    -------
    None
        Results are only displayed, not returned.
    """
    features = df.iloc[:, n_id_cols:]

    pca = PCA()
    pca.fit(features)

    # Factor labels F1..Fk are shared by all three tables, so build them once.
    factor_labels = ["F{}".format(i + 1) for i in range(pca.n_components_)]

    pca_statistics = pd.DataFrame(
        {
            "Eigenvalues": pca.explained_variance_,
            "Variability": pca.explained_variance_ratio_,
            "Cumulative": np.cumsum(pca.explained_variance_ratio_),
        },
        index=factor_labels,
    )
    display(Markdown("### PCA - statistics"))
    display(pca_statistics.transpose())

    display(Markdown("### PCA - eigenvectors"))
    df_eigenvectors = pd.DataFrame(
        pca.components_, index=factor_labels, columns=features.columns
    )
    display(df_eigenvectors.transpose())

    display(Markdown("### PCA - factor loadings"))
    # Loadings = eigenvector * sqrt(eigenvalue); broadcasting scales each
    # factor column of components_.T by its own eigenvalue.
    # NOTE(review): in the recorded outputs the eigenvalues sum to slightly
    # more than the number of variables (about 6.03 for 6 standardised
    # columns), so these loadings are marginally larger than plain
    # variable/factor correlations -- confirm this is acceptable.
    factor_loadings = pd.DataFrame(
        pca.components_.T * np.sqrt(pca.explained_variance_),
        columns=factor_labels,
        index=features.columns,
    )
    display(factor_loadings)

In [251]:
# For each entry in `years`: show the raw table, then the scaled table
# (scale_data displays it and returns it), then the PCA summary of the
# scaled table.
for y in years:
    display(
        Markdown("# Year: " + y),
        Markdown("### Initial data"),
        data[y],
    )
    df_norm = scale_data(data[y])
    pca_data(df_norm)
    # Run of blank lines so consecutive yearly reports stay visually apart.
    print("\n" * 8)

# Year: 2009

### Initial data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.103396,2.822793,7.734921,285.978307,0.0534,1.485577
1,AT12,Niederösterreich,0.041436,1.317665,3.512799,196.483975,0.0534,0.909626
2,AT13,Wien,0.033782,2.609577,143.330808,15314.588384,0.0534,1.592807
3,AT21,Kärnten,0.275524,4.282064,16.456176,315.481371,0.0534,1.872782
4,AT22,Steiermark,0.078238,2.255728,5.800320,241.368839,0.0534,1.295643
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,0.055534,1.866372,6.658430,343.673281,0.0332,1.024037
223,SK01,Bratislavský kraj,0.036121,1.283314,10.633845,672.189015,0.0260,1.136978
224,SK02,Západné Slovensko,0.022296,0.391466,2.772283,173.013407,0.0260,0.961510
225,SK03,Stredné Slovensko,0.037820,0.734019,3.154597,144.634021,0.0260,0.932517


Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
Mean values,0.080835,1.79921,14.551054,828.005004,0.033941,1.076086
Std values,0.103245,1.735795,25.429264,2298.623345,0.013912,0.628402


### Scaled data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.218519,0.589691,-0.268043,-0.235805,1.398707,0.651639
1,AT12,Niederösterreich,-0.381605,-0.277421,-0.434077,-0.274739,1.398707,-0.264895
2,AT13,Wien,-0.455734,0.466857,5.064234,6.302287,1.398707,0.822277
3,AT21,Kärnten,1.885689,1.430384,0.074918,-0.222970,1.398707,1.267813
4,AT22,Steiermark,-0.025150,0.263002,-0.344121,-0.255212,1.398707,0.349389
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,-0.245056,0.038692,-0.310376,-0.210705,-0.053293,-0.082827
223,SK01,Bratislavský kraj,-0.433079,-0.297210,-0.154043,-0.067787,-0.570838,0.096899
224,SK02,Západné Slovensko,-0.566986,-0.811008,-0.463197,-0.284950,-0.570838,-0.182330
225,SK03,Stredné Slovensko,-0.416622,-0.613662,-0.448163,-0.297296,-0.570838,-0.228467


### Statistics of scaled data

Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
count,227.0,227.0,227.0,227.0,227.0,227.0
mean,1.17625e-16,-1.408565e-16,7.825361e-17,7.042824e-17,1.252058e-16,-7.825361e-17
std,1.00221,1.00221,1.00221,1.00221,1.00221,1.00221
min,-0.7279116,-0.9431135,-0.5535928,-0.3543806,-1.505294,-1.712418
25%,-0.5184692,-0.5676591,-0.4402637,-0.2906949,-0.8439867,-0.5530497
50%,-0.3202872,-0.2481345,-0.3058283,-0.2264607,0.1623502,-0.2487055
75%,0.1594672,0.1925529,0.01047782,-0.08361661,0.3564295,0.3124693
max,7.455892,5.427844,7.207777,9.990145,5.934411,5.556568


### PCA - statistics

Unnamed: 0,F1,F2,F3,F4,F5,F6
Eigenvalues,3.025427,1.671527,0.832436,0.308782,0.120462,0.067915
Variability,0.502017,0.277361,0.138128,0.051237,0.019989,0.011269
Cumulative,0.502017,0.779377,0.917505,0.968742,0.988731,1.0


### PCA - eigenvectors

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.477099,-0.288641,-0.354725,0.278108,-0.532116,-0.450268
arrivals_pop,0.511076,-0.179526,-0.295112,0.310214,0.655327,0.306259
beds_area,0.360199,0.58057,-0.052728,0.032577,-0.43395,0.583987
arrivals+pop_area,0.21343,0.704416,0.033615,0.027402,0.309353,-0.600555
GDP_tour,0.311975,-0.145706,0.876876,0.332859,-0.038593,-0.015643
LQ_work,0.48842,-0.173101,0.119402,-0.844876,0.043557,-0.038889


### PCA - factor loadings

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.829854,-0.373176,-0.323644,0.15454,-0.184685,-0.117342
arrivals_pop,0.888952,-0.232105,-0.269254,0.17238,0.227449,0.079813
beds_area,0.626522,0.750604,-0.048108,0.018102,-0.150614,0.15219
arrivals+pop_area,0.371235,0.910722,0.030669,0.015227,0.107369,-0.156508
GDP_tour,0.542642,-0.18838,0.800044,0.184963,-0.013395,-0.004077
LQ_work,0.849545,-0.223798,0.108939,-0.469482,0.015117,-0.010135













# Year: 2014

### Initial data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.099859,3.056911,7.592857,308.471164,0.0581,1.236523
1,AT12,Niederösterreich,0.042166,1.396300,3.624974,206.005342,0.0581,0.836263
2,AT13,Wien,0.039706,3.506133,177.146465,20104.020202,0.0581,1.470143
3,AT21,Kärnten,0.266449,4.515711,15.812320,327.327746,0.0581,1.370454
4,AT22,Steiermark,0.088348,2.596880,6.606670,268.973848,0.0581,1.214808
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,0.069105,2.325564,8.549610,411.438356,0.0334,1.007536
223,SK01,Bratislavský kraj,0.043389,1.540740,13.276101,777.408709,0.0232,1.105905
224,SK02,Západné Slovensko,0.024118,0.403810,2.984302,173.706663,0.0232,1.133323
225,SK03,Stredné Slovensko,0.041380,0.820018,3.444637,151.506920,0.0232,0.934933


Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
Mean values,0.084752,2.102964,15.830003,984.249236,0.037922,1.1014
Std values,0.11146,2.003377,29.15167,2881.065038,0.018294,0.681579


### Scaled data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.135534,0.476169,-0.282562,-0.234558,1.102965,0.198250
1,AT12,Niederösterreich,-0.382072,-0.352737,-0.418673,-0.270124,1.102965,-0.389005
2,AT13,Wien,-0.404150,0.700401,5.533695,6.636355,1.102965,0.541012
3,AT21,Kärnten,1.630159,1.204340,-0.000607,-0.228013,1.102965,0.394750
4,AT22,Steiermark,0.032265,0.246541,-0.316391,-0.248268,1.102965,0.166389
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,-0.140388,0.111112,-0.249742,-0.198819,-0.247182,-0.137716
223,SK01,Bratislavský kraj,-0.371103,-0.280638,-0.087607,-0.071793,-0.804733,0.006609
224,SK02,Západné Slovensko,-0.544004,-0.848145,-0.440651,-0.281334,-0.804733,0.046836
225,SK03,Stredné Slovensko,-0.389132,-0.640392,-0.424860,-0.289040,-0.804733,-0.244237


### Statistics of scaled data

Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
count,227.0,227.0,227.0,227.0,227.0,227.0
mean,-2.2987000000000003e-17,-3.9126800000000004e-17,-7.825361e-18,3.1301440000000004e-17,-1.5650720000000002e-17,3.1301440000000004e-17
std,1.00221,1.00221,1.00221,1.00221,1.00221,1.00221
min,-0.698549,-0.9358453,-0.5271093,-0.3365569,-1.340419,-1.275175
25%,-0.5045467,-0.559578,-0.4237856,-0.2815608,-0.8156653,-0.5654861
50%,-0.3220109,-0.2702088,-0.3007521,-0.2242293,-0.02306874,-0.2815703
75%,0.1164222,0.1829456,-0.02611933,-0.08607866,0.6875351,0.2875036
max,7.452486,5.148362,7.558286,9.499009,5.328325,6.730998


### PCA - statistics

Unnamed: 0,F1,F2,F3,F4,F5,F6
Eigenvalues,3.087269,1.733809,0.798923,0.240198,0.115297,0.051052
Variability,0.512278,0.287695,0.132567,0.039857,0.019131,0.008471
Cumulative,0.512278,0.799973,0.932541,0.972397,0.991529,1.0


### PCA - eigenvectors

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.479388,-0.260109,-0.380891,-0.144525,-0.675462,-0.283402
arrivals_pop,0.505227,-0.117598,-0.358115,-0.411839,0.649505,0.105843
beds_area,0.339233,0.596709,-0.016607,0.013895,-0.266398,0.67633
arrivals+pop_area,0.204994,0.697931,0.083625,-0.013178,0.114061,-0.671337
GDP_tour,0.334881,-0.192924,0.835579,-0.382692,-0.077392,0.000137
LQ_work,0.495668,-0.195255,0.145651,0.814053,0.178683,-0.019115


### PCA - factor loadings

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.842314,-0.342497,-0.340449,-0.070832,-0.229356,-0.064034
arrivals_pop,0.887715,-0.154846,-0.320092,-0.201842,0.220542,0.023915
beds_area,0.596054,0.785712,-0.014843,0.00681,-0.090457,0.152815
arrivals+pop_area,0.360186,0.918996,0.074746,-0.006459,0.03873,-0.151687
GDP_tour,0.588407,-0.254031,0.746861,-0.187557,-0.026279,3.1e-05
LQ_work,0.87092,-0.2571,0.130187,0.398967,0.060673,-0.004319













# Year: 2019

### Initial data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.093391,3.435248,7.249735,344.298413,0.0582,1.066819
1,AT12,Niederösterreich,0.040682,1.764593,3.609563,245.291215,0.0582,0.905219
2,AT13,Wien,0.041763,4.058523,200.113636,24238.638889,0.0582,1.493316
3,AT21,Kärnten,0.249876,5.222878,14.963702,372.654532,0.0582,1.464011
4,AT22,Steiermark,0.103628,3.100366,7.926101,313.620970,0.0582,1.129360
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,0.128486,4.424386,16.224782,684.973118,0.0337,0.815784
223,SK01,Bratislavský kraj,0.047109,2.378620,15.375062,1102.687284,0.0261,0.861286
224,SK02,Západné Slovensko,0.028411,0.654860,3.495452,203.598599,0.0261,0.825069
225,SK03,Stredné Slovensko,0.049793,1.482717,4.120428,205.447294,0.0261,0.894229


Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
Mean values,0.089322,2.656022,17.147572,1163.419616,0.039929,1.099664
Std values,0.121922,2.724463,32.019829,3377.757206,0.020729,0.727022


### Scaled data

Unnamed: 0,GEO (Codes),GEO (Labels),beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
0,AT11,Burgenland (AT),0.033375,0.286011,-0.309116,-0.242504,0.881410,-0.045178
1,AT12,Niederösterreich,-0.398940,-0.327194,-0.422801,-0.271816,0.881410,-0.267454
2,AT13,Wien,-0.390074,0.514781,5.714149,6.831521,0.881410,0.541458
3,AT21,Kärnten,1.316851,0.942151,-0.068204,-0.234110,0.881410,0.501150
4,AT22,Steiermark,0.117338,0.163094,-0.287992,-0.251587,0.881410,0.040847
...,...,...,...,...,...,...,...,...
222,SI04,Zahodna Slovenija,0.321222,0.649069,-0.028819,-0.141646,-0.300498,-0.390470
223,SK01,Bratislavský kraj,-0.346227,-0.101819,-0.055357,-0.017980,-0.667130,-0.327883
224,SK02,Západné Slovensko,-0.499585,-0.734516,-0.426365,-0.284159,-0.667130,-0.377699
225,SK03,Stredné Slovensko,-0.324212,-0.430655,-0.406846,-0.283612,-0.667130,-0.282570


### Statistics of scaled data

Unnamed: 0,beds_pop,arrivals_pop,beds_area,arrivals+pop_area,GDP_tour,LQ_work
count,227.0,227.0,227.0,227.0,227.0,227.0
mean,-1.310748e-16,1.212931e-16,6.260288000000001e-17,8.607897000000001e-17,3.286651e-16,-6.260288000000001e-17
std,1.00221,1.00221,1.00221,1.00221,1.00221,1.00221
min,-0.6580526,-0.8403357,-0.5218545,-0.3393049,-1.231551,-1.148861
25%,-0.4724658,-0.5111542,-0.4231653,-0.2861604,-0.8456221,-0.5517145
50%,-0.3014552,-0.257911,-0.2915849,-0.2308767,-0.1509501,-0.3227944
75%,0.08392773,0.1188307,-0.01445677,-0.09639872,0.6932695,0.2889977
max,7.415082,7.248129,7.3531,9.038679,4.924015,6.502177


### PCA - statistics

Unnamed: 0,F1,F2,F3,F4,F5,F6
Eigenvalues,3.154623,1.771686,0.775385,0.202608,0.080783,0.041465
Variability,0.523454,0.29398,0.128662,0.033619,0.013404,0.00688
Cumulative,0.523454,0.817434,0.946096,0.979715,0.99312,1.0


### PCA - eigenvectors

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.487077,-0.232395,-0.377641,-0.257558,0.629544,-0.321676
arrivals_pop,0.510808,-0.11732,-0.35592,-0.30896,-0.685793,0.181286
beds_area,0.313193,0.61469,-0.007626,-0.010772,0.309851,0.654128
arrivals+pop_area,0.185986,0.701142,0.067264,-0.010487,-0.185873,-0.659262
GDP_tour,0.343084,-0.176112,0.841629,-0.377624,0.018018,-0.003714
LQ_work,0.501442,-0.178238,0.133366,0.833896,-0.049822,-0.033709


### PCA - factor loadings

Unnamed: 0,F1,F2,F3,F4,F5,F6
beds_pop,0.865109,-0.309329,-0.332535,-0.115932,0.178931,-0.065503
arrivals_pop,0.907259,-0.156158,-0.313409,-0.139069,-0.194918,0.036915
beds_area,0.556271,0.818182,-0.006716,-0.004849,0.088067,0.1332
arrivals+pop_area,0.330335,0.933253,0.05923,-0.00472,-0.052829,-0.134245
GDP_tour,0.609361,-0.234413,0.741104,-0.169976,0.005121,-0.000756
LQ_work,0.890624,-0.237243,0.117436,0.375353,-0.014161,-0.006864











