# Snu = "Putt" en, eller flere, av kolonnene inn i radene

## Pivot_table()
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.pivot_table.html

In [1]:
import pandas as pd
import numpy as np
# Load the two mock 2019 tables: `pop` from the population file and `comp` from the companies file.
pop_path = '../data/mock_sysselsatte/population_2019_30000.csv'
comp_path = '../data/mock_sysselsatte/companies_2019_30000.csv'
pop = pd.read_csv(pop_path)
comp = pd.read_csv(comp_path)

### Oppgave: snu datasettet på ansatte per ensifret nacekode per fylke

In [2]:
# Display the company table to see which columns are available before reshaping.
comp

Unnamed: 0.1,Unnamed: 0,work_id,nace,region_code,region,employee_points
0,0,118415099,11.070,3.0,Oslo,60
1,1,165189001,10.920,3.0,Oslo,54
2,2,205912967,10.130,2.0,Akershus (-2019),48
3,3,959013544,46.693,12.0,Hordaland (-2019),46
4,4,405013223,09.900,2.0,Akershus (-2019),88
...,...,...,...,...,...,...
1128,1128,706734220,51.210,50.0,Trøndelag - Trööndelage,15
1129,1129,156404021,74.200,18.0,Nordland,23
1130,1130,962214437,74.102,18.0,Nordland,12
1131,1131,999083400,43.341,10.0,Vest-Agder (-2019),21


In [3]:
# Work on an independent (deep) copy so that columns added to comp2 leave `comp` untouched.
comp2 = comp.copy(deep=True)

In [4]:
# Build a one-digit NACE code from the five-digit code by keeping only the
# first character of the string (string slicing via the .str accessor).
comp2['nace1'] = comp2['nace'].str.slice(stop=1)
comp2

Unnamed: 0.1,Unnamed: 0,work_id,nace,region_code,region,employee_points,nace1
0,0,118415099,11.070,3.0,Oslo,60,1
1,1,165189001,10.920,3.0,Oslo,54,1
2,2,205912967,10.130,2.0,Akershus (-2019),48,1
3,3,959013544,46.693,12.0,Hordaland (-2019),46,4
4,4,405013223,09.900,2.0,Akershus (-2019),88,0
...,...,...,...,...,...,...,...
1128,1128,706734220,51.210,50.0,Trøndelag - Trööndelage,15,5
1129,1129,156404021,74.200,18.0,Nordland,23,7
1130,1130,962214437,74.102,18.0,Nordland,12,7
1131,1131,999083400,43.341,10.0,Vest-Agder (-2019),21,4


In [5]:
# Pivot without filtering the frame down first: the parameters below already
# name the columns we are interested in.
# "aggfunc" defines what happens when several rows land in the same
# (nace1, region) cell; here their employee_points are summed.
comp_piv = pd.pivot_table(
    comp2,
    values='employee_points',
    index=['nace1'],
    columns=['region'],
    # The string name is used instead of np.sum: recent pandas versions emit a
    # FutureWarning when a NumPy reduction is passed as a callable, and ask
    # for the string to keep the current behaviour.
    # For the average number of employees per company, use aggfunc='mean'.
    aggfunc='sum',
)
comp_piv

region,Akershus (-2019),Aust-Agder (-2019),Buskerud (-2019),Finnmark - Finnmárku (-2019),Hedmark (-2019),Hordaland (-2019),Møre og Romsdal,Nordland,Oppland (-2019),Oslo,Rogaland,Sogn og Fjordane (-2019),Telemark (-2019),Troms - Romsa (-2019),Trøndelag - Trööndelage,Vest-Agder (-2019),Vestfold (-2019),Østfold (-2019)
nace1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,1411.0,,,,,,,,,,,,,,,,,788.0
1,465.0,,,,518.0,,,,29.0,2206.0,,,,,,,,
2,,,804.0,,,,,,482.0,,,,184.0,,,,665.0,
3,,266.0,,,,,,,,,,,255.0,,,32.0,,
4,,,,,,1554.0,749.0,,,,1416.0,294.0,,,172.0,460.0,,
5,,,,,,,,,,,,,,,1192.0,,,
6,,,,,,,,86.0,,,,,,,,,,
7,,,,,,,,572.0,,,,,,179.0,,,,
8,,,,188.0,,,,,,,,,,294.0,,,,


## Pivot()
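Støtter, til forskjell fra pivot_table, ikke aggregeringsfunksjoner – hver kombinasjon av index og columns må derfor være unik. \
Brukes her som metode på dataframen, men finnes også som funksjon i pandas-pakken (pd.pivot), på samme måte som pivot_table. \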
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html#pandas.DataFrame.pivot

In [8]:
# pivot() only reshapes and cannot aggregate, so every (row index, region)
# pair has to be unique; work_id is added to the row index so that each
# company gets its own row under its one-digit NACE code.
comp_piv_simple = comp2.pivot(
    index=['nace1', 'work_id'],
    columns='region',
    values='employee_points',
)
comp_piv_simple

Unnamed: 0_level_0,region,NaN,Akershus (-2019),Aust-Agder (-2019),Buskerud (-2019),Finnmark - Finnmárku (-2019),Hedmark (-2019),Hordaland (-2019),Møre og Romsdal,Nordland,Oppland (-2019),Oslo,Rogaland,Sogn og Fjordane (-2019),Telemark (-2019),Troms - Romsa (-2019),Trøndelag - Trööndelage,Vest-Agder (-2019),Vestfold (-2019),Østfold (-2019)
nace1,work_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
,587799322,0.0,,,,,,,,,,,,,,,,,,
0,2102749,,,,,,,,,,,,,,,,,,,6.0
0,8727971,,,,,,,,,,,,,,,,,,,3.0
0,11116001,,,,,,,,,,,,,,,,,,,4.0
0,22649902,,,,,,,,,,,,,,,,,,,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,890843093,,,,,4.0,,,,,,,,,,,,,,
8,949419282,,,,,1.0,,,,,,,,,,,,,,
8,951653677,,,,,,,,,,,,,,,5.0,,,,
8,958992622,,,,,,,,,,,,,,,21.0,,,,


In [9]:
# All companies with nace1 == '0': take that cross-section of the row
# MultiIndex (leaving work_id as the index) and show the first 30 rows.
comp_piv_simple.xs('0', level='nace1').head(30)

region,NaN,Akershus (-2019),Aust-Agder (-2019),Buskerud (-2019),Finnmark - Finnmárku (-2019),Hedmark (-2019),Hordaland (-2019),Møre og Romsdal,Nordland,Oppland (-2019),Oslo,Rogaland,Sogn og Fjordane (-2019),Telemark (-2019),Troms - Romsa (-2019),Trøndelag - Trööndelage,Vest-Agder (-2019),Vestfold (-2019),Østfold (-2019)
work_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2102749,,,,,,,,,,,,,,,,,,,6.0
8727971,,,,,,,,,,,,,,,,,,,3.0
11116001,,,,,,,,,,,,,,,,,,,4.0
22649902,,,,,,,,,,,,,,,,,,,4.0
25797167,,,,,,,,,,,,,,,,,,,16.0
26824146,,,,,,,,,,,,,,,,,,,14.0
45777390,,,,,,,,,,,,,,,,,,,18.0
50232617,,16.0,,,,,,,,,,,,,,,,,
62221914,,,,,,,,,,,,,,,,,,,24.0
69179718,,,,,,,,,,,,,,,,,,,6.0
