# Chapter 30

## Imports

In [1]:
import pandas as pd
import numpy as np

## 30.1 Melting Data

In [2]:
# Small wide-format example: one row per student, one column per test.
# Fred's missing test1 result is given as None.
scores = pd.DataFrame(dict(
    age=[15, 16, 16, 15],
    name=['Adam', 'Bob', 'Dave', 'Fred'],
    test1=[95, 81, 89, None],
    test2=[80, 82, 84, 88],
    teacher=['Ashby', 'Ashby', 'Jones', 'Jones'],
))

scores

Unnamed: 0,age,name,test1,test2,teacher
0,15,Adam,95.0,80,Ashby
1,16,Bob,81.0,82,Ashby
2,16,Dave,89.0,84,Jones
3,15,Fred,,88,Jones


In [3]:
# Wide -> long: keep name/age as identifiers and stack the two test
# columns into the default 'variable' / 'value' pair.
(scores
 .melt(
     id_vars=['name', 'age'],
     value_vars=['test1', 'test2'],
 )
)

Unnamed: 0,name,age,variable,value
0,Adam,15,test1,95.0
1,Bob,16,test1,81.0
2,Dave,16,test1,89.0
3,Fred,15,test1,
4,Adam,15,test2,80.0
5,Bob,16,test2,82.0
6,Dave,16,test2,84.0
7,Fred,15,test2,88.0


In [4]:
# Same melt, but with explicit names for the generated columns instead
# of the default 'variable' / 'value'.
(scores
 .melt(
     id_vars=['name', 'age'],
     value_vars=['test1', 'test2'],
     var_name='test',
     value_name='score',
 )
)

Unnamed: 0,name,age,test,score
0,Adam,15,test1,95.0
1,Bob,16,test1,81.0
2,Dave,16,test1,89.0
3,Fred,15,test1,
4,Adam,15,test2,80.0
5,Bob,16,test2,82.0
6,Dave,16,test2,84.0
7,Fred,15,test2,88.0


In [5]:
# Adding 'teacher' to id_vars carries that column through the melt.
(scores
 .melt(
     id_vars=['name', 'age', 'teacher'],
     value_vars=['test1', 'test2'],
     var_name='test',
     value_name='score',
 )
)

Unnamed: 0,name,age,teacher,test,score
0,Adam,15,Ashby,test1,95.0
1,Bob,16,Ashby,test1,81.0
2,Dave,16,Jones,test1,89.0
3,Fred,15,Jones,test1,
4,Adam,15,Ashby,test2,80.0
5,Bob,16,Ashby,test2,82.0
6,Dave,16,Jones,test2,84.0
7,Fred,15,Jones,test2,88.0


## 30.2 Un-melting Data

In [6]:
# Keep the long-format frame under a name so the cells that follow can
# reverse the melt.
melted = scores.melt(
    id_vars=['name', 'age', 'teacher'],
    value_vars=['test1', 'test2'],
    var_name='test',
    value_name='score',
)
melted

Unnamed: 0,name,age,teacher,test,score
0,Adam,15,Ashby,test1,95.0
1,Bob,16,Ashby,test1,81.0
2,Dave,16,Jones,test1,89.0
3,Fred,15,Jones,test1,
4,Adam,15,Ashby,test2,80.0
5,Bob,16,Ashby,test2,82.0
6,Dave,16,Jones,test2,84.0
7,Fred,15,Jones,test2,88.0


In [7]:
# Un-melt with pivot_table: the identifier columns become the index, each
# distinct 'test' value becomes a column, and 'score' fills the cells.
# aggfunc='mean' is pivot_table's default, written out here for clarity.
(melted
 .pivot_table(
     index=['name', 'age', 'teacher'],
     columns='test',
     values='score',
     aggfunc='mean',
 )
 .reset_index()
)

test,name,age,teacher,test1,test2
0,Adam,15,Ashby,95.0,80.0
1,Bob,16,Ashby,81.0,82.0
2,Dave,16,Jones,89.0,84.0
3,Fred,15,Jones,,88.0


In [8]:
# Un-melt with groupby: average 'score' per (name, age, teacher, test),
# then move the innermost index level ('test') into the columns.
(melted
 .groupby(['name', 'age', 'teacher', 'test'])
 ['score']
 .mean()
 .unstack(level=-1)
 .reset_index()
)

test,name,age,teacher,test1,test2
0,Adam,15,Ashby,95.0,80.0
1,Bob,16,Ashby,81.0,82.0
2,Dave,16,Jones,89.0,84.0
3,Fred,15,Jones,,88.0


## 30.3 Transposing Data

In [9]:
# Read the cleaned survey CSV; the first column of the file is used as
# the row index.
jb2 = pd.read_csv(
    'data/jet_brains_python_2020_survey_data_cleaned.csv',
    index_col=0,
)
jb2

Unnamed: 0,age,are_you_datascientist,company_size,country_live,employment_status,first_learn_about_main_ide,how_often_use_main_ide,ide_main,is_python_main,job_team,main_purposes,missing_features_main_ide,nps_main_ide,python_years,python3_version_most,several_projects,team_size,use_python_most,years_of_coding,python3_ver
1,21,True,5000,India,Fully employed by a company / organization,School / University,Daily,VS Code,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",8.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",2,Software prototyping,3.0,3.6
2,30,False,5000,United States,Fully employed by a company / organization,Friend / Colleague,Daily,Vim,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",10.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",4,DevOps / System administration / Writing autom...,3.0,3.6
10,21,False,51,Other country,Fully employed by a company / organization,School / University,Daily,IntelliJ IDEA,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,1.0,Python 3_8,"Yes, I work on one main and several side projects",2,Web development,1.0,3.8
11,21,True,51,United States,Fully employed by a company / organization,Online learning platform / Online course,Daily,PyCharm Community Edition,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",9.0,3.0,Python 3_9,"Yes, I work on many different projects",2,Data analysis,3.0,3.9
13,30,True,5000,Belgium,Fully employed by a company / organization,Social network,Daily,VS Code,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,6.0,Python 3_7,"Yes, I work on many different projects",2,Data analysis,3.0,3.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54456,30,False,1001,Turkey,Fully employed by a company / organization,Friend / Colleague,Daily,PyCharm Community Edition,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",9.0,1.0,Python 3_6,"Yes, I work on many different projects",5,Machine learning,6.0,3.6
54457,21,False,2,Russian Federation,Fully employed by a company / organization,School / University,Daily,Vim,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",10.0,6.0,Python 3_6,"Yes, I work on many different projects",2,Data analysis,1.0,3.6
54459,21,False,1,Russian Federation,Self-employed (a person earning income directl...,Friend / Colleague,Daily,PyCharm Professional Edition,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,3.0,Python 3_7,"Yes, I work on many different projects",1,Web development,6.0,3.7
54460,30,True,51,Spain,Fully employed by a company / organization,Search engines,Daily,Other,Yes,Work on your own project(s) independently,Both for work and personal,Yes – Please list:,3.0,6.0,Python 3_7,"Yes, I work on many different projects",4,Data analysis,3.0,3.7


In [10]:
# Transpose the first five rows so each survey column reads as a row.
jb2.head().transpose()

Unnamed: 0,1,2,10,11,13
age,21,30,21,21,30
are_you_datascientist,True,False,False,True,True
company_size,5000,5000,51,51,5000
country_live,India,United States,Other country,United States,Belgium
employment_status,Fully employed by a company / organization,Fully employed by a company / organization,Fully employed by a company / organization,Fully employed by a company / organization,Fully employed by a company / organization
first_learn_about_main_ide,School / University,Friend / Colleague,School / University,Online learning platform / Online course,Social network
how_often_use_main_ide,Daily,Daily,Daily,Daily,Daily
ide_main,VS Code,Vim,IntelliJ IDEA,PyCharm Community Edition,VS Code
is_python_main,Yes,Yes,Yes,Yes,Yes
job_team,Work in a team,Work on your own project(s) independently,Work in a team,Work in a team,Work in a team


## 30.4 Stacking & Unstacking

In [11]:
# Display the survey DataFrame that was loaded earlier as `jb2`.
jb2

Unnamed: 0,age,are_you_datascientist,company_size,country_live,employment_status,first_learn_about_main_ide,how_often_use_main_ide,ide_main,is_python_main,job_team,main_purposes,missing_features_main_ide,nps_main_ide,python_years,python3_version_most,several_projects,team_size,use_python_most,years_of_coding,python3_ver
1,21,True,5000,India,Fully employed by a company / organization,School / University,Daily,VS Code,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",8.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",2,Software prototyping,3.0,3.6
2,30,False,5000,United States,Fully employed by a company / organization,Friend / Colleague,Daily,Vim,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",10.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",4,DevOps / System administration / Writing autom...,3.0,3.6
10,21,False,51,Other country,Fully employed by a company / organization,School / University,Daily,IntelliJ IDEA,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,1.0,Python 3_8,"Yes, I work on one main and several side projects",2,Web development,1.0,3.8
11,21,True,51,United States,Fully employed by a company / organization,Online learning platform / Online course,Daily,PyCharm Community Edition,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",9.0,3.0,Python 3_9,"Yes, I work on many different projects",2,Data analysis,3.0,3.9
13,30,True,5000,Belgium,Fully employed by a company / organization,Social network,Daily,VS Code,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,6.0,Python 3_7,"Yes, I work on many different projects",2,Data analysis,3.0,3.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54456,30,False,1001,Turkey,Fully employed by a company / organization,Friend / Colleague,Daily,PyCharm Community Edition,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",9.0,1.0,Python 3_6,"Yes, I work on many different projects",5,Machine learning,6.0,3.6
54457,21,False,2,Russian Federation,Fully employed by a company / organization,School / University,Daily,Vim,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",10.0,6.0,Python 3_6,"Yes, I work on many different projects",2,Data analysis,1.0,3.6
54459,21,False,1,Russian Federation,Self-employed (a person earning income directl...,Friend / Colleague,Daily,PyCharm Professional Edition,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",10.0,3.0,Python 3_7,"Yes, I work on many different projects",1,Web development,6.0,3.7
54460,30,True,51,Spain,Fully employed by a company / organization,Search engines,Daily,Other,Yes,Work on your own project(s) independently,Both for work and personal,Yes – Please list:,3.0,6.0,Python 3_7,"Yes, I work on many different projects",4,Data analysis,3.0,3.7


In [14]:
# Count rows per (country, data-scientist answer), then pivot the last
# index level -- 'are_you_datascientist' -- into the columns.
(jb2
 .groupby(['country_live', 'are_you_datascientist'])
 .size()
 .unstack(level='are_you_datascientist')
)

are_you_datascientist,False,Other,True
country_live,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Algeria,12.0,1.0,5.0
Argentina,89.0,4.0,16.0
Armenia,15.0,,2.0
Australia,210.0,14.0,50.0
Austria,93.0,3.0,32.0
...,...,...,...
United States,2008.0,100.0,589.0
Uruguay,10.0,1.0,9.0
Uzbekistan,3.0,,1.0
Venezuela,16.0,2.0,4.0


In [17]:
# Same counts, but this time 'country_live' is the index level moved
# into the columns.
(jb2
 .groupby(['country_live', 'are_you_datascientist'])
 .size()
 .unstack(level='country_live')
)

country_live,Algeria,Argentina,Armenia,Australia,Austria,Bangladesh,Belarus,Belgium,Brazil,Bulgaria,...,Tunisia,Turkey,Ukraine,United Arab Emirates,United Kingdom,United States,Uruguay,Uzbekistan,Venezuela,Viet Nam
are_you_datascientist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,12.0,89.0,15.0,210.0,93.0,22.0,41.0,92.0,301.0,30.0,...,6.0,65.0,165.0,13.0,521.0,2008.0,10.0,3.0,16.0,16.0
Other,1.0,4.0,,14.0,3.0,1.0,2.0,2.0,20.0,3.0,...,,4.0,9.0,,36.0,100.0,1.0,,2.0,3.0
True,5.0,16.0,2.0,50.0,32.0,10.0,4.0,41.0,108.0,8.0,...,1.0,30.0,23.0,4.0,142.0,589.0,9.0,1.0,4.0,14.0


## 30.5 Stacking

In [18]:
# Per-country min and max of 'age' and 'company_size'. Passing a list of
# aggregations per column produces two-level column labels.
(jb2
 .pivot_table(
     index='country_live',
     aggfunc={col: ['min', 'max'] for col in ('age', 'company_size')},
 )
)

Unnamed: 0_level_0,age,age,company_size,company_size
Unnamed: 0_level_1,max,min,max,min
country_live,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Algeria,60,18,5000,1
Argentina,60,18,5000,1
Armenia,30,18,5000,1
Australia,60,18,5000,1
Austria,50,18,5000,1
...,...,...,...,...
United States,60,18,5000,1
Uruguay,40,21,5000,2
Uzbekistan,21,21,5000,1
Venezuela,50,18,5000,1


In [24]:
# Same per-country min/max table, then:
#   * stack(level=1) moves column level 1 (the aggregation names) into
#     the row index, and
#   * swaplevel() swaps the two innermost index levels so the
#     aggregation name comes first.
(jb2
 .pivot_table(
     index='country_live',
     aggfunc={col: ['min', 'max'] for col in ('age', 'company_size')},
 )
 .stack(level=1)
 .swaplevel()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,age,company_size
Unnamed: 0_level_1,country_live,Unnamed: 2_level_1,Unnamed: 3_level_1
max,Algeria,60,5000
min,Algeria,18,1
max,Argentina,60,5000
min,Argentina,18,1
max,Armenia,30,5000
...,...,...,...
min,Uzbekistan,21,1
max,Venezuela,50,5000
min,Venezuela,18,1
max,Viet Nam,60,5000


## 30.6 Flattening Hierarchical Indexes and Columns

In [26]:
# Average every numeric column per (country, age) group, then turn the
# grouping keys back into ordinary columns.
# numeric_only=True is required because jb2 also holds text columns: on
# pandas >= 2.0 a bare .mean() raises TypeError for them, and on 1.x it
# emits a deprecation warning before silently dropping them.
(jb2
 .groupby(['country_live', 'age'])
 .mean(numeric_only=True)
 .reset_index()
)

  .mean()


Unnamed: 0,country_live,age,company_size,nps_main_ide,python_years,team_size,years_of_coding,python3_ver
0,Algeria,18,2.000000,10.000000,1.000000,2.500000,6.000000,3.650000
1,Algeria,21,725.428571,7.142857,1.571429,3.142857,2.428571,3.757143
2,Algeria,30,1.600000,8.400000,2.800000,1.800000,3.800000,3.700000
3,Algeria,40,1674.000000,8.000000,5.000000,6.000000,6.666667,3.766667
4,Algeria,60,5000.000000,6.000000,11.000000,13.000000,11.000000,3.900000
...,...,...,...,...,...,...,...,...
355,Viet Nam,18,51.000000,10.000000,1.000000,2.000000,1.000000,3.700000
356,Viet Nam,21,348.346154,8.923077,2.192308,4.884615,1.923077,3.711538
357,Viet Nam,30,266.250000,9.000000,3.250000,5.500000,3.500000,3.750000
358,Viet Nam,40,51.000000,9.000000,3.000000,2.000000,6.000000,3.800000


In [27]:
# as_index=False keeps the grouping keys as regular columns, so no
# reset_index() is needed afterwards.
# numeric_only=True restricts the mean to numeric columns; without it
# pandas >= 2.0 raises TypeError on jb2's text columns (1.x only warns).
(jb2
 .groupby(['country_live', 'age'], as_index=False)
 .mean(numeric_only=True)
)

  .mean()


Unnamed: 0,country_live,age,company_size,nps_main_ide,python_years,team_size,years_of_coding,python3_ver
0,Algeria,18,2.000000,10.000000,1.000000,2.500000,6.000000,3.650000
1,Algeria,21,725.428571,7.142857,1.571429,3.142857,2.428571,3.757143
2,Algeria,30,1.600000,8.400000,2.800000,1.800000,3.800000,3.700000
3,Algeria,40,1674.000000,8.000000,5.000000,6.000000,6.666667,3.766667
4,Algeria,60,5000.000000,6.000000,11.000000,13.000000,11.000000,3.900000
...,...,...,...,...,...,...,...,...
355,Viet Nam,18,51.000000,10.000000,1.000000,2.000000,1.000000,3.700000
356,Viet Nam,21,348.346154,8.923077,2.192308,4.884615,1.923077,3.711538
357,Viet Nam,30,266.250000,9.000000,3.250000,5.500000,3.500000,3.750000
358,Viet Nam,40,51.000000,9.000000,3.000000,2.000000,6.000000,3.800000


In [28]:
# Per-(country, age) means with the innermost index level ('age') moved
# into the columns, giving two-level column labels.
# numeric_only=True restricts the mean to numeric columns; without it
# pandas >= 2.0 raises TypeError on jb2's text columns (1.x only warns).
(jb2
 .groupby(['country_live', 'age'])
 .mean(numeric_only=True)
 .unstack()
)

  .mean()


Unnamed: 0_level_0,company_size,company_size,company_size,company_size,company_size,company_size,nps_main_ide,nps_main_ide,nps_main_ide,nps_main_ide,...,years_of_coding,years_of_coding,years_of_coding,years_of_coding,python3_ver,python3_ver,python3_ver,python3_ver,python3_ver,python3_ver
age,18,21,30,40,50,60,18,21,30,40,...,30,40,50,60,18,21,30,40,50,60
country_live,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Algeria,2.00,725.428571,1.600000,1674.000000,,5000.000000,10.000,7.142857,8.400000,8.000000,...,3.800000,6.666667,,11.000000,3.650,3.757143,3.700000,3.766667,,3.900000
Argentina,51.00,459.789474,1229.568182,1641.350000,241.000000,2.000000,10.000,8.710526,8.659091,8.900000,...,5.363636,9.200000,11.000000,11.000000,3.700,3.760526,3.743182,3.730000,3.720000,3.800000
Armenia,11.00,1015.461538,337.666667,,,,10.000,9.000000,9.666667,,...,5.000000,,,,3.800,3.784615,3.766667,,,
Australia,4.25,1055.689655,1554.081818,1370.126984,1527.833333,628.888889,8.500,8.637931,8.836364,8.555556,...,6.327273,8.920635,10.066667,10.444444,3.750,3.739655,3.730000,3.720635,3.756667,3.777778
Austria,11.00,785.258065,853.741935,664.409091,767.750000,,10.000,8.677419,8.806452,9.000000,...,5.241935,7.772727,10.583333,,3.800,3.729032,3.729032,3.745455,3.700000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United States,707.40,1640.298805,1644.012476,1764.550209,1947.022727,1849.525000,9.225,8.948207,8.904990,8.920502,...,6.078695,8.667364,9.905303,10.441667,3.775,3.736388,3.733685,3.742469,3.742045,3.742500
Uruguay,,31.000000,408.076923,1001.000000,,,,9.500000,9.076923,10.000000,...,6.307692,3.000000,,,,3.750000,3.746154,3.800000,,
Uzbekistan,,1265.750000,,,,,,9.750000,,,...,,,,,,3.725000,,,,
Venezuela,2.00,25.100000,3.750000,13.400000,2525.500000,,8.000,9.800000,7.500000,8.600000,...,5.000000,10.000000,8.500000,,3.800,3.720000,3.650000,3.740000,3.800000,


In [29]:
def flatten_cols(df):
    """Replace multi-level column labels with '_'-joined strings.

    Every label yielded by ``df.columns.to_flat_index()`` is converted
    piece by piece with ``str`` and joined with underscores, e.g.
    ``('company_size', 18)`` becomes ``'company_size_18'``.

    The new labels are assigned onto ``df`` itself (the frame passed in is
    modified), and that same object is returned so the function can be
    used with ``DataFrame.pipe``.
    """
    flat_labels = []
    for label in df.columns.to_flat_index():
        flat_labels.append('_'.join(str(part) for part in label))
    df.columns = flat_labels
    return df

In [35]:
# Look at the two-level column labels produced by unstacking the
# per-(country, age) means.
# numeric_only=True restricts the mean to numeric columns; without it
# pandas >= 2.0 raises TypeError on jb2's text columns (1.x only warns).
(jb2
 .groupby(['country_live', 'age'])
 .mean(numeric_only=True)
 .unstack()
 .columns
)

  .mean()


MultiIndex([(   'company_size', 18),
            (   'company_size', 21),
            (   'company_size', 30),
            (   'company_size', 40),
            (   'company_size', 50),
            (   'company_size', 60),
            (   'nps_main_ide', 18),
            (   'nps_main_ide', 21),
            (   'nps_main_ide', 30),
            (   'nps_main_ide', 40),
            (   'nps_main_ide', 50),
            (   'nps_main_ide', 60),
            (   'python_years', 18),
            (   'python_years', 21),
            (   'python_years', 30),
            (   'python_years', 40),
            (   'python_years', 50),
            (   'python_years', 60),
            (      'team_size', 18),
            (      'team_size', 21),
            (      'team_size', 30),
            (      'team_size', 40),
            (      'team_size', 50),
            (      'team_size', 60),
            ('years_of_coding', 18),
            ('years_of_coding', 21),
            ('years_of_coding', 30),
 

In [36]:
# Per-(country, age) means, unstacked on the innermost index level, with
# the resulting two-level column labels collapsed by flatten_cols.
# numeric_only=True restricts the mean to numeric columns; without it
# pandas >= 2.0 raises TypeError on jb2's text columns (1.x only warns).
(jb2
 .groupby(['country_live', 'age'])
 .mean(numeric_only=True)
 .unstack()
 .pipe(flatten_cols)
)

  .mean()


Unnamed: 0_level_0,company_size_18,company_size_21,company_size_30,company_size_40,company_size_50,company_size_60,nps_main_ide_18,nps_main_ide_21,nps_main_ide_30,nps_main_ide_40,...,years_of_coding_30,years_of_coding_40,years_of_coding_50,years_of_coding_60,python3_ver_18,python3_ver_21,python3_ver_30,python3_ver_40,python3_ver_50,python3_ver_60
country_live,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Algeria,2.00,725.428571,1.600000,1674.000000,,5000.000000,10.000,7.142857,8.400000,8.000000,...,3.800000,6.666667,,11.000000,3.650,3.757143,3.700000,3.766667,,3.900000
Argentina,51.00,459.789474,1229.568182,1641.350000,241.000000,2.000000,10.000,8.710526,8.659091,8.900000,...,5.363636,9.200000,11.000000,11.000000,3.700,3.760526,3.743182,3.730000,3.720000,3.800000
Armenia,11.00,1015.461538,337.666667,,,,10.000,9.000000,9.666667,,...,5.000000,,,,3.800,3.784615,3.766667,,,
Australia,4.25,1055.689655,1554.081818,1370.126984,1527.833333,628.888889,8.500,8.637931,8.836364,8.555556,...,6.327273,8.920635,10.066667,10.444444,3.750,3.739655,3.730000,3.720635,3.756667,3.777778
Austria,11.00,785.258065,853.741935,664.409091,767.750000,,10.000,8.677419,8.806452,9.000000,...,5.241935,7.772727,10.583333,,3.800,3.729032,3.729032,3.745455,3.700000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United States,707.40,1640.298805,1644.012476,1764.550209,1947.022727,1849.525000,9.225,8.948207,8.904990,8.920502,...,6.078695,8.667364,9.905303,10.441667,3.775,3.736388,3.733685,3.742469,3.742045,3.742500
Uruguay,,31.000000,408.076923,1001.000000,,,,9.500000,9.076923,10.000000,...,6.307692,3.000000,,,,3.750000,3.746154,3.800000,,
Uzbekistan,,1265.750000,,,,,,9.750000,,,...,,,,,,3.725000,,,,
Venezuela,2.00,25.100000,3.750000,13.400000,2525.500000,,8.000,9.800000,7.500000,8.600000,...,5.000000,10.000000,8.500000,,3.800,3.720000,3.650000,3.740000,3.800000,
