# Reshape a DataFrame

In [1]:
import pandas as pd
import seaborn as sns

In [4]:
# Load the wide-format score table (one row per district) from the workbook
# that sits next to this notebook, then preview the first ten rows.
ts = pd.read_excel(io='test_scores.xlsx')
ts.head(n=10)

Unnamed: 0,district,M,R,S
0,Fox Chapel,473,447,460
1,Tuscarora,536,450,493
2,Sautee Nacoochee,463,439,451
3,Fort Peck,559,448,504
4,North Pole,489,447,468
5,Elkport,454,431,443
6,Whitley Gardens,423,395,409
7,Waldenburg,500,451,476
8,North Beach,439,478,459
9,Ensenada,528,455,492


In [6]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and approximate memory usage.
ts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   district  100 non-null    object
 1   M         100 non-null    int64 
 2   R         100 non-null    int64 
 3   S         100 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 3.2+ KB


## Unpivot columns

In [9]:
# if we want to collapse the scores into 2 columns,
# one with the test type and one with the score,
# we'll need to reshape the dataset (i.e. un-pivot it)
# we'll use the .melt() function
# id_vars should contain all the variables that we do not want to reshape

In [11]:
# Un-pivot the three score columns into long format: `district` is kept as the
# identifier, the former column headers (M / R / S) go into `test_type`, and
# the cell values go into `score`.
# NOTE: despite its name, `scores_pivot` holds the melted (long) frame.
scores_pivot = ts.melt(
    id_vars='district',
    value_vars=['M', 'R', 'S'],
    var_name='test_type',
    value_name='score',
)
scores_pivot.head()

Unnamed: 0,district,test_type,score
0,Fox Chapel,M,473
1,Tuscarora,M,536
2,Sautee Nacoochee,M,463
3,Fort Peck,M,559
4,North Pole,M,489


In [12]:
# Sanity check: list the distinct labels that ended up in `test_type`,
# i.e. the former column headers of the wide table.
pd.unique(scores_pivot['test_type'])

array(['M', 'R', 'S'], dtype=object)

In [13]:
# For better readability, expand the one-letter codes in `test_type`
# into descriptive labels.
mapping = {'M': 'Math', 'R': 'Reading', 'S': 'Science'}

# Use .replace() rather than .map(): .map() turns every value that is not a key
# of `mapping` into NaN, so re-running this cell (when the column already holds
# 'Math' / 'Reading' / 'Science') would wipe the column out. .replace() leaves
# unmatched values untouched, which keeps the cell safe to re-run.
scores_pivot['test_type'] = scores_pivot['test_type'].replace(mapping)

In [14]:
# Confirm the relabelling: only the descriptive names should remain.
pd.unique(scores_pivot['test_type'])

array(['Math', 'Reading', 'Science'], dtype=object)

In [15]:
# Preview the first five rows of the long table after relabelling.
scores_pivot.head(n=5)

Unnamed: 0,district,test_type,score
0,Fox Chapel,Math,473
1,Tuscarora,Math,536
2,Sautee Nacoochee,Math,463
3,Fort Peck,Math,559
4,North Pole,Math,489


## Pivot columns

In [18]:
# Reverse the melt: spread `test_type` back out into one column per test,
# with one row per district (district becomes the index) and the scores
# as the cell values.
# NOTE: pivot_table aggregates duplicate (district, test_type) pairs
# (mean by default) instead of raising.
pd.pivot_table(
    scores_pivot,
    values='score',
    index='district',
    columns='test_type',
)

test_type,Math,Reading,Science
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aguadilla,513,416,465
Amalga,506,433,470
Angier,444,411,428
Arenzville,405,410,408
Asheville,489,433,461
...,...,...,...
Waldenburg,500,451,476
Whitley Gardens,423,395,409
Wiley,626,465,546
Woodlyn,454,421,438
