In [2]:
# Load the libraries used throughout the notebook
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Ask the inline backend for 'retina' figures so plots are not blurry
%config InlineBackend.figure_format = 'retina'
# Switch seaborn to its 'talk' context, which uses a larger scale for plots
sns.set_context('talk')

# Silence every warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

# Show the value of every expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

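Parameters of `DataFrame.melt` used in this notebook:

- `id_vars = ['col_name']` – column(s) kept as identifiers instead of being melted
- `var_name` – name given to the resulting `variable` column
- `value_name` – name given to the resulting `value` column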

In [3]:
# Smallest wide frame: one city column holding a single reading
df = pd.DataFrame([{'New York': 25}])
df

Unnamed: 0,New York
0,25


In [4]:
# Unpivot with defaults: each column header becomes a row under `variable`
pd.melt(df)

Unnamed: 0,variable,value
0,New York,25


In [5]:
# Rebind `df` to a one-row wide frame with three city columns
df = pd.DataFrame([{'New york': 25, 'Paris': 27, 'London': 30}])
df

Unnamed: 0,New york,Paris,London
0,25,27,30


In [11]:
# Each of the three city columns turns into its own (variable, value) row
pd.melt(df)

Unnamed: 0,variable,value
0,New york,25
1,Paris,27
2,London,30


In [12]:
# Five readings per city, written row by row (one row per observation)
df_larger = pd.DataFrame(
    [
        [25, 27, 30],
        [27, 22, 31],
        [23, 24, 33],
        [25, 26, 29],
        [29, 28, 25],
    ],
    columns=['New york', 'Paris', 'London'],
)
df_larger

Unnamed: 0,New york,Paris,London
0,25,27,30
1,27,22,31
2,23,24,33
3,25,26,29
4,29,28,25


In [20]:
# Same unpivot, but label the two output columns `city` and `temp`
pd.melt(df_larger, var_name='city', value_name='temp')

Unnamed: 0,city,temp
0,New york,25
1,New york,27
2,New york,23
3,New york,25
4,New york,29
5,Paris,27
6,Paris,22
7,Paris,24
8,Paris,26
9,Paris,28


In [21]:
# One row per city with five daily readings, written row by row
temperatures = pd.DataFrame(
    [
        ['New York', 23, 22, 26, 23, 27],
        ['London', 25, 21, 25, 21, 26],
        ['Paris', 27, 25, 24, 22, 27],
        ['Berlin', 26, 26, 27, 26, 24],
        ['Amsterdam', 24, 23, 23, 27, 28],
    ],
    columns=['city', 'day1', 'day2', 'day3', 'day4', 'day5'],
)
temperatures

Unnamed: 0,city,day1,day2,day3,day4,day5
0,New York,23,22,26,23,27
1,London,25,21,25,21,26
2,Paris,27,25,24,22,27
3,Berlin,26,26,27,26,24
4,Amsterdam,24,23,23,27,28


In [24]:
# With no id_vars, the `city` column is melted too and its names land in `value`
pd.melt(temperatures)

Unnamed: 0,variable,value
0,city,New York
1,city,London
2,city,Paris
3,city,Berlin
4,city,Amsterdam
5,day1,23
6,day1,25
7,day1,27
8,day1,26
9,day1,24


In [25]:
# Keep `city` as the identifier so only the day columns are unpivoted
pd.melt(temperatures, id_vars=['city'])

Unnamed: 0,city,variable,value
0,New York,day1,23
1,London,day1,25
2,Paris,day1,27
3,Berlin,day1,26
4,Amsterdam,day1,24
5,New York,day2,22
6,London,day2,21
7,Paris,day2,25
8,Berlin,day2,26
9,Amsterdam,day2,23


In [27]:
# Identifier stays `city`; the melted columns are named `date` and `temperature`
pd.melt(
    temperatures,
    id_vars=['city'],
    var_name='date',
    value_name='temperature',
)

Unnamed: 0,city,date,temperature
0,New York,day1,23
1,London,day1,25
2,Paris,day1,27
3,Berlin,day1,26
4,Amsterdam,day1,24
5,New York,day2,22
6,London,day2,21
7,Paris,day2,25
8,Berlin,day2,26
9,Amsterdam,day2,23


## Melt on Real Data

In [29]:
# Read only the four columns needed, parse `date` as datetimes and use it as the index
stocks = pd.read_csv(
    'prices-split-adjusted.csv',
    index_col='date',
    parse_dates=['date'],
    usecols=['date', 'symbol', 'open', 'close'],
)
stocks.head(5)

Unnamed: 0_level_0,symbol,open,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-01-05,WLTW,123.43,125.839996
2016-01-06,WLTW,125.239998,119.980003
2016-01-07,WLTW,116.379997,114.949997
2016-01-08,WLTW,115.480003,116.620003
2016-01-11,WLTW,117.010002,114.970001


In [33]:
# Label-slice the date index from 2016-02-01 through 2016-03-01, then move `date` back to a column
date_window = slice('2016-02-01', '2016-03-01')
stocks_small = stocks.loc[date_window].reset_index()
stocks_small.head(5)

Unnamed: 0,date,symbol,open,close
0,2016-02-01,WLTW,114.0,114.5
1,2016-02-02,WLTW,113.25,110.559998
2,2016-02-03,WLTW,113.379997,114.050003
3,2016-02-04,WLTW,114.080002,115.709999
4,2016-02-05,WLTW,115.120003,114.019997


In [38]:
# No id_vars: every column, including `date`, is stacked into variable/value
pd.melt(stocks_small)

Unnamed: 0,variable,value
0,date,2016-02-01 00:00:00
1,date,2016-02-02 00:00:00
2,date,2016-02-03 00:00:00
3,date,2016-02-04 00:00:00
4,date,2016-02-05 00:00:00
...,...,...
41995,close,54.7735
41996,close,97.39
41997,close,22.31
41998,close,42.1


In [40]:
# Keep `symbol` as the identifier; `date`, `open` and `close` are all melted.
# NOTE(review): `date` is not in id_vars, so datetimes share `value` with prices — confirm intended.
melted = pd.melt(stocks_small, id_vars=['symbol'])
melted

Unnamed: 0,symbol,variable,value
0,WLTW,date,2016-02-01 00:00:00
1,WLTW,date,2016-02-02 00:00:00
2,WLTW,date,2016-02-03 00:00:00
3,WLTW,date,2016-02-04 00:00:00
4,WLTW,date,2016-02-05 00:00:00
...,...,...,...
31495,YUM,close,54.7735
31496,ZBH,close,97.39
31497,ZION,close,22.31
31498,ZTS,close,42.1


In [41]:
# Report how many rows the melted frame has
print(f"Number of rows in melted table: {melted.shape[0]}")


Number of rows in melted table: 31500


In [42]:
# Report the row count of the un-melted frame for comparison
print(f"Number of rows in the original table: {stocks_small.shape[0]}")


Number of rows in the original table: 10500
