# handle dict and list columns

---
* author:  [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)
* YouTube: https://www.youtube.com/prasertcbs
* github: https://github.com/prasertcbs/pandas/blob/main/pandas_dict_list_columns.ipynb
* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)
---

In [1]:
import sys
import json
import ast # handle single/double quote in dict/json string
import pandas as pd

In [2]:
# Record the interpreter version, pandas version and run time for reproducibility.
print(
    f'Python  version: {sys.version}',
    f'pandas version: {pd.__version__}',
    pd.Timestamp.now(),
    sep='\n',
)

Python  version: 3.7.9 (default, Aug 31 2020, 17:10:11) [MSC v.1916 64 bit (AMD64)]
pandas version: 1.2.1
2021-02-21 11:59:01.565452


In [3]:
# Load the tab-separated sample data; the dict/list-looking cells are read as plain strings.
DATA_URL = 'https://github.com/prasertcbs/basic-dataset/raw/master/dict_list_column.tsv'
df = pd.read_csv(DATA_URL, sep='\t')
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot']


In [4]:
# The raw 'price' cell of the first row is still text, not a dict.
type(df.loc[0, 'price'])

str

In [5]:
# Show the Python type and value of the first row for every column.
for col_name in df.columns:
    first_value = df[col_name][0]
    print(f'{col_name:10}: {type(first_value)} {first_value}')

menu      : <class 'str'> mocha
price     : <class 'str'> {"S":40, "M":50, "L":60}
price2    : <class 'str'> {'S':40, 'M':50, 'L':60}
orders    : <class 'str'> 70,10,100
orders2   : <class 'str'> [70,10,100]
serve     : <class 'str'> hot,cold,frappe
serve2    : <class 'str'> ['hot', 'cold', 'frappe']


## json.loads

In [6]:
# Decode a JSON object (double-quoted keys) into a Python dict.
price_text = '{"S":40, "M":50, "L":60}'
j = json.loads(price_text)
print(type(j), j, sep='\n')

<class 'dict'>
{'S': 40, 'M': 50, 'L': 60}


In [7]:
# The decoded object supports normal dict key lookup.
j['M']

50

In [8]:
# Decode every JSON string in 'price' into a dict and keep the result as 'price_j'.
parsed_prices = df['price'].map(json.loads)
df['price_j'] = parsed_prices
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}"


In [9]:
# Same conversion through Series.apply; json.loads is passed directly as the callable.
df['price_j'] = df['price'].apply(json.loads)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}"


In [10]:
# Left commented out on purpose: JSON requires double-quoted keys, so json.loads
# raises json.JSONDecodeError on this single-quoted text.
# json.loads("{'S':40, 'M':50, 'L':60}")

## ast.literal_eval

In [11]:
# ast.literal_eval parses the double-quoted form as a Python dict literal.
ast.literal_eval('{"S":40, "M":50, "L":60}')

{'S': 40, 'M': 50, 'L': 60}

In [12]:
# Single-quoted keys are valid Python literals, so literal_eval handles them too.
single_quoted_text = "{'S':40, 'M':50, 'L':60}"
k = ast.literal_eval(single_quoted_text)
print(type(k), k, sep='\n')

<class 'dict'>
{'S': 40, 'M': 50, 'L': 60}


In [13]:
# Parse the single-quoted 'price2' strings into dicts; this overwrites 'price_j'.
parsed_price2 = df['price2'].map(ast.literal_eval)
df['price_j'] = parsed_price2
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}"


In [14]:
# literal_eval also copes with the double-quoted 'price' strings; result goes to 'price_k'.
df['price_k'] = df['price'].apply(ast.literal_eval)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}"


### access dict

In [15]:
# After parsing, the first 'price_j' cell holds a real dict rather than a str.
print(type(df['price_j'][0]))

<class 'dict'>


In [16]:
# Parsed dict stored in the first row of 'price_j'.
df['price_j'][0]

{'S': 40, 'M': 50, 'L': 60}

In [17]:
# Chain a key lookup onto the stored dict to read its 'M' entry.
df['price_j'][0]['M']

50

In [18]:
# Same lookup, selecting the cell with label-based .loc before indexing the dict.
df.loc[0, 'price_j']['M']

50

In [19]:
# Pull the 'M' entry out of every row's dict.
df['price_j'].apply(lambda sizes: sizes['M'])

0    50
1    60
2    49
Name: price_j, dtype: int64

## list

In [20]:
# Split the comma-separated text into a list per row; each piece is still a str.
df['orders'].str.split(',')

0    [70, 10, 100]
1     [40, 20, 80]
2     [120, 5, 50]
Name: orders, dtype: object

In [21]:
# The source column itself is unchanged: its cells are plain strings.
type(df['orders'][0])

str

In [22]:
# Split each comma-separated string and convert the pieces to ints explicitly.
# pd.eval is meant for expression strings; handing it a Series only works because
# pandas first renders the Series to text, and that rendering is cut off after
# display.max_seq_items (100 by default) rows, so it breaks on longer frames.
df['x'] = df['orders'].str.split(',').map(lambda parts: [int(part) for part in parts])
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k,x
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]"


In [23]:
# The elements of 'x' are ints, so the first row's list can be summed directly.
sum(df['x'][0])

180

In [24]:
# Split on commas, then turn every piece into an int with a list comprehension.
df['c1'] = df['orders'].str.split(',').apply(lambda parts: [int(part) for part in parts])
df['c1']

0    [70, 10, 100]
1     [40, 20, 80]
2     [120, 5, 50]
Name: c1, dtype: object

In [25]:
# First row of 'c1': a list of ints.
df['c1'][0]

[70, 10, 100]

In [26]:
# Parse each bracketed string in 'orders2' (e.g. "[70,10,100]") into a list of ints.
# ast.literal_eval is applied per row instead of pd.eval(Series): pd.eval expects an
# expression string, and its text rendering of a Series is truncated after
# display.max_seq_items (100 by default) rows, so it breaks on longer frames.
df['c2'] = df['orders2'].map(ast.literal_eval)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k,x,c1,c2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]"


In [27]:
# First row of the parsed 'c2' column.
df['c2'][0]

[70, 10, 100]

In [28]:
# Second element (index 1) of every row's list.
df['c2'].apply(lambda values: values[1])

0    10
1    20
2     5
Name: c2, dtype: int64

In [29]:
# Break the comma-separated 'serve' text into a list of strings per row.
serve_lists = df['serve'].str.split(pat=',')
df['d1'] = serve_lists
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k,x,c1,c2,d1
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot]


In [30]:
# Parse each list-literal string in 'serve2' (e.g. "['hot', 'cold']") into a list of str.
# ast.literal_eval is applied per row instead of pd.eval(Series): pd.eval expects an
# expression string, and its text rendering of a Series is truncated after
# display.max_seq_items (100 by default) rows, so it breaks on longer frames.
df['d2'] = df['serve2'].apply(ast.literal_eval)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k,x,c1,c2,d1,d2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot],[hot]


In [31]:
# Row-wise literal_eval turns each list-literal string in 'serve2' into a list.
parsed_serve2 = df['serve2'].map(ast.literal_eval)
df['d2'] = parsed_serve2
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,price_k,x,c1,c2,d1,d2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot],[hot]


In [32]:
# First row of 'd2' is a real list of strings.
df['d2'][0]

['hot', 'cold', 'frappe']

In [33]:
# Third element (index 2) of the first row's 'd2' list.
df['d2'][0][2]

'frappe'

In [34]:
# Save the frame as a tab-separated file without the index column.
df.to_csv(path_or_buf='out.tsv', sep='\t', index=False)