# handle list and dict columns

---
* author:  [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)
* YouTube: https://www.youtube.com/prasertcbs
* GitHub: https://github.com/prasertcbs/
* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)
---

In [1]:
import re
import json
import ast # handle single/double quote in dict/json string
import pandas as pd
import numpy as np

In [2]:
# Record the library versions and the run timestamp, one item per line.
print(
    f'pandas version: {pd.__version__}',
    f'numpy  version: {np.__version__}',
    pd.Timestamp.now(),
    sep='\n',
)

pandas version: 1.2.1
numpy  version: 1.19.2
2021-02-21 07:23:07.308281


In [3]:
# Load the tab-separated sample table whose columns hold dict/list data as text.
DATA_URL = 'https://github.com/prasertcbs/basic-dataset/raw/master/dict_list_column.tsv'
df = pd.read_csv(DATA_URL, sep='\t')
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot']


In [28]:
# Python type of the first raw `price` value as loaded from the file.
type(df.loc[0, 'price'])

str

In [29]:
# Python type of the first raw `price2` value as loaded from the file.
type(df.loc[0, 'price2'])

str

In [31]:
# Python type of the first raw `orders2` value as loaded from the file.
type(df.loc[0, 'orders2'])

str

In [32]:
# Python type of the first raw `serve2` value as loaded from the file.
type(df.loc[0, 'serve2'])

str

## json.loads

In [5]:
# A double-quoted JSON object string parses directly into a dict.
price_json_text = '{"S":40, "M":50, "L":60}'
json.loads(price_json_text)

{'S': 40, 'M': 50, 'L': 60}

In [6]:
# Parse every JSON-formatted `price` string into a dict and store it in `price_j`.
df['price_j'] = df['price'].map(json.loads)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}"


## ast.literal_eval

In [7]:
# credit: https://stackoverflow.com/questions/4162642/single-vs-double-quotes-in-json
# ast.literal_eval reads the text as a Python literal, so double-quoted keys work.
double_quoted_text = '{"S":40, "M":50, "L":60}'
ast.literal_eval(double_quoted_text)

{'S': 40, 'M': 50, 'L': 60}

In [8]:
# Single-quoted keys are valid Python literal syntax, so literal_eval parses them too.
single_quoted_text = "{'S':40, 'M':50, 'L':60}"
ast.literal_eval(single_quoted_text)

{'S': 40, 'M': 50, 'L': 60}

In [9]:
# Show the first raw `price2` string (single-quoted keys).
df.loc[0, 'price2']

"{'S':40, 'M':50, 'L':60}"

In [10]:
# Parse the single-quoted `price2` string (the one displayed in the previous cell).
# This is the case ast.literal_eval is needed for: single-quoted keys are not
# valid JSON, but they are a valid Python dict literal.
j = ast.literal_eval(df.loc[0, 'price2'])
print(type(j))
print(j)

<class 'dict'>
{'S': 40, 'M': 50, 'L': 60}


In [11]:
# Parse each single-quoted `price2` string as a Python literal and
# overwrite `price_j` with the resulting dicts.
parsed_price2 = df['price2'].map(ast.literal_eval)
df['price_j'] = parsed_price2
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}"


In [12]:
# Confirm the parsed value is now a real dict rather than text.
type(df.loc[0, 'price_j'])

dict

## list

In [13]:
# Break each comma-separated `orders` string into a list of string tokens.
df['orders'].str.split(pat=',')

0    [70, 10, 100]
1     [40, 20, 80]
2     [120, 5, 50]
Name: orders, dtype: object

In [14]:
# The stored `orders` value itself is still plain text.
type(df.loc[0, 'orders'])

str

In [15]:
# Turn each comma-separated `orders` string into a list of ints.
#
# NOTE(review): this used to be pd.eval(<Series>). pd.eval is documented to take
# a *string* expression; a Series only works because pandas stringifies it first.
# That printed form is cut off with "..." once the Series is longer than
# display.max_seq_items (100 by default), so the call fails on larger frames,
# and it evaluates file contents as an expression. An explicit split + int()
# has neither problem and gives the same column.
df['x'] = df['orders'].str.split(',').map(lambda tokens: [int(token) for token in tokens])
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,x
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]"


In [16]:
# The elements are numeric, so the first row's list can be summed.
sum(df.loc[0, 'x'])

180

In [17]:
# Split on commas, then cast every token to int so each row holds a list of numbers.
order_tokens = df['orders'].str.split(',')
df['c1'] = order_tokens.apply(lambda tokens: [int(token) for token in tokens])
df['c1']

0    [70, 10, 100]
1     [40, 20, 80]
2     [120, 5, 50]
Name: c1, dtype: object

In [18]:
# First row of the converted column.
df.loc[0, 'c1']

[70, 10, 100]

In [19]:
# Parse each bracketed `orders2` string (e.g. "[70,10,100]") into a list of ints.
#
# NOTE(review): this used to be pd.eval(<Series>). pd.eval expects a string
# expression; passing a Series relies on pandas stringifying it, which is
# truncated past display.max_seq_items (100 by default) rows and evaluates
# file contents as an expression. ast.literal_eval accepts only Python
# literals and is applied row by row, so it works at any length.
df['c2'] = df['orders2'].map(ast.literal_eval)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,x,c1,c2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]"


In [20]:
# First row of the parsed `orders2` column.
df.loc[0, 'c2']

[70, 10, 100]

In [21]:
# `serve` is plain comma-separated text, so a string split yields the list of options.
df['d1'] = df['serve'].str.split(pat=',')
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,x,c1,c2,d1
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot]


In [22]:
# Parse each `serve2` string (e.g. "['hot', 'cold']") into a list of strings.
#
# NOTE(review): this used to be pd.eval(<Series>). pd.eval expects a string
# expression; a Series only works through its stringified form, which is
# truncated past display.max_seq_items (100 by default) rows and evaluates
# file contents as an expression. ast.literal_eval accepts only Python
# literals and is applied row by row.
df['d2'] = df['serve2'].map(ast.literal_eval)
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,x,c1,c2,d1,d2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot],[hot]


In [23]:
# Parse each `serve2` string as a Python list literal and store the lists in `d2`.
serve_options = df['serve2'].map(ast.literal_eval)
df['d2'] = serve_options
df

Unnamed: 0,menu,price,price2,orders,orders2,serve,serve2,price_j,x,c1,c2,d1,d2
0,mocha,"{""S"":40, ""M"":50, ""L"":60}","{'S':40, 'M':50, 'L':60}",7010100,"[70,10,100]","hot,cold,frappe","['hot', 'cold', 'frappe']","{'S': 40, 'M': 50, 'L': 60}","[70, 10, 100]","[70, 10, 100]","[70, 10, 100]","[hot, cold, frappe]","[hot, cold, frappe]"
1,latte,"{""S"":45, ""M"":60, ""L"":70}","{'S':45, 'M':60, 'L':70}",402080,"[40,20,80]","hot,cold","['hot', 'cold']","{'S': 45, 'M': 60, 'L': 70}","[40, 20, 80]","[40, 20, 80]","[40, 20, 80]","[hot, cold]","[hot, cold]"
2,espresso,"{""S"":39, ""M"":49, ""L"":59}","{'S':39, 'M':49, 'L':59}",120550,"[120,5,50]",hot,['hot'],"{'S': 39, 'M': 49, 'L': 59}","[120, 5, 50]","[120, 5, 50]","[120, 5, 50]",[hot],[hot]


In [24]:
# First row of `d2` is a real list of strings.
df.loc[0, 'd2']

['hot', 'cold', 'frappe']

In [25]:
# Because the value is a list, it can be indexed: third option of the first row.
df.loc[0, 'd2'][2]

'frappe'