## 23. Pandas TypeError: unhashable type: 'list'/'dict'

Topics

* apply value_counts for list/dict column
* value_counts for list column
* identify list/dict columns
* `TypeError: unhashable type: 'dict'`
* `TypeError: unhashable type: 'list'`
* Correct way to expand list column

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)

In [2]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [[0.5, 0.1], [0.75, 0.25]],'col3': [{0:'a', 1:'b'}, {0:'c', 1:'d'}]})

In [3]:
df

Unnamed: 0,col1,col2,col3
0,1,"[0.5, 0.1]","{0: 'a', 1: 'b'}"
1,2,"[0.75, 0.25]","{0: 'c', 1: 'd'}"


## 1. TypeError: unhashable type: 'list'/'dict'

In [4]:
# TypeError: unhashable type: 'list'
df.col2.value_counts()

TypeError: unhashable type: 'list'

In [5]:
# TypeError: unhashable type: 'dict'
df.col3.value_counts()

TypeError: unhashable type: 'dict'

In [None]:
df.groupby('col3').transform({'col1': [min], 'col2': max})

## 2. How to detect if column contains list or dict

In [6]:
df.dtypes

col1    int64 
col2    object
col3    object
dtype: object

In [7]:
# detect list columns
df.applymap(lambda x: isinstance(x, list)).all()

col1    False
col2    True 
col3    False
dtype: bool

In [8]:
# detect dict columns
df.applymap(lambda x: isinstance(x, dict)).all()

col1    False
col2    False
col3    True 
dtype: bool

In [9]:
# detect dict or list columns
df.applymap(lambda x: isinstance(x, dict) or isinstance(x, list)).all()

col1    False
col2    True 
col3    True 
dtype: bool

## 3.1 Convert the column to string and apply value_counts

In [10]:
df['col2'].astype('str').value_counts()

[0.75, 0.25]    1
[0.5, 0.1]      1
Name: col2, dtype: int64

In [11]:
df['col3'].astype('str').value_counts()

{0: 'c', 1: 'd'}    1
{0: 'a', 1: 'b'}    1
Name: col3, dtype: int64

## 3.2 Convert the column to string and use group by

In [12]:
# TypeError: unhashable type: 'dict'
df[df.col3.notna()].groupby(['col3']).count()

TypeError: unhashable type: 'dict'

In [13]:
df[df.col2.notna()].astype('str').groupby(['col2']).count()

Unnamed: 0_level_0,col1,col3
col2,Unnamed: 1_level_1,Unnamed: 2_level_1
"[0.5, 0.1]",1,1
"[0.75, 0.25]",1,1


In [14]:
df[df.col3.notna()].astype('str').groupby(['col3']).count()

Unnamed: 0_level_0,col1,col2
col3,Unnamed: 1_level_1,Unnamed: 2_level_1
"{0: 'a', 1: 'b'}",1,1
"{0: 'c', 1: 'd'}",1,1


## 4. Convert list/dict column to tuple

In [15]:
# for list
df['col2'].apply(tuple).value_counts()

(0.5, 0.1)      1
(0.75, 0.25)    1
Name: col2, dtype: int64

In [16]:
# for dict
df['col3'].apply(tuple).value_counts()

(0, 1)    2
Name: col3, dtype: int64

## 5. Expand the list column

In [17]:
df.col2.apply(pd.Series)[0].value_counts()

0.75    1
0.50    1
Name: 0, dtype: int64

In [18]:
df.col2.apply(pd.Series)[1].value_counts()

0.10    1
0.25    1
Name: 1, dtype: int64

## 6. List column mixed: strings and list items

In [19]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [[0.5], 3],'col3': [{0:'a', 1:'b'}, {0:'c', 1:'d'}]})

In [20]:
df

Unnamed: 0,col1,col2,col3
0,1,[0.5],"{0: 'a', 1: 'b'}"
1,2,3,"{0: 'c', 1: 'd'}"


In [21]:
df.applymap(lambda x: x[0] if isinstance(x, list) else x)['col2'].value_counts()

3.0    1
0.5    1
Name: col2, dtype: int64

## Bonus Step #1: Correct way to expand list column

In [22]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [[0.5, 0.1], [0.75, 0.25]],'col3': [{0:'a', 1:'b'}, {0:'c', 1:'d'}]})

In [23]:
df

Unnamed: 0,col1,col2,col3
0,1,"[0.5, 0.1]","{0: 'a', 1: 'b'}"
1,2,"[0.75, 0.25]","{0: 'c', 1: 'd'}"


In [24]:
df.col2.str.split(',', expand=False)

0   NaN
1   NaN
Name: col2, dtype: float64

In [25]:
df.col2.astype('str').str.split(',', expand=True)

Unnamed: 0,0,1
0,[0.5,0.1]
1,[0.75,0.25]


In [26]:
df.col2.apply(pd.Series)

Unnamed: 0,0,1
0,0.5,0.1
1,0.75,0.25


In [27]:
df[['l1', 'l2']] = df.col2.apply(pd.Series)

In [28]:
df.set_index(['l1', 'l2'])

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col2,col3
l1,l2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.5,0.1,1,"[0.5, 0.1]","{0: 'a', 1: 'b'}"
0.75,0.25,2,"[0.75, 0.25]","{0: 'c', 1: 'd'}"
