# Pandas Explode
You may find yourself with a dataset that contains a list of values in a single column, when what you really want is to spread the values from each list across many rows or many columns. So, how do you do that? The pandas `.explode()` method, together with a few related techniques, gives you the tools to do this. For more in-depth information about `.explode()`, see the [pandas documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.explode.html).  

Let's practice the different ways we can use `explode`.

In [1]:
import ast

import numpy as np
import pandas as pd

In [2]:
# Build a 25x5 array of random floats drawn from [0, 1),
# then round every entry to two decimal places
array = np.round(np.random.rand(25, 5), 2)
array

array([[0.41, 0.76, 0.46, 0.12, 0.47],
       [0.86, 0.55, 0.85, 0.09, 0.97],
       [0.53, 0.48, 0.75, 0.14, 0.85],
       [0.92, 0.18, 0.14, 0.58, 0.69],
       [0.68, 0.32, 0.83, 0.31, 0.56],
       [0.04, 0.06, 0.11, 0.56, 0.62],
       [0.7 , 0.66, 0.52, 0.74, 0.05],
       [0.6 , 0.22, 0.11, 0.95, 0.68],
       [0.25, 0.88, 0.78, 0.42, 0.77],
       [0.57, 0.17, 0.31, 0.02, 0.67],
       [0.72, 0.16, 0.2 , 0.88, 0.68],
       [0.66, 0.76, 0.8 , 0.02, 0.48],
       [0.04, 0.63, 0.75, 0.91, 0.06],
       [0.44, 0.24, 0.33, 0.9 , 0.3 ],
       [0.67, 0.6 , 0.54, 0.92, 0.75],
       [0.26, 0.43, 0.65, 0.67, 0.78],
       [0.28, 0.31, 0.5 , 0.17, 0.94],
       [0.69, 0.47, 0.37, 0.2 , 0.66],
       [0.82, 0.73, 0.32, 0.82, 0.08],
       [0.87, 0.1 , 0.29, 0.93, 0.66],
       [0.35, 0.03, 0.23, 0.6 , 0.55],
       [0.24, 0.66, 0.79, 0.52, 0.  ],
       [0.98, 0.59, 0.78, 0.02, 0.6 ],
       [0.21, 0.88, 0.44, 0.66, 0.13],
       [0.18, 0.65, 0.99, 0.38, 0.24]])

## Explode a list of values into rows

In [3]:
# Wrap each row of the array (as a plain Python list) in a single
# object column named "list_values"
df = pd.Series(array.tolist(), name="list_values").to_frame()
df.head()

Unnamed: 0,list_values
0,"[0.41, 0.76, 0.46, 0.12, 0.47]"
1,"[0.86, 0.55, 0.85, 0.09, 0.97]"
2,"[0.53, 0.48, 0.75, 0.14, 0.85]"
3,"[0.92, 0.18, 0.14, 0.58, 0.69]"
4,"[0.68, 0.32, 0.83, 0.31, 0.56]"


In [4]:
# Summarize the frame: one column, 25 non-null rows, dtype `object`
# (each cell holds a whole Python list rather than a scalar)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   list_values  25 non-null     object
dtypes: object(1)
memory usage: 328.0+ bytes


In [5]:
# each value is a list
# Use a single positional lookup (row 0, column 0). The chained form
# `df.iloc[0][0]` indexes a Series labelled by column name with an integer
# key, which relies on a positional fallback that pandas has deprecated.
type(df.iloc[0, 0])

list

In [6]:
# Give every list element its own row; the original index value is
# repeated for each element that came from the same list
df.explode("list_values")

Unnamed: 0,list_values
0,0.41
0,0.76
0,0.46
0,0.12
0,0.47
...,...
24,0.18
24,0.65
24,0.99
24,0.38


## Explode a list of values into columns

In [7]:
# Spread each list across columns: a list of lists passed to the
# DataFrame constructor becomes one column per list position
pd.DataFrame(df["list_values"].tolist()).head()

Unnamed: 0,0,1,2,3,4
0,0.41,0.76,0.46,0.12,0.47
1,0.86,0.55,0.85,0.09,0.97
2,0.53,0.48,0.75,0.14,0.85
3,0.92,0.18,0.14,0.58,0.69
4,0.68,0.32,0.83,0.31,0.56


## Explode a string representation of a list into columns

In [8]:
# Now each row holds the *string representation* of a list,
# not an actual list of values
df_str = pd.DataFrame({"list_values": list(map(str, array.tolist()))})
df_str.head()

Unnamed: 0,list_values
0,"[0.41, 0.76, 0.46, 0.12, 0.47]"
1,"[0.86, 0.55, 0.85, 0.09, 0.97]"
2,"[0.53, 0.48, 0.75, 0.14, 0.85]"
3,"[0.92, 0.18, 0.14, 0.58, 0.69]"
4,"[0.68, 0.32, 0.83, 0.31, 0.56]"


In [9]:
# each value is a str
# Use a single positional lookup (row 0, column 0). The chained form
# `df_str.iloc[0][0]` indexes a Series labelled by column name with an
# integer key, which relies on a positional fallback that pandas has
# deprecated.
type(df_str.iloc[0, 0])

str

In [10]:
# This does not work as expected: every entry is a single string, so the
# constructor produces one column of strings instead of one column per value
pd.DataFrame(df_str["list_values"].tolist()).head()

Unnamed: 0,0
0,"[0.41, 0.76, 0.46, 0.12, 0.47]"
1,"[0.86, 0.55, 0.85, 0.09, 0.97]"
2,"[0.53, 0.48, 0.75, 0.14, 0.85]"
3,"[0.92, 0.18, 0.14, 0.58, 0.69]"
4,"[0.68, 0.32, 0.83, 0.31, 0.56]"


In [11]:
# One option is to parse each string back into a list, then proceed as usual.
# `ast.literal_eval` only accepts Python literals (numbers, strings, lists,
# tuples, dicts, ...), so unlike the built-in `eval` it cannot execute
# arbitrary code that happens to be stored in the column.
pd.DataFrame(df_str["list_values"].apply(ast.literal_eval).tolist()).head()

Unnamed: 0,0,1,2,3,4
0,0.41,0.76,0.46,0.12,0.47
1,0.86,0.55,0.85,0.09,0.97
2,0.53,0.48,0.75,0.14,0.85
3,0.92,0.18,0.14,0.58,0.69
4,0.68,0.32,0.83,0.31,0.56
