# Pandas Explode
You may find yourself with a dataset that contains a list of values in a single column, when what you really want is to spread the values from each list across many rows or many columns. So, how do you do that? The pandas `.explode()` method, along with a few related techniques, gives you the tools to do this. For more in-depth information, see the [pandas `DataFrame.explode` documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.explode.html).  

Let's practice the different ways we can use `explode`.

In [1]:
import ast

import numpy as np
import pandas as pd

In [2]:
# Build a 25x5 array of uniform random values, rounded to two decimals
array = np.round(np.random.rand(25, 5), 2)
array

array([[0.9 , 0.5 , 0.43, 0.72, 0.33],
       [0.17, 0.75, 0.8 , 0.81, 0.88],
       [0.83, 0.02, 0.97, 0.64, 0.38],
       [0.25, 0.73, 0.6 , 0.24, 0.77],
       [0.64, 0.16, 0.74, 0.43, 0.94],
       [0.51, 0.03, 0.33, 0.5 , 0.13],
       [0.6 , 0.98, 0.87, 0.22, 0.23],
       [0.26, 0.01, 0.84, 0.61, 0.46],
       [0.04, 0.18, 0.18, 0.9 , 0.96],
       [0.22, 0.81, 0.32, 0.6 , 0.83],
       [0.56, 0.09, 0.02, 0.19, 0.28],
       [0.4 , 0.08, 0.53, 0.29, 0.42],
       [0.41, 0.73, 0.55, 0.68, 0.09],
       [0.83, 0.11, 0.4 , 0.48, 0.33],
       [0.1 , 0.59, 0.95, 0.36, 0.75],
       [0.23, 0.37, 0.08, 0.23, 0.99],
       [0.53, 0.61, 0.46, 0.78, 0.16],
       [0.62, 0.35, 0.72, 0.29, 0.99],
       [0.64, 0.04, 0.42, 0.64, 0.05],
       [0.32, 0.51, 0.3 , 0.9 , 0.54],
       [0.2 , 0.19, 0.69, 0.41, 0.12],
       [0.41, 0.01, 0.49, 0.16, 0.88],
       [0.44, 0.03, 0.93, 0.95, 0.01],
       [0.1 , 0.58, 0.47, 0.  , 0.83],
       [0.38, 0.16, 0.62, 0.89, 0.6 ]])

## Explode a list of values into rows

In [3]:
# Wrap each row of the array as a Python list inside a single-column frame
df = pd.Series(array.tolist(), name="list_values").to_frame()
df.head(5)

Unnamed: 0,list_values
0,"[0.9, 0.5, 0.43, 0.72, 0.33]"
1,"[0.17, 0.75, 0.8, 0.81, 0.88]"
2,"[0.83, 0.02, 0.97, 0.64, 0.38]"
3,"[0.25, 0.73, 0.6, 0.24, 0.77]"
4,"[0.64, 0.16, 0.74, 0.43, 0.94]"


In [4]:
# Inspect the frame: a single column whose dtype is `object` (Python lists)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   list_values  25 non-null     object
dtypes: object(1)
memory usage: 328.0+ bytes


In [7]:
# Each cell holds a Python list. Use a single positional lookup
# (row 0, column 0): chaining `df.iloc[0][0]` relies on integer-as-position
# Series indexing, which pandas has deprecated.
type(df.iloc[0, 0])

list

In [8]:
# One output row per list element; the original index label is repeated
df.explode("list_values")

Unnamed: 0,list_values
0,0.9
0,0.5
0,0.43
0,0.72
0,0.33
...,...
24,0.38
24,0.16
24,0.62
24,0.89


## Explode a list of values into columns

In [12]:
# Spread each list across columns, one column per element.
# Steps: select the column -> convert it to a list of lists -> build a new frame
pd.DataFrame(df["list_values"].tolist()).head()

Unnamed: 0,0,1,2,3,4
0,0.9,0.5,0.43,0.72,0.33
1,0.17,0.75,0.8,0.81,0.88
2,0.83,0.02,0.97,0.64,0.38
3,0.25,0.73,0.6,0.24,0.77
4,0.64,0.16,0.74,0.43,0.94


In [13]:
# After the split every column is numeric (float64)
pd.DataFrame(df["list_values"].tolist()).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       25 non-null     float64
 1   1       25 non-null     float64
 2   2       25 non-null     float64
 3   3       25 non-null     float64
 4   4       25 non-null     float64
dtypes: float64(5)
memory usage: 1.1 KB


## Explode a string representation of a list into columns

In [14]:
# Here every row holds the *string* form of a list, not an actual list
df_str = pd.DataFrame({"list_values": list(map(str, array.tolist()))})
df_str.head()

Unnamed: 0,list_values
0,"[0.9, 0.5, 0.43, 0.72, 0.33]"
1,"[0.17, 0.75, 0.8, 0.81, 0.88]"
2,"[0.83, 0.02, 0.97, 0.64, 0.38]"
3,"[0.25, 0.73, 0.6, 0.24, 0.77]"
4,"[0.64, 0.16, 0.74, 0.43, 0.94]"


In [16]:
# Each cell is a str; index with one positional lookup (row 0, column 0)
# instead of the deprecated chained `iloc[0][0]`
type(df_str.iloc[0, 0])

str

In [17]:
# This does not work as expected: each string stays whole, giving one column
pd.DataFrame(df_str["list_values"].tolist()).head()

Unnamed: 0,0
0,"[0.9, 0.5, 0.43, 0.72, 0.33]"
1,"[0.17, 0.75, 0.8, 0.81, 0.88]"
2,"[0.83, 0.02, 0.97, 0.64, 0.38]"
3,"[0.25, 0.73, 0.6, 0.24, 0.77]"
4,"[0.64, 0.16, 0.74, 0.43, 0.94]"


In [18]:
# One option is to parse each string back into a list, then proceed as usual.
# `ast.literal_eval` only accepts Python literals (lists, numbers, strings, ...),
# so unlike `eval` it cannot execute arbitrary code hidden in the data.
pd.DataFrame(df_str["list_values"].apply(ast.literal_eval).tolist()).head()

Unnamed: 0,0,1,2,3,4
0,0.9,0.5,0.43,0.72,0.33
1,0.17,0.75,0.8,0.81,0.88
2,0.83,0.02,0.97,0.64,0.38
3,0.25,0.73,0.6,0.24,0.77
4,0.64,0.16,0.74,0.43,0.94
