In [1]:
import pandas as pd

In [2]:
# Raw records for ten coffee shops, stored column-wise: every key maps to a
# list of ten values, so position i across the lists describes shop i.
data = dict(
    Coffee_Shop=[
        'Brewed Awakening', 'Coffee Cloud', 'Bean Dream', 'Espresso Express',
        'Latte Love', 'Mocha Magic', 'Cafe Comfort', 'Seattle Sip',
        'Drip Drop', 'Grind Ground',
    ],
    Location=[
        'Downtown', 'Capitol Hill', 'Green Lake', 'Ballard',
        'West Seattle', 'Fremont', 'Queen Anne', 'Belltown',
        'University District', 'Magnolia',
    ],
    Avg_Rating=[4.5, 4.2, 5.0, 4.8, 4.6, 4.3, 4.9, 4.0, 4.7, 4.6],
    Coffee_Type=[
        'Espresso', 'Latte', 'Cappuccino', 'Americano',
        'Cold Brew', 'Macchiato', 'Espresso', 'Latte',
        'Drip Coffee', 'Americano',
    ],
    Tables_Available=[5, 8, 6, 7, 5, 9, 7, 8, 6, 5],
)

In [3]:
# Each dict key becomes a column; the row index defaults to 0..n-1.
coffee_shops_df = pd.DataFrame(data=data)

In [4]:
# Display the frame built from `data` to check its columns and rows.
coffee_shops_df

Unnamed: 0,Coffee_Shop,Location,Avg_Rating,Coffee_Type,Tables_Available
0,Brewed Awakening,Downtown,4.5,Espresso,5
1,Coffee Cloud,Capitol Hill,4.2,Latte,8
2,Bean Dream,Green Lake,5.0,Cappuccino,6
3,Espresso Express,Ballard,4.8,Americano,7
4,Latte Love,West Seattle,4.6,Cold Brew,5
5,Mocha Magic,Fremont,4.3,Macchiato,9
6,Cafe Comfort,Queen Anne,4.9,Espresso,7
7,Seattle Sip,Belltown,4.0,Latte,8
8,Drip Drop,University District,4.7,Drip Coffee,6
9,Grind Ground,Magnolia,4.6,Americano,5


#### If you had a column in coffee_shops_df that contained lists of coffee varieties offered, how would you use explode to create a separate row for each coffee type?

In [5]:
# One entry per shop in the four-row sample. The first two entries are lists,
# the last two are plain strings, so the new column mixes list and scalar values.
coffee_varieties = [
    ["Arabica", "Robusta"],
    ["Typica", "Bourbon", "Geisha"],
    "Hawaiian",
    "Sumatra",
]

In [6]:
# .loc slices by label and includes both endpoints, so labels 0 through 3 are
# selected (four rows). .copy() makes the sample independent of the source frame.
temp = coffee_shops_df.loc[0:3, :].copy()

In [7]:
# Inspect the four-row working copy before a new column is added to it.
temp

Unnamed: 0,Coffee_Shop,Location,Avg_Rating,Coffee_Type,Tables_Available
0,Brewed Awakening,Downtown,4.5,Espresso,5
1,Coffee Cloud,Capitol Hill,4.2,Latte,8
2,Bean Dream,Green Lake,5.0,Cappuccino,6
3,Espresso Express,Ballard,4.8,Americano,7


In [8]:
# Attach the varieties as a new object column, one entry per row of `temp`.
# The Series reuses temp's index so entries line up with rows positionally.
temp['Coffee_Varieties'] = pd.Series(coffee_varieties, index=temp.index)

In [9]:
# Check the new Coffee_Varieties column: rows 0-1 hold lists, rows 2-3 hold plain strings.
temp

Unnamed: 0,Coffee_Shop,Location,Avg_Rating,Coffee_Type,Tables_Available,Coffee_Varieties
0,Brewed Awakening,Downtown,4.5,Espresso,5,"[Arabica, Robusta]"
1,Coffee Cloud,Capitol Hill,4.2,Latte,8,"[Typica, Bourbon, Geisha]"
2,Bean Dream,Green Lake,5.0,Cappuccino,6,Hawaiian
3,Espresso Express,Ballard,4.8,Americano,7,Sumatra


In [10]:
# Each list element gets its own row; the other columns are repeated and the
# original index label is kept on every row produced from the same list.
temp.explode(column='Coffee_Varieties')

Unnamed: 0,Coffee_Shop,Location,Avg_Rating,Coffee_Type,Tables_Available,Coffee_Varieties
0,Brewed Awakening,Downtown,4.5,Espresso,5,Arabica
0,Brewed Awakening,Downtown,4.5,Espresso,5,Robusta
1,Coffee Cloud,Capitol Hill,4.2,Latte,8,Typica
1,Coffee Cloud,Capitol Hill,4.2,Latte,8,Bourbon
1,Coffee Cloud,Capitol Hill,4.2,Latte,8,Geisha
2,Bean Dream,Green Lake,5.0,Cappuccino,6,Hawaiian
3,Espresso Express,Ballard,4.8,Americano,7,Sumatra


#### How does the index behave when you use the explode method on a DataFrame?

In [11]:
# By default, every row produced from a list keeps the index label of its original row, so labels are duplicated.
# With ignore_index=True, the result gets a fresh 0..n-1 index instead.

In [12]:
# Same expansion as before, but the result is renumbered 0..n-1 instead of
# repeating the original index labels.
temp.explode(column='Coffee_Varieties', ignore_index=True)

Unnamed: 0,Coffee_Shop,Location,Avg_Rating,Coffee_Type,Tables_Available,Coffee_Varieties
0,Brewed Awakening,Downtown,4.5,Espresso,5,Arabica
1,Brewed Awakening,Downtown,4.5,Espresso,5,Robusta
2,Coffee Cloud,Capitol Hill,4.2,Latte,8,Typica
3,Coffee Cloud,Capitol Hill,4.2,Latte,8,Bourbon
4,Coffee Cloud,Capitol Hill,4.2,Latte,8,Geisha
5,Bean Dream,Green Lake,5.0,Cappuccino,6,Hawaiian
6,Espresso Express,Ballard,4.8,Americano,7,Sumatra


#### Can you use the explode method on columns with non-list data types?

In [13]:
# Yes, we can, but it makes no difference: scalar (non-list-like) values are returned unchanged, so each row appears as-is in the resulting DF.

#### How would you handle columns with mixed data, some rows having lists and others having single values, when using explode?

In [14]:
# Only the list-like values are exploded; rows that hold a single (scalar) value are kept unchanged.

In [15]:
# Column 'A' deliberately mixes a list, a string and plain integers;
# column 'B' holds one string per row.
data = dict(
    A=[[1, 2, 3], "foo", 1, 3],
    B=['bar', 'text', 'str', 'night'],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B
0,"[1, 2, 3]",bar
1,foo,text
2,1,str
3,3,night


In [16]:
# Only list-like entries in 'A' are expanded into one row per element;
# string and integer entries pass through unchanged.
df.explode(column='A')

Unnamed: 0,A,B
0,1,bar
0,2,bar
0,3,bar
1,foo,text
2,1,str
3,3,night


#### What happens to the other columns in the DataFrame when you explode a specific column?

In [17]:
# For every item in the list, the values of the other columns are repeated on the new row created for that item.

#### How does the explode method compare to the melt method?

In [18]:
# melt creates one row per value of each column listed in the value_vars argument.
# Basically, the DF goes from wide to long, with many more rows, because a new row is created for each value of those columns.
# The column values do not need to be lists.

# explode expands each list-like value of the given column into one row per element.

In [19]:
# Small wide-format frame: 'A' is the identifier column, 'B' and 'C' are the
# value columns that will be melted.
data = dict(
    A=['a', 'b', 'c'],
    B=[1, 3, 5],
    C=[2, 4, 6],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B,C
0,a,1,2
1,b,3,4
2,c,5,6


In [20]:
# Wide -> long: 'A' is kept as the identifier, and every value of 'B' and 'C'
# becomes its own row, labelled by the source column in 'variable'.
df.melt(
    id_vars=['A'],
    value_vars=['B', 'C'],
)

Unnamed: 0,A,variable,value
0,a,B,1
1,b,B,3
2,c,B,5
3,a,C,2
4,b,C,4
5,c,C,6


In [26]:
# For contrast with melt: explode leaves non-list-like values unchanged,
# so a column with no list-like entries comes back exactly as it went in.
df.explode(column='A')

Unnamed: 0,A,B,C
0,a,1,2
1,b,3,4
2,c,5,6


#### In what scenarios might you prefer to use explode over manually splitting and stacking data?

In [None]:
# For readability, and when a column holds simple list-like data, explode is the better choice.
# For customized expansion or more complex types, manual splitting and stacking can be used instead.