In [9]:
# Import the libraries

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib as mpl
# Show DataFrames with their plain-text repr instead of HTML tables
pd.set_option('display.notebook_repr_html', False)

In [10]:
# Parsing options for the iris CSV, spelled out explicitly for illustration.
csv_options = dict(
    sep=",",          # field separator
    thousands=None,   # thousands separator for numbers (none)
    decimal=".",      # decimal separator for numbers
)

# Read the dataset directly from the remote repository.
df = pd.read_csv(
    "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/iris.csv",
    **csv_options,
)

df.head()

   Sepal_Length  Sepal_Width  Petal_Length  Petal_Width Species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

## Melt & Pivot

In [11]:
##
## Add a key that identifies each case (row).
## The ids are derived from the frame's own length rather than a hard-coded
## row count, so the cell still works if the dataset size changes.
##
df["id"] = range(len(df))
df.head()

   Sepal_Length  Sepal_Width  Petal_Length  Petal_Width Species  id
0           5.1          3.5           1.4          0.2  setosa   0
1           4.9          3.0           1.4          0.2  setosa   1
2           4.7          3.2           1.3          0.2  setosa   2
3           4.6          3.1           1.5          0.2  setosa   3
4           5.0          3.6           1.4          0.2  setosa   4

In [13]:
# Reshape to long format: every column except "id" is stacked into
# (Variables, Values) pairs. Species is melted together with the numeric
# columns, so the Values column ends up holding both numbers and strings.
df_melt = df.melt(
    id_vars="id",           # column kept as identifier (not stacked)
    var_name="Variables",   # name of the column holding the stacked column names
    value_name="Values",    # name of the column holding the values
)
df_melt.head()

   id     Variables Values
0   0  Sepal_Length    5.1
1   1  Sepal_Length    4.9
2   2  Sepal_Length    4.7
3   3  Sepal_Length    4.6
4   4  Sepal_Length      5

In [14]:
# Show the last five rows of the long-format frame
df_melt.tail(n=5)

      id Variables     Values
745  145   Species  virginica
746  146   Species  virginica
747  147   Species  virginica
748  148   Species  virginica
749  149   Species  virginica

In [15]:
# Undo the melt: one row per id and one column per entry in "Variables"
df_wide = df_melt.pivot(index="id", columns="Variables", values="Values")
df_wide.head(10)

Variables Petal_Length Petal_Width Sepal_Length Sepal_Width Species
id                                                                 
0                  1.4         0.2          5.1         3.5  setosa
1                  1.4         0.2          4.9           3  setosa
2                  1.3         0.2          4.7         3.2  setosa
3                  1.5         0.2          4.6         3.1  setosa
4                  1.4         0.2            5         3.6  setosa
5                  1.7         0.4          5.4         3.9  setosa
6                  1.4         0.3          4.6         3.4  setosa
7                  1.5         0.2            5         3.4  setosa
8                  1.4         0.2          4.4         2.9  setosa
9                  1.5         0.1          4.9         3.1  setosa

In [None]:
%%R -i df
# Long format with reshape2. The data-frame method of melt() takes `id.vars`
# and `measure.vars`; the previous `measured=` argument is not one of its
# parameters and was silently ignored, so the column list had no effect.
library(dplyr)
library(reshape2)

df_melt <- melt(df,
                id.vars = 'id',
                measure.vars = c('Sepal_Length', 'Sepal_Width', 'Petal_Length',
                                 'Petal_Width', 'Species'))
df_melt %>% head(10)

In [None]:
%%R
# Cast the long table back to wide format: one row per id, one column per variable
head(dcast(df_melt, id ~ variable, value.var = "value"), 10)

In [None]:
%%R
# Same reshape with tidyr: gather every column except id into key/value pairs.
# Note that this overwrites the df_melt created earlier with reshape2.
library(tidyr)

df_melt <- gather(df, key, value, -id)
head(df_melt, 10)

In [None]:
%%R
# Last ten rows of the gathered (long) table
tail(df_melt, 10)

In [None]:
%%R
# Spread the key/value pairs back into one column per key
head(spread(df_melt, key, value), 10)

## Stack & Unstack

In [17]:
# Display the iris frame, which now carries the extra "id" column
df

     Sepal_Length  Sepal_Width  Petal_Length  Petal_Width    Species   id
0             5.1          3.5           1.4          0.2     setosa    0
1             4.9          3.0           1.4          0.2     setosa    1
2             4.7          3.2           1.3          0.2     setosa    2
3             4.6          3.1           1.5          0.2     setosa    3
4             5.0          3.6           1.4          0.2     setosa    4
..            ...          ...           ...          ...        ...  ...
145           6.7          3.0           5.2          2.3  virginica  145
146           6.3          2.5           5.0          1.9  virginica  146
147           6.5          3.0           5.2          2.0  virginica  147
148           6.2          3.4           5.4          2.3  virginica  148
149           5.9          3.0           5.1          1.8  virginica  149

[150 rows x 6 columns]

In [16]:
# stack() moves the column labels into an inner index level, giving one
# value per (row, column) pair; mixing numbers and strings yields dtype object.
(
    df
    .stack()
    .head(24)
)

0  Sepal_Length       5.1
   Sepal_Width        3.5
   Petal_Length       1.4
   Petal_Width        0.2
   Species         setosa
   id                   0
1  Sepal_Length       4.9
   Sepal_Width          3
   Petal_Length       1.4
   Petal_Width        0.2
   Species         setosa
   id                   1
2  Sepal_Length       4.7
   Sepal_Width        3.2
   Petal_Length       1.3
   Petal_Width        0.2
   Species         setosa
   id                   2
3  Sepal_Length       4.6
   Sepal_Width        3.1
   Petal_Length       1.5
   Petal_Width        0.2
   Species         setosa
   id                   3
dtype: object

In [18]:
# unstack() reverses stack(): the inner index level becomes columns again
(
    df
    .stack()
    .unstack()
    .head(4)
)

  Sepal_Length Sepal_Width Petal_Length Petal_Width Species id
0          5.1         3.5          1.4         0.2  setosa  0
1          4.9           3          1.4         0.2  setosa  1
2          4.7         3.2          1.3         0.2  setosa  2
3          4.6         3.1          1.5         0.2  setosa  3

# Tablas dinámicas

In [20]:
# Small example frame for the pivot tables, written row by row:
# two key columns followed by two integer value columns.
# Note: this rebinds `df`, replacing the iris frame used above.
rows = [
    ("a", "A", 1, 7),
    ("a", "B", 2, 8),
    ("b", "A", 3, 9),
    ("b", "B", 4, 10),
    ("c", "A", 5, 11),
    ("c", "B", 6, 12),
]
df = pd.DataFrame(rows, columns=["key1", "key2", "values1", "values2"])
df

  key1 key2  values1  values2
0    a    A        1        7
1    a    B        2        8
2    b    A        3        9
3    b    B        4       10
4    c    A        5       11
5    c    B        6       12

In [21]:
# Pivot table grouped by key1, then key2. No aggfunc is given, so pandas
# falls back to its default aggregation (the mean of each group).
df.pivot_table(
    values=["values1", "values2"],
    index=["key1", "key2"],
)

           values1  values2
key1 key2                  
a    A           1        7
     B           2        8
b    A           3        9
     B           4       10
c    A           5       11
     B           6       12

In [22]:
# Same pivot table with the index levels swapped: grouped by key2, then key1
df.pivot_table(
    values=["values1", "values2"],
    index=["key2", "key1"],
)

           values1  values2
key2 key1                  
A    a           1        7
     b           3        9
     c           5       11
B    a           2        8
     b           4       10
     c           6       12

## Paneles de DataFrames

In [23]:
## Create three small DataFrames whose column sets partially overlap
df1 = pd.DataFrame(dict(colA=[1, 2], colB=[3, 4]))
df2 = pd.DataFrame(dict(colB=[5, 6], colC=[7, 8]))
df3 = pd.DataFrame(dict(colC=[9, 0], colD=[1, 2]))

In [24]:
## Build the "panel" as a plain dictionary that maps a label to each DataFrame
pdPanel = dict(df1=df1, df2=df2, df3=df3)
print(pdPanel)

{'df1':    colA  colB
0     1     3
1     2     4, 'df2':    colB  colC
0     5     7
1     6     8, 'df3':    colC  colD
0     9     1
1     0     2}
