# Criando dataframes com Pandas

### Importando a biblioteca

In [1]:
import numpy as np
import pandas as pd

### A partir de um arquivo

In [2]:
# Load the cereal dataset from the CSV file in the working directory.
df_csv = pd.read_csv(filepath_or_buffer='cereal.csv')
# Preview the first five rows (head's default size, spelled out).
df_csv.head(n=5)

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
0,100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
1,100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
2,All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
3,All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
4,Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


In [3]:
# Preview the last five rows (tail's default size, spelled out).
df_csv.tail(n=5)

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
72,Triples,G,C,110,2,1,250,0.0,21.0,3,60,25,3,1.0,0.75,39.106174
73,Trix,G,C,110,1,1,140,0.0,13.0,12,25,25,2,1.0,1.0,27.753301
74,Wheat Chex,R,C,100,3,1,230,3.0,17.0,3,115,25,1,1.0,0.67,49.787445
75,Wheaties,G,C,100,3,1,200,3.0,17.0,3,110,25,1,1.0,1.0,51.592193
76,Wheaties Honey Gold,G,C,110,2,1,200,1.0,16.0,8,60,25,1,1.0,0.75,36.187559


### A partir de uma planilha

In [4]:
# Load the same dataset from the Excel workbook in the working directory.
df_excel = pd.read_excel(io='cereal.xlsx')
# Preview the first five rows (head's default size, spelled out).
df_excel.head(n=5)

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
0,100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
1,100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
2,All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
3,All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
4,Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


In [5]:
# Preview the last five rows (tail's default size, spelled out).
df_excel.tail(n=5)

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
72,Triples,G,C,110,2,1,250,0.0,21.0,3,60,25,3,1.0,0.75,39.106174
73,Trix,G,C,110,1,1,140,0.0,13.0,12,25,25,2,1.0,1.0,27.753301
74,Wheat Chex,R,C,100,3,1,230,3.0,17.0,3,115,25,1,1.0,0.67,49.787445
75,Wheaties,G,C,100,3,1,200,3.0,17.0,3,110,25,1,1.0,1.0,51.592193
76,Wheaties Honey Gold,G,C,110,2,1,200,1.0,16.0,8,60,25,1,1.0,0.75,36.187559


### A partir da área de transferência

In [6]:
# Parse the tabular text currently on the system clipboard into a DataFrame.
# NOTE(review): the result depends on what was copied before this cell runs,
# so it is not reproducible under "Restart & Run All" — copy the table first.
df_transferencia = pd.read_clipboard()
# The whole frame is displayed; the recorded output below has ten rows.
df_transferencia

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium
0,100% Bran,N,C,70,4,1,130
1,100% Natural Bran,Q,C,120,3,5,15
2,All-Bran,K,C,70,4,1,260
3,All-Bran with Extra Fiber,K,C,50,4,0,140
4,Almond Delight,R,C,110,2,2,200
5,Apple Cinnamon Cheerios,G,C,110,2,2,180
6,Apple Jacks,K,C,110,2,0,125
7,Basic 4,G,C,130,3,2,210
8,Bran Chex,R,C,90,2,1,200
9,Bran Flakes,P,C,90,3,0,210


### A partir de um dicionário

In [7]:
# Employee records, written one row per person and then transposed into the
# column-oriented mapping (column name -> list of values).
funcionarios = dict(
    zip(
        ('Nome', 'Idade', 'Tempo de Empresa'),
        map(list, zip(
            ('João Silva', 35, 3),
            ('Maria Souza', 40, 10),
            ('José Ramos', 54, 15),
            ('Pedro Ferreira', 39, 7),
        )),
    )
)

In [9]:
# Build the DataFrame from the column-oriented dict: each key becomes a column.
df_dicionario = pd.DataFrame(data=funcionarios)
df_dicionario

Unnamed: 0,Nome,Idade,Tempo de Empresa
0,João Silva,35,3
1,Maria Souza,40,10
2,José Ramos,54,15
3,Pedro Ferreira,39,7


### Combinando linhas de diferentes dataframes

In [13]:
# Two additional employees, written one row per person and then transposed
# into the same column-oriented layout used for the first batch.
mais_funcionarios = dict(
    zip(
        ('Nome', 'Idade', 'Tempo de Empresa'),
        map(list, zip(
            ('Paula Lima', 28, 5),
            ('Marta Nunes', 45, 12),
        )),
    )
)

In [14]:
# Build a second DataFrame with the same columns as df_dicionario.
df_mais_funcionarios = pd.DataFrame(data=mais_funcionarios)
df_mais_funcionarios

Unnamed: 0,Nome,Idade,Tempo de Empresa
0,Paula Lima,28,5
1,Marta Nunes,45,12


In [16]:
# Stack the two employee frames row-wise. ignore_index=True discards the
# original row labels and renumbers the result from 0.
df_funcionarios = pd.concat(
    objs=[df_dicionario, df_mais_funcionarios],
    axis='index',
    ignore_index=True,
)
df_funcionarios

Unnamed: 0,Nome,Idade,Tempo de Empresa
0,João Silva,35,3
1,Maria Souza,40,10
2,José Ramos,54,15
3,Pedro Ferreira,39,7
4,Paula Lima,28,5
5,Marta Nunes,45,12


### Combinando colunas de diferentes dataframes

In [18]:
# Seeded generator so the random sectors and salaries below are identical on
# every run; creating it inside this cell also makes re-running the cell
# produce the same frame.
rng = np.random.default_rng(42)

setores = ['Vendas', 'RH', 'TI']
# One row per employee in df_funcionarios (6 rows), so the column-wise concat
# in the next cell lines up without introducing NaN rows.
n_funcionarios = 6

dados_funcionarios = {
    # Sector drawn uniformly (with replacement) from `setores`.
    'Setor': rng.choice(setores, n_funcionarios),
    # Integer salaries drawn from [2000, 10000): the upper bound is exclusive.
    'Salario': rng.integers(2000, 10000, n_funcionarios),
}

df_dados_funcionarios = pd.DataFrame(dados_funcionarios)
df_dados_funcionarios

Unnamed: 0,Setor,Salario
0,RH,3120
1,RH,2145
2,RH,3356
3,TI,3670
4,TI,6941
5,RH,7212


In [19]:
# Place the personal data and the sector/salary data side by side; rows are
# matched on their index labels.
df_funcionarios_completo = pd.concat(
    objs=[df_funcionarios, df_dados_funcionarios],
    axis=1,
)
df_funcionarios_completo

Unnamed: 0,Nome,Idade,Tempo de Empresa,Setor,Salario
0,João Silva,35,3,RH,3120
1,Maria Souza,40,10,RH,2145
2,José Ramos,54,15,RH,3356
3,Pedro Ferreira,39,7,TI,3670
4,Paula Lima,28,5,TI,6941
5,Marta Nunes,45,12,RH,7212


### Juntando dataframes com merge

In [23]:
# Employee records keyed by an 'id' column, written one row per person and
# transposed into the column-oriented mapping expected by pd.DataFrame.
funcionarios = dict(
    zip(
        ('id', 'Nome', 'Idade', 'Tempo de Empresa'),
        map(list, zip(
            ('FTI1', 'João Silva', 35, 3),
            ('FVE2', 'Maria Souza', 40, 10),
            ('FRH1', 'José Ramos', 54, 15),
            ('FVE1', 'Pedro Ferreira', 39, 7),
        )),
    )
)
# NOTE: this rebinds `funcionarios` and `df_funcionarios` from earlier cells.
df_funcionarios = pd.DataFrame(data=funcionarios)

In [24]:
# Seeded generator so the random salaries are identical on every run;
# creating it inside this cell also makes re-running the cell produce the
# same frame.
rng = np.random.default_rng(42)

dados_funcionarios = {
    # The ids are listed in a different order than in the employee table;
    # the merge in the next cell matches rows by key, not by position.
    'id': ['FVE1', 'FRH1', 'FTI1', 'FVE2'],
    'Setor': ['Vendas', 'RH', 'TI', 'Vendas'],
    # Integer salaries drawn from [2000, 10000): the upper bound is exclusive.
    'Salario': rng.integers(2000, 10000, size=4),
}
# NOTE: this rebinds `dados_funcionarios` and `df_dados_funcionarios` from
# earlier cells.
df_dados_funcionarios = pd.DataFrame(dados_funcionarios)

In [25]:
# Join the two tables on the shared 'id' key. 'inner' is pandas' default join
# type, spelled out here: only ids present in both frames are kept.
df_funcionarios_completo = df_funcionarios.merge(
    df_dados_funcionarios,
    how='inner',
    on='id',
)
df_funcionarios_completo

Unnamed: 0,id,Nome,Idade,Tempo de Empresa,Setor,Salario
0,FTI1,João Silva,35,3,TI,8704
1,FVE2,Maria Souza,40,10,Vendas,4185
2,FRH1,José Ramos,54,15,RH,6625
3,FVE1,Pedro Ferreira,39,7,Vendas,9090
