In [2]:
import numpy as np
import pandas as pd

# Listing 3-35: Creating a Data Frame from a Dict of Series
dict1 = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])
}
df = pd.DataFrame(dict1)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [2]:
# set index for the DataFrame
pd.DataFrame(dict1, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [3]:
# Control the labels appearance of the DataFrame
pd.DataFrame(dict1, index=['d', 'b', 'a'], columns=['two', 'three', 'one'])

Unnamed: 0,two,three,one
d,4.0,,
b,2.0,,2.0
a,1.0,,1.0


In [5]:
# Listing 3-36: Creating a DataFrame from an Ndarray
#without index
ndarrdict = {
    'one': [1., 2., 3., 4.],
    'two': [4., 3., 2., 1.]
}
pd.DataFrame(ndarrdict)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [6]:
# Assign index
pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [7]:
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [8]:
pd.DataFrame(data, index=['First', 'Second'])

Unnamed: 0,A,B,C
First,1,2.0,b'Hello'
Second,2,3.0,b'World'


In [9]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


### Creating Data Frames from a List of Dicts

In [10]:
# Listing 3-38: Creating a Data Frame from a List of Dictionaries
data2 = [
    {'A ': 1, 'B ': 2},
    {'A': 5, 'B': 10, 'C': 20}]
pd.DataFrame(data2)

Unnamed: 0,A,B,A.1,B.1,C
0,1.0,2.0,,,
1,,,5.0,10.0,20.0


In [11]:
pd.DataFrame(data2, index=['First', 'Second'])

Unnamed: 0,A,B,A.1,B.1,C
First,1.0,2.0,,,
Second,,,5.0,10.0,20.0


In [12]:
pd.DataFrame(data2, columns=['A', 'B'])

Unnamed: 0,A,B
0,,
1,5.0,10.0


### Creating Data Frames from a Dict of Tuples

In [14]:
# Listing 3-39: Creating a Data Frame from a Dictionary of Tuples
pd.DataFrame({
    ('a', 'b'): {
        ('A', 'B'): 1, 
        ('A', 'C'): 2
    },
    ('a', 'a'): {
        ('A', 'C'): 3,
        ('A', 'B'): 4
    },
    ('a', 'c'): {
        ('A', 'B'): 5,
        ('A', 'C'): 6
    },
    ('b', 'a'): {
        ('A', 'C'): 7,
        ('A', 'B'): 8
    },
    ('b', 'b'): {
        ('A', 'D'): 9,
        ('A', 'B'): 10
    }
})

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [15]:
# DataFrame Column Selection, Addition, Deletion
ndarrdict = {
    'one': [1., 2., 3., 4.],
    'two': [4., 3., 2., 1.]
}
df = pd.DataFrame(ndarrdict, index=['a', 'b', 'c', 'd'])
df

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [16]:
df['three'] = df['one'] * df['two']     # Add column
df['flag'] = df['one'] > 2              # Add column
df

Unnamed: 0,one,two,three,flag
a,1.0,4.0,4.0,False
b,2.0,3.0,6.0,False
c,3.0,2.0,6.0,True
d,4.0,1.0,4.0,True


In [17]:
df['Filler'] = 'HCT'
df['Slic'] = df['one'][:2]
df

Unnamed: 0,one,two,three,flag,Filler,Slic
a,1.0,4.0,4.0,False,HCT,1.0
b,2.0,3.0,6.0,False,HCT,2.0
c,3.0,2.0,6.0,True,HCT,
d,4.0,1.0,4.0,True,HCT,


In [18]:
# Delete columns
del df['two']
Three = df.pop('three')
df

Unnamed: 0,one,flag,Filler,Slic
a,1.0,False,HCT,1.0
b,2.0,False,HCT,2.0
c,3.0,True,HCT,
d,4.0,True,HCT,


In [19]:
df.insert(1, 'bar', df['one'])
df

Unnamed: 0,one,bar,flag,Filler,Slic
a,1.0,1.0,False,HCT,1.0
b,2.0,2.0,False,HCT,2.0
c,3.0,3.0,True,HCT,
d,4.0,4.0,True,HCT,


In [20]:
df = pd.DataFrame({
    "A": [1, 2, 3],
    "B": [4, 5, 6]
})
df = df.assign(C=lambda x: x['A'] + x['B'])
df = df.assign(D=lambda x: x['A'] + x['C'])
df

Unnamed: 0,A,B,C,D
0,1,4,5,6
1,2,5,7,9
2,3,6,9,12


In [21]:
df = df.assign(A = lambda x: x['A'] * 2)
df

Unnamed: 0,A,B,C,D
0,2,4,5,6
1,4,5,7,9
2,6,6,9,12


In [22]:
df['B']

0    4
1    5
2    6
Name: B, dtype: int64

In [23]:
df.iloc[2]

A     6
B     6
C     9
D    12
Name: 2, dtype: int64

In [24]:
df[1:]

Unnamed: 0,A,B,C,D
1,4,5,7,9
2,6,6,9,12


In [25]:
df[df['C'] > 7]

Unnamed: 0,A,B,C,D
2,6,6,9,12


In [3]:
# Listing 3-44: Operations on Data Frames
df1 = pd.DataFrame({
    "A": [1, 2, 3],
    "B": [4, 5, 6]
})
df2 = pd.DataFrame({
    "A": [7, 4, 6],
    "B": [10, 4, 15]
})
print(df1)
print()
print(df2)

A  B
0  1  4
1  2  5
2  3  6

   A   B
0  7  10
1  4   4
2  6  15


In [28]:
df1 + df2

Unnamed: 0,A,B
0,8,14
1,6,9
2,9,21


In [29]:
df1 - df2

Unnamed: 0,A,B
0,-6,-6
1,-2,1
2,-3,-9


In [30]:
df2 - df1.iloc[2]

Unnamed: 0,A,B
0,4,4
1,1,-2
2,3,9


In [31]:
df2

Unnamed: 0,A,B
0,7,10
1,4,4
2,6,15


In [32]:
df2 * 2 + 1

Unnamed: 0,A,B
0,15,21
1,9,9
2,13,31


In [4]:
df2

Unnamed: 0,A,B
0,7,10
1,4,4
2,6,15


In [5]:
df2[:].T

Unnamed: 0,0,1,2
A,7,4,6
B,10,4,15


In [6]:
df1

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [7]:
df1.T.dot(df1)

Unnamed: 0,A,B
A,14,32
B,32,77


In [8]:
# Listing 3-47: Creating a Panel from a 3D Ndarray
P1 = pd.Panel(np.random.randn(2, 5, 4), 
    items=['Item1', 'Item2'],
    major_axis=pd.date_range('10/05/2018', periods=5), 
    minor_axis=['A', 'B', 'C', 'D'])
P1

TypeError: Panel() takes no arguments