# DataFrame

In [9]:
import pandas as pd
import numpy as np

In [10]:
# Two Series with different labels: "one" covers a-c, "two" covers a-d.
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
}

In [11]:
# Each dict key becomes a column; the Series are aligned on their labels.
df = pd.DataFrame(data=d)

In [12]:
# Rows a-d cover the labels of both Series; "one" has no "d" entry, so it is NaN there.
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [13]:
# index= picks rows by label in the order given; "c" is not listed, so it is left out.
pd.DataFrame(data=d, index=["d", "b", "a"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [14]:
# columns= selects which dict keys to keep: "one" is dropped, and "three"
# (not a key of d) shows up as an all-NaN column.
pd.DataFrame(
    data=d,
    index=["d", "b", "a"],
    columns=["two", "three"],
)

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [15]:
# Same row selection, keeping both existing columns plus the missing "three" (all NaN).
pd.DataFrame(
    data=d,
    index=["d", "b", "a"],
    columns=["one", "two", "three"],
)

Unnamed: 0,one,two,three
d,,4.0,
b,2.0,2.0,
a,1.0,1.0,


In [16]:
# Row labels of df.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [17]:
# Column labels of df.
df.columns

Index(['one', 'two'], dtype='object')

## From dict of ndarrays / lists

In [18]:
# Plain lists of equal length, one per column.
d = {
    "one": [1.0, 2.0, 3.0, 4.0],
    "two": [4.0, 3.0, 2.0, 1.0],
}

In [19]:
# Build the frame straight from the dict of lists.
df = pd.DataFrame(data=d)

In [20]:
# No index was given, so the rows carry the default integer labels 0-3.
df

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [21]:
# The lists carry no labels of their own, so index= simply names the four rows.
pd.DataFrame(data=d, index=list("abcd"))

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


## From structured or record array

In [22]:
# Structured array holding two zero-filled records with fields
# A (4-byte int), B (4-byte float) and C (10-byte string).
# "S10" is spelled out instead of the legacy "a10" alias: NumPy 2.0 deprecates
# the "a" type code, and both spellings denote the same bytes dtype.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])

In [23]:
# Freshly created records are zero-filled: 0, 0.0 and an empty byte string.
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [24]:
# Overwrite both records in place, one tuple per record (A, B, C).
data[:] = [
    (1, 2.0, "Hello"),
    (2, 3.0, "World"),
]

In [25]:
# Both records now hold the assigned values; the text in field C is stored as bytes.
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [26]:
# A structured array converts directly into a frame.
df = pd.DataFrame(data=data)

In [27]:
# The field names A, B, C became the columns; C holds the bytes values.
df

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [28]:
# Label the two records instead of using the default 0/1 index.
pd.DataFrame(data=data, index=["first", "second"])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [29]:
# columns= reorders the existing fields: C first, then A and B.
pd.DataFrame(data=data, columns=["C", "A", "B"])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


## From a list of dicts

In [30]:
# One dict per row; the second row has an extra key "c".
data2 = [
    {"a": 1, "b": 2},
    {"a": 5, "b": 10, "c": 20},
]

In [31]:
# Dict keys become columns; the first row has no "c", so that cell is NaN.
pd.DataFrame(data=data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [32]:
# Same rows, labelled "first" / "second" instead of 0 / 1.
pd.DataFrame(data=data2, index=["first", "second"])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [33]:
# Keep only the "a" and "b" keys; "c" is not listed and is dropped.
pd.DataFrame(data=data2, columns=["a", "b"])

Unnamed: 0,a,b
0,1,2
1,5,10


## From a dict of tuples

In [34]:
# Outer tuple keys name the columns, inner tuple keys name the rows;
# both ends turn into two-level labels.
p = pd.DataFrame(
    {
        ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
        ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
        ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
        ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
        ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
    }
)

In [35]:
# Two-level row and column labels; (row, column) pairs absent from the dict are NaN.
p

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [36]:
# Row labels: a MultiIndex made of the inner dicts' tuple keys.
p.index

MultiIndex([('A', 'B'),
            ('A', 'C'),
            ('A', 'D')],
           )

In [37]:
# Column labels: a MultiIndex made of the outer dict's tuple keys.
p.columns

MultiIndex([('a', 'b'),
            ('a', 'a'),
            ('a', 'c'),
            ('b', 'a'),
            ('b', 'b')],
           )

## From a list of namedtuples

In [38]:
from collections import namedtuple

In [39]:
# 2-D point record with fields x and y.
Point = namedtuple("Point", ["x", "y"])

In [40]:
# The namedtuple fields x, y become the columns; a plain tuple of the same
# length can be mixed in as an ordinary row.
pd.DataFrame(
    data=[
        Point(0, 0),
        Point(0, 3),
        (2, 3),
    ]
)

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [41]:
# 3-D point record with fields x, y and z.
Point3D = namedtuple("Point3D", ["x", "y", "z"])

In [42]:
# The last row is a 2-tuple, shorter than Point3D, so its z value is NaN.
pd.DataFrame(
    data=[
        Point3D(0, 0, 0),
        Point3D(0, 3, 5),
        (2, 3),
    ]
)

Unnamed: 0,x,y,z
0,0,0,0.0
1,0,3,5.0
2,2,3,


## From a list of dataclasses

In [43]:
from dataclasses import make_dataclass

In [44]:
# Dataclass with two int fields. NOTE: this rebinds the name Point, which
# earlier in the notebook refers to a namedtuple.
Point = make_dataclass("Point", fields=[("x", int), ("y", int)])

In [45]:
# NOTE(review): the recorded output is a single object column of Point reprs,
# i.e. the pandas used for that run did not expand dataclass fields. The pandas
# user guide documents expansion into x / y columns from 1.1.0 on - re-run to confirm.
pd.DataFrame(
    data=[
        Point(0, 0),
        Point(0, 3),
        Point(2, 3),
    ]
)

Unnamed: 0,0
0,"Point(x=0, y=0)"
1,"Point(x=0, y=3)"
2,"Point(x=2, y=3)"


## Alternate constructors

### DataFrame.from_dict

将字典转换为 DataFrame；DataFrame 本质上就是一张二维表（table）。

In [46]:
# The dict keys A and B become the column labels.
pd.DataFrame.from_dict({"A": [1, 2, 3], "B": [4, 5, 6]})

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


当 `orient='index'` 时，dict 中的 keys 会成为 DataFrame 的行标签；此时可以通过 `columns` 参数指定列名。

In [47]:
# With orient="index" the dict keys A and B label the rows, and columns=
# names the three positions of each list.
pd.DataFrame.from_dict(
    {"A": [1, 2, 3], "B": [4, 5, 6]},
    orient="index",
    columns=["one", "two", "three"],
)

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


## Column selection, addition, deletion

In [49]:
# Start this section from a fresh dict of two Series ("one": a-c, "two": a-d).
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),
)

In [50]:
# Working frame for the selection / addition / deletion examples below.
df = pd.DataFrame(data=d)

In [51]:
# Dict-style lookup returns the column as a Series named "one".
df["one"]

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [52]:
# Add a column computed element-wise from two existing ones (modifies df in place).
df["three"] = df["one"] * df["two"]

In [53]:
# Add a boolean column from an element-wise comparison.
df["flag"] = df["one"] > 2

In [54]:
# At row "d", "one" is NaN: the product "three" is NaN too, while the comparison gives False.
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [55]:
# Delete a column in place, the same way a dict key is deleted.
del df["two"]

In [56]:
# pop removes the column from df and hands it back, here kept in `three`.
three = df.pop("three")

In [57]:
# Only "one" and "flag" are left after the del and the pop.
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [58]:
# Assigning a scalar fills the new column with that value in every row.
df["foo"] = "bar"

In [59]:
# "foo" holds the same string "bar" in all four rows.
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [60]:
# The slice keeps only the first two entries (a, b); on assignment the Series is
# matched to df's row labels, so the remaining rows end up NaN.
df["one_trunc"] = df["one"][:2]

In [61]:
# "one_trunc" has values for a and b only; c and d are NaN.
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [67]:
# Insert a copy of "one" as column "bars" at position 1 (right after "one").
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run on a kernel where it has already executed.
if "bars" not in df.columns:
    df.insert(1, "bars", df["one"])

In [68]:
# Display df after the insert; the new column sits at position 1, right after "one".
df

Unnamed: 0,one,bars,bar,flag,foo,one_trunc
a,1.0,1.0,1.0,False,bar,1.0
b,2.0,2.0,2.0,False,bar,2.0
c,3.0,3.0,3.0,True,bar,
d,,,,False,bar,
