In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series built from a plain list; np.nan marks the missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

<center><b>Creating a Series from an ndarray, a dict, or a scalar value (a DataFrame can later be built by passing a dict of objects that can be converted to Series-like structures)</b></center>

In [4]:
# Five random draws from np.random.randn, labelled "a" through "e".
s = pd.Series(data=np.random.randn(5), index=list("abcde"))

In [5]:
s

a   -0.910079
b    0.361288
c    1.628610
d    0.580028
e    1.178683
dtype: float64

In [6]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [7]:
pd.Series(np.random.randn(5))

0   -0.339341
1   -0.618730
2   -0.132320
3   -0.682298
4    0.747845
dtype: float64

In [8]:
# Keys are listed in b, a, c order; the Series built from this dict keeps that order.
d = dict(b=1, a=0, c=2)

In [9]:
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [10]:
# Same labels as before, now with float values.
d = dict(a=0.0, b=1.0, c=2.0)

In [11]:
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [12]:
pd.Series(d,index=["b","c","d","a"])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [13]:
pd.Series(5.0,index=["a","b","c","d","e"])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [14]:
# First element by position. `s` is indexed by string labels here, so a bare
# integer key (`s[0]`) only works through a deprecated positional fallback;
# `.iloc` states the positional intent explicitly.
s.iloc[0]

-0.9100794924478018

In [15]:
s[:3]

a   -0.910079
b    0.361288
c    1.628610
dtype: float64

In [16]:
s[s>s.median()]

c    1.628610
e    1.178683
dtype: float64

In [17]:
# Elements at positions 4, 3 and 1, in that order. `.iloc` is used because the
# Series has string labels and integer keys in `s[...]` rely on a deprecated
# positional fallback.
s.iloc[[4, 3, 1]]

e    1.178683
d    0.580028
b    0.361288
dtype: float64

In [18]:
np.exp(s)

a    0.402492
b    1.435177
c    5.096784
d    1.786088
e    3.250092
dtype: float64

In [19]:
s.dtype

dtype('float64')

In [20]:
s.array

<PandasArray>
[-0.9100794924478018,  0.3612883616827068,   1.628609814881112,
  0.5800279434362801,   1.178683205283506]
Length: 5, dtype: float64

In [21]:
s.to_numpy()

array([-0.91007949,  0.36128836,  1.62860981,  0.58002794,  1.17868321])

In [22]:
s

a   -0.910079
b    0.361288
c    1.628610
d    0.580028
e    1.178683
dtype: float64

### Series is dict-like

In [23]:
s["a"]

-0.9100794924478018

In [24]:
s['e']

1.178683205283506

In [25]:
s

a   -0.910079
b    0.361288
c    1.628610
d    0.580028
e    1.178683
dtype: float64

In [26]:
"e" in s

True

In [27]:
'f' in s

False

In [29]:
s.get("f")

In [30]:
s.get("f",np.nan)

nan

### Vectorized operations and label alignment with Series

In [31]:
s+s

a   -1.820159
b    0.722577
c    3.257220
d    1.160056
e    2.357366
dtype: float64

In [32]:
s*3

a   -2.730238
b    1.083865
c    4.885829
d    1.740084
e    3.536050
dtype: float64

In [33]:
np.exp(s)

a    0.402492
b    1.435177
c    5.096784
d    1.786088
e    3.250092
dtype: float64

In [34]:
# A default-indexed Series that carries a name.
s = pd.Series(data=np.random.randn(5), name="something")

In [35]:
s

0   -0.709873
1   -0.732917
2   -1.226934
3    0.840858
4    1.104654
Name: something, dtype: float64

In [36]:
s.name

'something'

In [37]:
s2 = s.rename("different")

In [38]:
s2.name

'different'

## Data Frame
#### From dict of Series or dicts

In [39]:
# Two Series with different label sets: "one" has no entry for label "d".
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
}

In [40]:
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64,
 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [41]:
df = pd.DataFrame(d)

In [42]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [43]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [44]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [45]:
df.columns

Index(['one', 'two'], dtype='object')

#### From dict of ndarrays / lists

In [46]:
# Column name -> equal-length list of values.
d = {
    "one": [1.0, 2.0, 3.0, 4.0],
    "two": [4.0, 3.0, 2.0, 1.0],
}

In [47]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [48]:
pd.DataFrame(d,index=["a","b","c","d"])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


### From structured or record array

In [49]:
# Two zero-initialised records with fields A ("i4"), B ("f4") and C (10-byte string).
# "S10" is the canonical spelling of the fixed-width bytes dtype; the "a10"
# alias used previously is deprecated as of NumPy 2.0 and resolves to the same dtype.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])

In [50]:
data


array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [51]:
data[:]= [(1,2.0,"Hello"),(2,3.0,"World")]

In [52]:
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [53]:
pd.DataFrame(data,index=["first","second"])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [54]:
pd.DataFrame(data,columns=['C','A','B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


### From list of dicts

In [55]:
# One dict per row; the second row has an extra key "c" that the first lacks.
data2 = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]

In [56]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [57]:
pd.DataFrame(data2, index=["first", "second"])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [58]:
pd.DataFrame(data2, columns=["a", "b"])

Unnamed: 0,a,b
0,1,2
1,5,10


### From a list of namedtuples

In [65]:
from collections import namedtuple

In [66]:
# 2-D point record with fields x and y.
Point = namedtuple("Point", ["x", "y"])

In [67]:
pd.DataFrame([Point(0,0),Point(0,3),Point(2,3)])

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [68]:
# 3-D point record with fields x, y and z.
Point3D = namedtuple("Point3D", ["x", "y", "z"])

In [69]:
pd.DataFrame([Point3D(0,0,0),Point3D(0,3,5),Point(2,3)])

Unnamed: 0,x,y,z
0,0,0,0.0
1,0,3,5.0
2,2,3,


### From a list of dataclasses

In [70]:
from dataclasses import make_dataclass

In [71]:
# Rebinds the name `Point`: from this cell on it is a dataclass with
# int-annotated fields x and y, not the namedtuple created earlier.
Point = make_dataclass("Point", fields=[("x", int), ("y", int)])

In [72]:
pd.DataFrame([Point(0,0),Point(0,3),Point(2,3)])

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [73]:
pd.DataFrame.from_dict(dict([("A", [1, 2, 3]), ("B", [4, 5, 6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [74]:
pd.DataFrame.from_dict(dict([("A", [1, 2, 3]), ("B", [4, 5, 6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


### Column selection, addition, and deletion

In [76]:
df["one"]

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [78]:
df["three"] = df["one"] * df["two"]

In [79]:
df["three"]

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [80]:
df["flag"] = df["one"] > 2

In [81]:
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [82]:
del df["two"]

In [83]:
df

Unnamed: 0,one,three,flag
a,1.0,1.0,False
b,2.0,4.0,False
c,3.0,9.0,True
d,,,False


In [84]:
three = df.pop("three")

In [85]:
three


a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [86]:
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [87]:
df["foo"] = "bar"

In [88]:
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [89]:
df["one_trunc"] = df["one"][:2]

In [90]:
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


##### The `insert` function is available to insert a column at a particular location in the columns

In [92]:
df.insert(1,"bar",df["one"])

In [93]:
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,
