In [1]:
import numpy as np
import pandas as pd

- Series

In [4]:
ser = pd.Series([112,67,89,90,78],index=['A','B','C','D','E'])

In [5]:
ser

A    112
B     67
C     89
D     90
E     78
dtype: int64

In [6]:
ser.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

- **From dict**

- If data is a dict, if index is passed the values in data corresponding to the labels in the index will be pulled out. Otherwise, an index will be constructed from the sorted keys of the dict, if possible.
 

In [7]:
dict = {'a':1,'b':0,'c':90,'d':55,'e':21}

In [8]:
ser = pd.Series(dict)

In [9]:
ser

a     1
b     0
c    90
d    55
e    21
dtype: int64

In [11]:
pd.Series(dict,index=['d','f','e','a','b']) #NaN (not a number) is the standard missing data marker used in pandas

d    55.0
f     NaN
e    21.0
a     1.0
b     0.0
dtype: float64

In [12]:
 pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [13]:
ser[ser>13]

c    90
d    55
e    21
dtype: int64

In [14]:
ser.values

array([ 1,  0, 90, 55, 21], dtype=int64)

In [15]:
ser.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [16]:
ser[1:]

b     0
c    90
d    55
e    21
dtype: int64

In [17]:
ser['a']

1

In [18]:
ser['e']

21

In [21]:
'e' in ser

True

In [22]:
'f' in ser

False

In [24]:
ser+ser

a      2
b      0
c    180
d    110
e     42
dtype: int64

In [25]:
ser*ser

a       1
b       0
c    8100
d    3025
e     441
dtype: int64

In [26]:
np.exp(ser)

a    2.718282e+00
b    1.000000e+00
c    1.220403e+39
d    7.694785e+23
e    1.318816e+09
dtype: float64

In [31]:
ser1 = pd.Series(np.random.randint(1,50,10),name='Something')

In [32]:
ser1

0     4
1    47
2    43
3    49
4     2
5    26
6     4
7    27
8    36
9    12
Name: Something, dtype: int32

- DataFrame.

In [34]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

In [36]:
dataframe = pd.DataFrame(d)

In [38]:
dataframe

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [39]:
dataframe.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [40]:
dataframe.columns

Index(['one', 'two'], dtype='object')

In [44]:
dataframe.from_records(dataframe)

Unnamed: 0,one,two
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,,4.0


- DataFrame.from_items

DataFrame.from_items works analogously to the form of the dict constructor that takes a sequence of (key, value) pairs, where the keys are column (or row, in the case of orient='index') names, and the value are the column values (or row values). This can be useful for constructing a DataFrame with the columns in a particular order without having to pass an explicit list of columns:


#### Column selection, addition, deletion

In [52]:
dataframe['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [53]:
dataframe['three']=dataframe['one']*dataframe['two']

In [54]:
dataframe['three']

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [55]:
dataframe['flag']=dataframe['one']>2

In [56]:
dataframe

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [57]:
del dataframe['three']

In [58]:
dataframe.pop('two')

a    1.0
b    2.0
c    3.0
d    4.0
Name: two, dtype: float64

In [59]:
dataframe

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [62]:
dataframe.insert(1,'nine',dataframe['one'])

In [63]:
dataframe

Unnamed: 0,one,nine,flag
a,1.0,1.0,False
b,2.0,2.0,False
c,3.0,3.0,True
d,,,False


In [66]:
import seaborn as sns

In [68]:
data = sns.load_dataset('iris')

In [71]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [75]:
data.assign(Sepal_ratio = lambda x:data['sepal_width']/data['sepal_length']).head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Sepal_ratio
0,5.1,3.5,1.4,0.2,setosa,0.686275
1,4.9,3.0,1.4,0.2,setosa,0.612245
2,4.7,3.2,1.3,0.2,setosa,0.680851
3,4.6,3.1,1.5,0.2,setosa,0.673913
4,5.0,3.6,1.4,0.2,setosa,0.72


In [76]:
data.assign(Petal_Ratio = lambda x:data['petal_width']/data['petal_length']).head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Petal_Ratio
0,5.1,3.5,1.4,0.2,setosa,0.142857
1,4.9,3.0,1.4,0.2,setosa,0.142857
2,4.7,3.2,1.3,0.2,setosa,0.153846
3,4.6,3.1,1.5,0.2,setosa,0.133333
4,5.0,3.6,1.4,0.2,setosa,0.142857


In [83]:
data.query('sepal_length > 5').head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
10,5.4,3.7,1.5,0.2,setosa
14,5.8,4.0,1.2,0.2,setosa
15,5.7,4.4,1.5,0.4,setosa


### Indexing / Selection.

- Select column                    df[col]      
- Select row by label              df.loc[label] 
- Select row by integer location   df.iloc[loc]  
- Slice rows                       df[5:10]  
- Select rows by boolean vector    df[bool_vec]


- Select The Columns

In [84]:
data['sepal_width']

0      3.5
1      3.0
2      3.2
3      3.1
4      3.6
      ... 
145    3.0
146    2.5
147    3.0
148    3.4
149    3.0
Name: sepal_width, Length: 150, dtype: float64

In [87]:
data.loc[data['species']=='setosa'].head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [89]:
data.iloc[2:3]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
2,4.7,3.2,1.3,0.2,setosa


### Data alignment and arithmetic.

In [90]:
df = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])

In [91]:
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])

In [92]:
dff = df+df2

In [93]:
dff

Unnamed: 0,A,B,C,D
0,3.735113,0.757158,0.990897,
1,-1.082291,-0.845555,2.609778,
2,-1.465417,1.877078,-0.174657,
3,-0.793792,-1.383604,3.045533,
4,-1.441123,2.03836,0.767816,
5,-1.934203,-0.560798,2.735263,
6,0.926401,-0.242038,-0.438761,
7,,,,
8,,,,
9,,,,


We can do the transpose,*,+np.exp(dff)

In [94]:
data.to_numpy()

array([[5.1, 3.5, 1.4, 0.2, 'setosa'],
       [4.9, 3.0, 1.4, 0.2, 'setosa'],
       [4.7, 3.2, 1.3, 0.2, 'setosa'],
       [4.6, 3.1, 1.5, 0.2, 'setosa'],
       [5.0, 3.6, 1.4, 0.2, 'setosa'],
       [5.4, 3.9, 1.7, 0.4, 'setosa'],
       [4.6, 3.4, 1.4, 0.3, 'setosa'],
       [5.0, 3.4, 1.5, 0.2, 'setosa'],
       [4.4, 2.9, 1.4, 0.2, 'setosa'],
       [4.9, 3.1, 1.5, 0.1, 'setosa'],
       [5.4, 3.7, 1.5, 0.2, 'setosa'],
       [4.8, 3.4, 1.6, 0.2, 'setosa'],
       [4.8, 3.0, 1.4, 0.1, 'setosa'],
       [4.3, 3.0, 1.1, 0.1, 'setosa'],
       [5.8, 4.0, 1.2, 0.2, 'setosa'],
       [5.7, 4.4, 1.5, 0.4, 'setosa'],
       [5.4, 3.9, 1.3, 0.4, 'setosa'],
       [5.1, 3.5, 1.4, 0.3, 'setosa'],
       [5.7, 3.8, 1.7, 0.3, 'setosa'],
       [5.1, 3.8, 1.5, 0.3, 'setosa'],
       [5.4, 3.4, 1.7, 0.2, 'setosa'],
       [5.1, 3.7, 1.5, 0.4, 'setosa'],
       [4.6, 3.6, 1.0, 0.2, 'setosa'],
       [5.1, 3.3, 1.7, 0.5, 'setosa'],
       [4.8, 3.4, 1.9, 0.2, 'setosa'],
       [5.0, 3.0, 1.6, 0.