## Pandas Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
labels = ['a','b','c','d']
myList = [4,6,5,7]
myDict = {1:'a',2:'b',3:'c'}

In [3]:
pd.Series(data = myList)

0    4
1    6
2    5
3    7
dtype: int64

In [4]:
pd.Series(data = myList, index= labels)

a    4
b    6
c    5
d    7
dtype: int64

In [5]:
ser = pd.Series(data= myDict)
ser

1    a
2    b
3    c
dtype: object

In [6]:
ser[2]

'b'

## Pandas dataframe

In [7]:
pd.DataFrame(data = np.random.randn(4,5))

Unnamed: 0,0,1,2,3,4
0,0.846022,-0.037016,0.52404,-0.309383,-1.742221
1,1.659816,-0.282499,0.749066,1.012935,-0.9229
2,0.315756,2.171569,-0.053831,-0.820986,-2.332707
3,0.422531,-0.776527,0.303038,-1.771827,0.527904


In [8]:
df = pd.DataFrame(data = np.random.randn(4,5), index= ['a','b','c','d'], columns= ['w','x','y','z','s'])

In [9]:
df

Unnamed: 0,w,x,y,z,s
a,0.774186,1.359844,-0.072398,0.605315,0.181029
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049
c,1.379808,-0.490831,-0.116647,0.127206,0.359928
d,0.010999,0.202676,0.717651,-0.218131,0.01705


In [10]:
df['w']

a    0.774186
b    1.755194
c    1.379808
d    0.010999
Name: w, dtype: float64

In [11]:
df[['w','s']]

Unnamed: 0,w,s
a,0.774186,0.181029
b,1.755194,-0.044049
c,1.379808,0.359928
d,0.010999,0.01705


In [12]:
df['new_col'] = df['w'] + df['s']

In [13]:
df

Unnamed: 0,w,x,y,z,s,new_col
a,0.774186,1.359844,-0.072398,0.605315,0.181029,0.955215
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049,1.711145
c,1.379808,-0.490831,-0.116647,0.127206,0.359928,1.739735
d,0.010999,0.202676,0.717651,-0.218131,0.01705,0.02805


In [20]:
df['new_col'] = [1,2,3,4]

In [21]:
df

Unnamed: 0,w,x,y,z,s,new_col
a,0.774186,1.359844,-0.072398,0.605315,0.181029,1
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049,2
c,1.379808,-0.490831,-0.116647,0.127206,0.359928,3
d,0.010999,0.202676,0.717651,-0.218131,0.01705,4


In [22]:
df.shape

(4, 6)

In [25]:
df.drop('new_col', axis=1, inplace = True)

In [26]:
df

Unnamed: 0,w,x,y,z,s
a,0.774186,1.359844,-0.072398,0.605315,0.181029
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049
c,1.379808,-0.490831,-0.116647,0.127206,0.359928
d,0.010999,0.202676,0.717651,-0.218131,0.01705


In [27]:
df.loc['a']

w    0.774186
x    1.359844
y   -0.072398
z    0.605315
s    0.181029
Name: a, dtype: float64

In [28]:
df.iloc[0]

w    0.774186
x    1.359844
y   -0.072398
z    0.605315
s    0.181029
Name: a, dtype: float64

In [29]:
df.loc[['a','b'],['w','z']]

Unnamed: 0,w,z
a,0.774186,0.605315
b,1.755194,-0.469223


In [30]:
df['w']>0

a    True
b    True
c    True
d    True
Name: w, dtype: bool

In [33]:
df[df['w']>0]

Unnamed: 0,w,x,y,z,s
a,0.774186,1.359844,-0.072398,0.605315,0.181029
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049
c,1.379808,-0.490831,-0.116647,0.127206,0.359928
d,0.010999,0.202676,0.717651,-0.218131,0.01705


In [34]:
df[df['w']>0]['x']

a    1.359844
b   -1.428184
c   -0.490831
d    0.202676
Name: x, dtype: float64

In [35]:
df['new'] = ['A','B','C','D']

In [36]:
df

Unnamed: 0,w,x,y,z,s,new
a,0.774186,1.359844,-0.072398,0.605315,0.181029,A
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049,B
c,1.379808,-0.490831,-0.116647,0.127206,0.359928,C
d,0.010999,0.202676,0.717651,-0.218131,0.01705,D


In [37]:
df.set_index('new')

Unnamed: 0_level_0,w,x,y,z,s
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,0.774186,1.359844,-0.072398,0.605315,0.181029
B,1.755194,-1.428184,-0.567568,-0.469223,-0.044049
C,1.379808,-0.490831,-0.116647,0.127206,0.359928
D,0.010999,0.202676,0.717651,-0.218131,0.01705


In [38]:
df

Unnamed: 0,w,x,y,z,s,new
a,0.774186,1.359844,-0.072398,0.605315,0.181029,A
b,1.755194,-1.428184,-0.567568,-0.469223,-0.044049,B
c,1.379808,-0.490831,-0.116647,0.127206,0.359928,C
d,0.010999,0.202676,0.717651,-0.218131,0.01705,D


## Group by

In [39]:
data = {'Company':['GOOGLE','GOOGLE','MICROSOFT','MICROSOFT','FACEBOOK','FACEBOOK'], 'Person':['P1','P2','P3','P4','P5','P6'],'Sales':[4500,5500,7600,3455,6700,1200]}