# Pandas - Series

In [2]:
import numpy as np
import pandas as pd 

In [3]:
# Raw inputs for the Series constructors: index labels, a plain list of
# values, the same values as a NumPy array, and a label -> value dict.
labels = list('abc')
my_data = [10, 20, 30]
arr = np.asarray(my_data)
d = dict(zip(labels, my_data))

In [4]:
# Inspect the list of index labels.
labels

['a', 'b', 'c']

In [5]:
# Inspect the plain Python list of values.
my_data

[10, 20, 30]

In [6]:
# Inspect the NumPy array built from my_data.
arr

array([10, 20, 30])

In [7]:
# Inspect the label -> value dictionary.
d

{'a': 10, 'b': 20, 'c': 30}

In [8]:
# Values only: the Series gets a default integer index (0, 1, 2).
pd.Series(data=my_data)

0    10
1    20
2    30
dtype: int64

In [9]:
# Values plus explicit index labels.
pd.Series(data=my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [10]:
# From a dict: the keys become the index and the values become the data.
pd.Series(data=d)

a    10
b    20
c    30
dtype: int64

In [11]:
# A NumPy array works as the data source too.
pd.Series(data=arr)

0    10
1    20
2    30
dtype: int32

In [12]:
# A Series of strings; the values are stored with dtype object.
pd.Series(data=labels)

0    a
1    b
2    c
dtype: object

In [13]:
# NumPy array as the data, list of strings as the index.
pd.Series(data=arr, index=labels)

a    10
b    20
c    30
dtype: int32

In [14]:
# A Series can hold arbitrary Python objects -- here, three built-in functions.
pd.Series([sum, print, len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [15]:
# First labelled Series; it shares every label with ser2 except 'USSR'.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Germany', 'USSR', 'Japan'],
)

In [16]:
# Display ser1.
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [17]:
# Second labelled Series; it shares every label with ser1 except 'Italy'.
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['USA', 'Germany', 'Italy', 'Japan'],
)

In [18]:
# Display ser2.
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [19]:
# Look up a single value by its index label.
ser1['USA']

1

In [23]:
# Same label-based lookup on the second Series.
ser2['Italy']

5

In [24]:
# No index is passed, so ser3 gets the default integer index (0, 1, 2).
ser3 = pd.Series(data = labels)

In [25]:
# Display ser3.
ser3

0    a
1    b
2    c
dtype: object

In [26]:
# With the default integer index, 2 is the index label of the last entry.
ser3[2]

'c'

In [28]:
# Display ser1 again for reference before combining it with ser2.
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [29]:
# Display ser2 again for reference before combining it with ser1.
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [30]:
# Element-wise addition aligned on index labels: labels present in only one
# Series (Italy, USSR) yield NaN, which is why the result is float64.
ser1.add(ser2)

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

# Pandas - DataFrames

In [31]:
import numpy as np

In [32]:
import pandas as pd 

In [33]:
from numpy.random import randn

In [34]:
# Seed NumPy's global random state so the randn draws used to build df are
# reproducible.
np.random.seed(101)

In [35]:
# 5x4 frame of random normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [37]:
# Display the full frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [38]:
# Select one column by name with bracket notation.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [39]:
# The frame itself is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [40]:
# A single column pulled out of the frame is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [41]:
# Attribute-style column access; yields the same Series as df['W'].
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [43]:
# Passing a list of column names returns a DataFrame with just those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [45]:
# Add a derived column: the element-wise sum of columns W and Y.
df['new'] = df['W'].add(df['Y'])

In [46]:
# Display the frame, which now includes the 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [48]:
# drop returns a new frame without the 'new' column; df itself is untouched.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [49]:
# df still contains 'new' because the previous drop was not applied in place.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [55]:
# Remove the derived 'new' column from df itself (in place, returns None).
# errors='ignore' makes the cell idempotent: re-running it after the column
# is already gone is a no-op instead of raising
# KeyError: "['new'] not found in axis".
df.drop(columns='new', inplace=True, errors='ignore')

In [56]:
# Display the frame after the in-place drop; the 'new' column is gone.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [59]:
# Drop row 'E'; this returns a new frame and leaves df unmodified.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [60]:
# Rows are axis 0 and columns are axis 1.

In [61]:
# (number of rows, number of columns)
df.shape

(5, 4)

In [62]:
# Select row 'C' by its index label; the row comes back as a Series.
df.loc['C']

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [63]:
# Select the row at integer position 2, which is the row labelled 'C'.
df.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [64]:
# Two ways of selecting a row: by index label with .loc, or by integer position with .iloc.

In [65]:
# Single cell: row label 'B', column label 'Y'.
df.loc['B','Y']

-0.8480769834036315

In [66]:
# Sub-frame: rows 'A' and 'B' restricted to columns 'W' and 'Y'.
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
