# Pandas Basics

In [1]:
import pandas as pd
import numpy as np

# Series

### List to Series

In [2]:
# A plain Python list mixing ints and strings; the Series built from it gets dtype object.
l = [1, 2, 'ijk', 'xyz']

In [3]:
# Wrap the list in a Series; a default 0..n-1 integer index is attached.
pd.Series(data=l)

0      1
1      2
2    ijk
3    xyz
dtype: object

### Numpy Array to Series

In [4]:
# Integers 10 through 20; the stop value (21) is exclusive.
arr = np.arange(10, 21)
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])

In [5]:
# A NumPy array converts the same way as a list; the Series keeps the array's integer dtype.
pd.Series(data=arr)

0     10
1     11
2     12
3     13
4     14
5     15
6     16
7     17
8     18
9     19
10    20
dtype: int32

In [6]:
# Values 1..6 plus one string label per value, to be used as a custom index.
arr = np.arange(1, 7)
arr_l = [
    'Alakazam',
    'Butterfree',
    'Charizard',
    'Dodrio',
    'Exeggutor',
    "Farfetch'd",
]

In [7]:
# Pair each value with its label: the labels replace the default integer index.
pd.Series(arr, index=arr_l)

Alakazam      1
Butterfree    2
Charizard     3
Dodrio        4
Exeggutor     5
Farfetch'd    6
dtype: int32

### Dictionary to Series

In [8]:
# A small mapping of label -> value.
d = {
    'A': 1,
    'b': 2,
    'c': 3,
}
d

{'A': 1, 'b': 2, 'c': 3}

In [9]:
# Dict keys become the index labels and dict values become the data.
pd.Series(data=d)

A    1
b    2
c    3
dtype: int64

### Indexing a Series

In [10]:
# Series of 1..5 labelled by country name.
g5 = pd.Series(
    np.arange(1, 6),
    index=['France', 'Germany', 'Japan', 'UK', 'USA'],
)

In [11]:
# Display the Series; the country names passed as `index` are its labels.
g5

France     1
Germany    2
Japan      3
UK         4
USA        5
dtype: int32

In [12]:
# Look up a single value by its index label.
g5['France']

1

In [13]:
# Values fetched by label are plain numbers and can be combined arithmetically.
g5['France']+g5['USA']

6

In [14]:
# Slice by label: unlike positional slicing, the end label ('UK') is included.
g5['Germany':'UK']

Germany    2
Japan      3
UK         4
dtype: int32

In [15]:
# A second Series whose labels only partly overlap with g5's.
nato_top5trp = pd.Series(
    np.arange(1, 6),
    index=['USA', 'Turkey', 'France', 'Germany', 'Italy'],
)
nato_top5trp

USA        1
Turkey     2
France     3
Germany    4
Italy      5
dtype: int32

In [16]:
# Addition aligns on index labels, not on position. Labels present in only one
# of the two Series produce NaN, and the result is float64.
g5+nato_top5trp

France     4.0
Germany    6.0
Italy      NaN
Japan      NaN
Turkey     NaN
UK         NaN
USA        6.0
dtype: float64

# DataFrames

In [17]:
# 4x4 frame holding 1..16 row by row, with letter row labels and I-L column labels.
df = pd.DataFrame(
    np.arange(1, 17).reshape(4, 4),
    index=['a', 'b', 'c', 'd'],
    columns=['I', 'J', 'K', 'L'],
)
df

Unnamed: 0,I,J,K,L
a,1,2,3,4
b,5,6,7,8
c,9,10,11,12
d,13,14,15,16


In [18]:
# Confirm the object built above is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

### Shape

In [19]:
# Dimensions as a (rows, columns) tuple.
df.shape

(4, 4)

### Add New Column

In [20]:
# Assigning to a column label that does not exist yet adds it as a new column.
# The array needs one value per row (4 here).
df['M'] = np.arange(100,104)
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [21]:
# Derive a new column from existing ones: N is the row-wise sum of I and J.
df['N'] = df['I'].add(df['J'])
df

Unnamed: 0,I,J,K,L,M,N
a,1,2,3,4,100,3
b,5,6,7,8,101,11
c,9,10,11,12,102,19
d,13,14,15,16,103,27


### Remove Column

In [22]:
# drop() returns a new frame without column N; df itself is not modified.
df.drop(columns=['N'])

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [23]:
# Column N is still present: the drop() call above returned a new frame.
df

Unnamed: 0,I,J,K,L,M,N
a,1,2,3,4,100,3
b,5,6,7,8,101,11
c,9,10,11,12,102,19
d,13,14,15,16,103,27


In [24]:
# inplace=True applies the drop to df itself; the call returns None, so this cell shows no output.
df.drop(columns=['N'], inplace=True)

In [25]:
# After the in-place drop, column N is gone from df itself.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


### Remove Row

In [26]:
# Rows are dropped by index label; as with columns, a new frame is returned.
df.drop(index='d')

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102


### Select Columns

In [27]:
# Bracket lookup with a single column label returns that column.
df['I']

a     1
b     5
c     9
d    13
Name: I, dtype: int32

In [28]:
# A single selected column is a Series, not a DataFrame.
type(df['I'])

pandas.core.series.Series

In [29]:
# Attribute-style access works here, but prefer df['I']: a column whose name
# matches a DataFrame method or attribute cannot be reached this way, and the
# reader cannot tell a column from a method at a glance.
df.I

a     1
b     5
c     9
d    13
Name: I, dtype: int32

In [30]:
# Passing a list of labels selects several columns and returns a DataFrame.
cols_required = ['J','K']
df[cols_required]

Unnamed: 0,J,K
a,2,3
b,6,7
c,10,11
d,14,15


In [31]:
# Same selection with the list written inline: the outer brackets index the
# frame, the inner brackets build the list of labels.
df[['J','K']]

Unnamed: 0,J,K
a,2,3
b,6,7
c,10,11
d,14,15


### Select Rows

In [32]:
# .loc selects by index label: here the row labelled 'b'.
df.loc['b']

I      5
J      6
K      7
L      8
M    101
Name: b, dtype: int32

In [33]:
# A single selected row is also a Series (indexed by the column labels).
type(df.loc['b'])

pandas.core.series.Series

In [34]:
# .iloc selects by integer position: position 1 is the same row as label 'b'.
df.iloc[1]

I      5
J      6
K      7
L      8
M    101
Name: b, dtype: int32

### Select Rows and Columns

In [35]:
# Current frame, shown for reference while reading the row/column selections that follow.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [36]:
# Label-based lookup of one cell: row 'b', column 'M'.
df.loc['b','M']

101

In [37]:
# The same cell by position: row 1, column 4 (both zero-based).
df.iloc[1,4]

101

In [38]:
# Lists of labels select the cross-section of rows a, c with columns J, L.
df.loc[['a','c'],['J','L']]

Unnamed: 0,J,L
a,2,4
c,10,12


In [39]:
# Positional equivalent: rows 0 and 2, columns 1 and 3.
df.iloc[[0,2],[1,3]]

Unnamed: 0,J,L
a,2,4
c,10,12


### Conditional Selection

#### Single Condition

In [40]:
# Current frame, shown for reference before filtering it by condition.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [41]:
# Element-wise comparison: yields a frame of booleans with the same shape as df.
df>7

Unnamed: 0,I,J,K,L,M
a,False,False,False,False,True
b,False,False,False,True,True
c,True,True,True,True,True
d,True,True,True,True,True


In [42]:
# Masking with a boolean frame keeps the shape; entries where the mask is False
# become NaN, which is why the affected columns are displayed as floats.
df[df>7]

Unnamed: 0,I,J,K,L,M
a,,,,,100
b,,,,8.0,101
c,9.0,10.0,11.0,12.0,102
d,13.0,14.0,15.0,16.0,103


In [43]:
# Comparing a single column yields a boolean Series with one entry per row.
df['I']>7

a    False
b    False
c     True
d     True
Name: I, dtype: bool

In [44]:
# Masking with a boolean Series filters rows: only rows where the condition is True are kept.
df[df['I']>7]

Unnamed: 0,I,J,K,L,M
c,9,10,11,12,102
d,13,14,15,16,103


In [45]:
# Filter the rows and pick column M with a single .loc call instead of chaining
# two bracket lookups (df[mask]['M']). The result is the same here, but the
# single-indexer form avoids an intermediate frame and stays correct if the
# expression is later used as an assignment target.
df.loc[df['I'] > 7, 'M']

c    102
d    103
Name: M, dtype: int32

In [46]:
# Row filter and column list in one .loc call rather than chained indexing
# (df[mask][cols]); the rows and columns are resolved in a single step.
df.loc[df['I'] > 7, ['I', 'M']]

Unnamed: 0,I,M
c,9,102
d,13,103


#### Multiple Conditions

In [47]:
# Current frame, shown for reference before combining several conditions.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [48]:
# & is the element-wise AND for boolean Series. Each comparison needs its own
# parentheses because & binds more tightly than > in Python.
df[(df['I']>1) & (df['J']>6)]

Unnamed: 0,I,J,K,L,M
c,9,10,11,12,102
d,13,14,15,16,103


In [49]:
# | is the element-wise OR: a row is kept when at least one condition holds.
df[(df['I']>1) | (df['J']>6)]

Unnamed: 0,I,J,K,L,M
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [50]:
# Any number of parenthesised conditions can be chained with &.
df[(df['I']>1) & (df['J']>6) & (df['K']>11)]

Unnamed: 0,I,J,K,L,M
d,13,14,15,16,103


### Index

In [51]:
# Current frame; its row index is the labels a-d.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


#### Reset Index

In [52]:
# Returns a new frame with a default 0..n-1 index; the old index is kept as a
# regular column named 'index'.
df.reset_index()

Unnamed: 0,index,I,J,K,L,M
0,a,1,2,3,4,100
1,b,5,6,7,8,101
2,c,9,10,11,12,102
3,d,13,14,15,16,103


In [53]:
# df itself still has the a-d index: reset_index() returned a new frame.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [54]:
# Not executed: `df.reset_index(inplace=True)` would apply the reset to df
# itself instead of returning a new frame, and later cells rely on the a-d index.

#### Set New Index

In [55]:
# Build the list of new labels by splitting a whitespace-separated string.
new_index = 'ALPHA BRAVO CHARLIE DELTA'.split()
new_index

['ALPHA', 'BRAVO', 'CHARLIE', 'DELTA']

In [56]:
# Store the labels as a regular column first; set_index() can then promote it to the index.
df['callsign'] = new_index
df

Unnamed: 0,I,J,K,L,M,callsign
a,1,2,3,4,100,ALPHA
b,5,6,7,8,101,BRAVO
c,9,10,11,12,102,CHARLIE
d,13,14,15,16,103,DELTA


In [57]:
# Returns a new frame indexed by the 'callsign' column; the previous a-d index
# does not appear in the result.
df.set_index('callsign')

Unnamed: 0_level_0,I,J,K,L,M
callsign,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALPHA,1,2,3,4,100
BRAVO,5,6,7,8,101
CHARLIE,9,10,11,12,102
DELTA,13,14,15,16,103


In [58]:
# df still has its a-d index and the callsign column: set_index() returned a new frame.
df

Unnamed: 0,I,J,K,L,M,callsign
a,1,2,3,4,100,ALPHA
b,5,6,7,8,101,BRAVO
c,9,10,11,12,102,CHARLIE
d,13,14,15,16,103,DELTA


In [59]:
# Not executed: `df.set_index('callsign', inplace=True)` would replace the
# index on df itself instead of returning a new frame.

#### Multi-Index

In [60]:
# Outer-level labels of the multi-index, one entry per row.
outside = 'A A A B B B'.split()
outside

['A', 'A', 'A', 'B', 'B', 'B']

In [61]:
# Inner-level labels of the multi-index, parsed from text into ints.
inside = [int(token) for token in '1 2 3 1 2 3'.split()]
inside

[1, 2, 3, 1, 2, 3]

In [62]:
# Pair the two levels position by position into (outer, inner) tuples.
multi_index = list(zip(outside,inside))
multi_index

[('A', 1), ('A', 2), ('A', 3), ('B', 1), ('B', 2), ('B', 3)]

In [63]:
# Convert the list of (outer, inner) tuples into a two-level MultiIndex.
# NOTE(review): this rebinds multi_index from a list of tuples to a MultiIndex,
# so the name refers to a different kind of object after this cell runs.
multi_index = pd.MultiIndex.from_tuples(multi_index)
multi_index

MultiIndex([('A', 1),
            ('A', 2),
            ('A', 3),
            ('B', 1),
            ('B', 2),
            ('B', 3)],
           )

In [64]:
# 0..11 laid out as 6 rows x 2 columns, one row per multi-index entry.
data = np.arange(12).reshape(6, 2)
data

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])

In [65]:
# Frame whose rows are addressed by the two-level (outer, inner) index.
df_multi_index = pd.DataFrame(
    data,
    index=multi_index,
    columns=['X', 'Y'],
)
df_multi_index

Unnamed: 0,Unnamed: 1,X,Y
A,1,0,1
A,2,2,3
A,3,4,5
B,1,6,7
B,2,8,9
B,3,10,11
