# Pandas Basics

In [1]:
import pandas as pd

import numpy as np
from numpy.random import randn
# Seed NumPy's global random generator so any random draws are repeatable.
np.random.seed(101)

# Series

### List to Series

In [2]:
# A plain Python list that mixes integers and strings.
l = [1, 2, 'ijk', 'xyz']

In [3]:
# Build a Series straight from the list. The result holds both ints and
# strings and reports dtype object.
pd.Series(l)

0      1
1      2
2    ijk
3    xyz
dtype: object

### NumPy Array to Series

In [4]:
# Integers from 10 up to 20; the stop value 21 itself is not included.
arr = np.arange(10, 21)
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])

In [5]:
# With no index supplied, the Series gets default integer labels starting at 0.
pd.Series(arr)

0     10
1     11
2     12
3     13
4     14
5     15
6     16
7     17
8     18
9     19
10    20
dtype: int32

In [6]:
# Six labels, later used as a custom index...
arr_l = [
    'Alakazam',
    'Butterfree',
    'Charizard',
    'Dodrio',
    'Exeggutor',
    "Farfetch'd",
]
# ...and six integer values (1 through 6), one per label.
arr = np.arange(1, 7)

In [7]:
# Pair each value in `arr` with the label at the same position in `arr_l`,
# so the labels replace the default 0..n-1 index.
pd.Series(data=arr,index=arr_l)

Alakazam      1
Butterfree    2
Charizard     3
Dodrio        4
Exeggutor     5
Farfetch'd    6
dtype: int32

### Dictionary to Series

In [8]:
# A small label -> value mapping (note the mixed-case keys).
d = {
    'A': 1,
    'b': 2,
    'c': 3,
}
d

{'A': 1, 'b': 2, 'c': 3}

In [9]:
# Dict keys become the index labels and dict values become the data.
pd.Series(d)

A    1
b    2
c    3
dtype: int64

### Indexing a Series

In [10]:
# Five integers (1 through 5) labelled by country name.
g5 = pd.Series(
    np.arange(1, 6),
    index=['France', 'Germany', 'Japan', 'UK', 'USA'],
)

In [11]:
# Display the Series: index labels on the left, values on the right.
g5

France     1
Germany    2
Japan      3
UK         4
USA        5
dtype: int32

In [12]:
# Look up a single value by its index label.
g5['France']

1

In [13]:
# Values pulled out by label are scalars, so ordinary arithmetic applies (1 + 5).
g5['France']+g5['USA']

6

In [14]:
# Slice by label. Unlike positional slicing, the end label is included:
# 'UK' is part of the result.
g5['Germany':'UK']

Germany    2
Japan      3
UK         4
dtype: int32

In [15]:
# A second labelled Series whose index only partly overlaps with g5's
# (USA, France and Germany appear in both).
nato_top5trp = pd.Series(
    np.arange(1, 6),
    index=['USA', 'Turkey', 'France', 'Germany', 'Italy'],
)
nato_top5trp

USA        1
Turkey     2
France     3
Germany    4
Italy      5
dtype: int32

In [16]:
# Addition matches values by index label, not by position. Labels found in
# only one of the two Series produce NaN, and the result comes back as float64.
g5+nato_top5trp

France     4.0
Germany    6.0
Italy      NaN
Japan      NaN
Turkey     NaN
UK         NaN
USA        6.0
dtype: float64

# DataFrames

In [18]:
# 4x4 frame holding 1..16 in row-major order, with lettered rows and columns.
df = pd.DataFrame(
    np.arange(1, 17).reshape(4, 4),
    index=['a', 'b', 'c', 'd'],
    columns=['I', 'J', 'K', 'L'],
)
df

Unnamed: 0,I,J,K,L
a,1,2,3,4
b,5,6,7,8
c,9,10,11,12
d,13,14,15,16


In [20]:
# Confirm the type of the object just built.
type(df)

pandas.core.frame.DataFrame

## Add New Column

In [29]:
# Assigning to a column name that does not exist yet creates the column.
# The array supplies four values (100..103), one per row of df.
df['M'] = np.arange(100,104)
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [30]:
# A new column can also be derived from existing ones: N is the row-by-row
# sum of columns I and J.
df['N'] = df['I'] + df['J']
df

Unnamed: 0,I,J,K,L,M,N
a,1,2,3,4,100,3
b,5,6,7,8,101,11
c,9,10,11,12,102,19
d,13,14,15,16,103,27


## Remove Column

In [33]:
# Returns a new frame without column N; `df` itself is not modified.
df.drop(columns=['N'])

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


In [34]:
# Column N is still here: the drop above returned a new frame and left df as it was.
df

Unnamed: 0,I,J,K,L,M,N
a,1,2,3,4,100,3
b,5,6,7,8,101,11
c,9,10,11,12,102,19
d,13,14,15,16,103,27


In [35]:
# Make the removal stick by rebinding `df` to the frame that drop() returns.
# Reassignment is preferred over inplace=True: it makes the state change
# visible in the code and keeps the call chainable. `columns=` is the explicit
# spelling of labels + axis=1.
df = df.drop(columns=['N'])

In [36]:
# Column N is no longer part of df.
df

Unnamed: 0,I,J,K,L,M
a,1,2,3,4,100
b,5,6,7,8,101
c,9,10,11,12,102
d,13,14,15,16,103


## Indexing

In [19]:
# Bracket selection with a single column name returns that column's values,
# keyed by the frame's row index.
df['I']

a     1
b     5
c     9
d    13
Name: I, dtype: int32

In [21]:
# A single column selected from a DataFrame is a Series.
type(df['I'])

pandas.core.series.Series

In [24]:
# Attribute-style access returns the same column as df['I'], but avoid it:
# a column name can be mistaken for (or collide with) a DataFrame method or
# attribute. Prefer the bracket form.
df.I

a     1
b     5
c     9
d    13
Name: I, dtype: int32

In [25]:
# Passing a list of column names selects several columns at once; the result
# is a DataFrame containing just those columns, in the order listed.
cols_required = ['J','K']
df[cols_required]

Unnamed: 0,J,K
a,2,3
b,6,7
c,10,11
d,14,15


In [26]:
# Same selection with the list written inline. Note the double brackets:
# the outer pair indexes the frame, the inner pair is the list of names.
df[['J','K']]

Unnamed: 0,J,K
a,2,3
b,6,7
c,10,11
d,14,15
