# Perkenalan Pandas Object

In [15]:
import numpy as np
import pandas as pd

## Pandas Series Object

In [3]:
data = pd.Series([0.25,0.5,0.75,1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
#ambil nilai dari series
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
#ambil index
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
#slicing
data[1:3]

1    0.50
2    0.75
dtype: float64

### Cara mendeklarasikan series pd.Series()

#### Cara 1: generelized NumPy

In [7]:
data = pd.Series([0.25,0.5,0.75,1.0], index = ['a','b','c','d'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [8]:
#index implicit
data[3]

#index explicit
data['c']

0.75

#### Cara 2: Series sbg Dictionary

In [26]:
population_dict = {'Jakarta': 750000, 'Bogor': 400000, 'Depok': 200000, 'Tanggerang': 150000, 'Bekasi': 234765}
population_dict = pd.Series(population_dict)
population_dict

Jakarta       750000
Bogor         400000
Depok         200000
Tanggerang    150000
Bekasi        234765
dtype: int64

In [14]:
!pip install pandas

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [17]:
population_dict['Jakarta':'Bekasi']

Jakarta       750000
Bogor         400000
Depok         200000
Tanggerang    150000
Bekasi        234765
dtype: int64

#### Cara 3: Konstruksi Objek Series

In [20]:
# sintaks umum >>> pd.Series([list or dict], index=[index])

In [19]:
pd.Series([2,4,6]) #default index

0    2
1    4
2    6
dtype: int64

In [22]:
pd.Series(6, index = [1,2,3,4,5]) #genereate series by scalar

1    6
2    6
3    6
4    6
5    6
dtype: int64

In [23]:
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

In [25]:
#filter by index
pd.Series({2:'a', 1:'b', 3:'c'}, index=[1,3])

1    b
3    c
dtype: object

## DataFrame Object

In [28]:
luas_dict = {'Jakarta': 32453, 'Bogor': 435355, 'Depok': '655453', 'Tanggerang': 763463, 'Bekasi': 44564}
luas = pd.Series(luas_dict)
luas

Jakarta        32453
Bogor         435355
Depok         655453
Tanggerang    763463
Bekasi         44564
dtype: object

In [29]:
daerah = pd.DataFrame({'populasi': population_dict, 'luas':luas})
daerah

Unnamed: 0,populasi,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453
Tanggerang,150000,763463
Bekasi,234765,44564


In [30]:
daerah.columns

Index(['populasi', 'luas'], dtype='object')

In [34]:
daerah

Unnamed: 0,populasi,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453
Tanggerang,150000,763463
Bekasi,234765,44564


In [37]:
daerah

Unnamed: 0,populasi,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453
Tanggerang,150000,763463
Bekasi,234765,44564


In [38]:
type(daerah)

pandas.core.frame.DataFrame

In [39]:
# dataframe bisa berupa sari Series
pd.DataFrame(population_dict, columns=['population'])

Unnamed: 0,population
Jakarta,750000
Bogor,400000
Depok,200000
Tanggerang,150000
Bekasi,234765


In [40]:
# buat list
bil_genap = [2 * i for i in range(10)]
bil_genap

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [41]:
data = [{'angka': i, 'kali_2': i * 2} for i in range (11)]
data

[{'angka': 0, 'kali_2': 0},
 {'angka': 1, 'kali_2': 2},
 {'angka': 2, 'kali_2': 4},
 {'angka': 3, 'kali_2': 6},
 {'angka': 4, 'kali_2': 8},
 {'angka': 5, 'kali_2': 10},
 {'angka': 6, 'kali_2': 12},
 {'angka': 7, 'kali_2': 14},
 {'angka': 8, 'kali_2': 16},
 {'angka': 9, 'kali_2': 18},
 {'angka': 10, 'kali_2': 20}]

In [42]:
pd.DataFrame(data)

Unnamed: 0,angka,kali_2
0,0,0
1,1,2
2,2,4
3,3,6
4,4,8
5,5,10
6,6,12
7,7,14
8,8,16
9,9,18


In [43]:
data = [{'a':1, 'b':2}, {'a':3, 'c':8}]
pd.DataFrame(data)

Unnamed: 0,a,b,c
0,1,2.0,
1,3,,8.0


#### Index

In [51]:
indA = pd.Index([1,3,5,7,7,9])
indB = pd.Index([2,3,5,7,11])

indA & indB #irisan


Int64Index([3, 5, 7, 7], dtype='int64')

In [49]:
indA | indB #union


Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [50]:
indA ^ indB #symetric difference

Int64Index([1, 2, 9, 11], dtype='int64')