# **Pandas Function**

In [51]:
import pandas as pd
import numpy as np

## 1. Membaca Data Awal CSV Files

In [52]:
df = pd.read_csv(
    'PT_ABC.csv',
    delimiter = ';'
)
df

Unnamed: 0,Data Pegawai PT.ABC 2020,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,,,,,
1,No,Nama,Usia,Kota,Gaji
2,1,Andi,22,Jakarta,11000000
3,2,Budi,28,Jakarta,15000000
4,3,Caca,21,Bandung,15000000
5,4,Deni,29,Semarang,12000000
6,5,Euis,35,Bandung,14000000
7,,,,,
8,2020 PT ABC,,,,


## 2. Menghilangkan baris kosong awal

In [53]:
df = pd.read_csv(
    'PT_ABC.csv',
    delimiter = ';', 
    header = 2
)
df

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22.0,Jakarta,11000000.0
1,2,Budi,28.0,Jakarta,15000000.0
2,3,Caca,21.0,Bandung,15000000.0
3,4,Deni,29.0,Semarang,12000000.0
4,5,Euis,35.0,Bandung,14000000.0
5,,,,,
6,2020 PT ABC,,,,


In [54]:
df = pd.read_csv(
    'PT_ABC.csv',
    delimiter = ';', 
    skiprows = 2
)
df

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22.0,Jakarta,11000000.0
1,2,Budi,28.0,Jakarta,15000000.0
2,3,Caca,21.0,Bandung,15000000.0
3,4,Deni,29.0,Semarang,12000000.0
4,5,Euis,35.0,Bandung,14000000.0
5,,,,,
6,2020 PT ABC,,,,


## 3. Menghilangkan baris kosong akhir

In [55]:
df = pd.read_csv(
    'PT_ABC.csv',
    delimiter = ';', 
    skiprows = 2,
    engine = 'python',
    skipfooter = 2
)
df

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000
1,2,Budi,28,Jakarta,15000000
2,3,Caca,21,Bandung,15000000
3,4,Deni,29,Semarang,12000000
4,5,Euis,35,Bandung,14000000


In [56]:
# df.describe()

## 4. Menganalisis Data

In [57]:
df['Nama']

0    Andi
1    Budi
2    Caca
3    Deni
4    Euis
Name: Nama, dtype: object

In [58]:
df['Gaji'].describe().loc['max']

15000000.0

In [59]:
df['Gaji'].max()

15000000

In [60]:
df[['Nama','Gaji']].max()

Nama        Euis
Gaji    15000000
dtype: object

In [61]:
df['Gaji'].min()

11000000

In [62]:
df['Gaji'].mean()

13400000.0

In [63]:
df['Gaji'].sum()

67000000

In [64]:
df['Gaji'].mode()

0    15000000
dtype: int64

In [65]:
df['Gaji'].std()

1816590.212458495

In [66]:
# Nilai tengah gaji
df['Gaji'].median()

14000000.0

In [67]:
# Menampilkan data yang memiliki gaji paling kecil
df[df['Gaji'] == df['Gaji'].min()]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000


In [68]:
df[df['Gaji'] == df['Gaji'].min()][['Nama','Usia']]

Unnamed: 0,Nama,Usia
0,Andi,22


In [69]:
df[df['Usia'] == df['Usia'].max()]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
4,5,Euis,35,Bandung,14000000


In [70]:
df[df['Usia'] != 35]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000
1,2,Budi,28,Jakarta,15000000
2,3,Caca,21,Bandung,15000000
3,4,Deni,29,Semarang,12000000


In [71]:
df[df['Usia'] >= 28]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000
3,4,Deni,29,Semarang,12000000
4,5,Euis,35,Bandung,14000000


In [72]:
df[df['Usia'] > 25][df['Kota'] == 'Jakarta']

  """Entry point for launching an IPython kernel.


Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000


In [73]:
df[(df['Usia'] > 25) & (df['Kota'] == 'Jakarta')]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000


In [74]:
df

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000
1,2,Budi,28,Jakarta,15000000
2,3,Caca,21,Bandung,15000000
3,4,Deni,29,Semarang,12000000
4,5,Euis,35,Bandung,14000000


In [75]:
df[(df['Usia'] > 25) & (df['Usia'] < 30)]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000
3,4,Deni,29,Semarang,12000000


In [77]:
df[df['Usia'].between(25, 30)]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000
3,4,Deni,29,Semarang,12000000


In [76]:
# yang usia lebih dari 25 atau yang tinggal di Jakarta
df[(df['Usia'] > 25) | (df['Kota'] == 'Jakarta')]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000
1,2,Budi,28,Jakarta,15000000
3,4,Deni,29,Semarang,12000000
4,5,Euis,35,Bandung,14000000


In [79]:
# yang usia lebih dari 25 dan yang tinggal di Jakarta
df[(df['Usia'] > 25) & (df['Kota'] == 'Jakarta')]

Unnamed: 0,No,Nama,Usia,Kota,Gaji
1,2,Budi,28,Jakarta,15000000


## 5. Membaca File Xlsx

In [82]:
df = pd.read_excel(
    'PT_ABC.xlsx',
    header = 2,
    skipfooter = 2
)
df

Unnamed: 0,No,Nama,Usia,Kota,Gaji
0,1,Andi,22,Jakarta,11000000
1,2,Budi,28,Jakarta,15000000
2,3,Caca,21,Bandung,15000000
3,4,Deni,29,Semarang,12000000
4,5,Euis,35,Bandung,14000000


In [84]:
df[['Gaji','Kota','Usia','Nama','No']]

Unnamed: 0,Gaji,Kota,Usia,Nama,No
0,11000000,Jakarta,22,Andi,1
1,15000000,Jakarta,28,Budi,2
2,15000000,Bandung,21,Caca,3
3,12000000,Semarang,29,Deni,4
4,14000000,Bandung,35,Euis,5


In [86]:
list(df.columns)

['No', 'Nama', 'Usia', 'Kota', 'Gaji']

In [87]:
df.columns.tolist()

['No', 'Nama', 'Usia', 'Kota', 'Gaji']

In [89]:
kol = df.columns.tolist()
kol = kol[-1:] + kol[:-1]
df[kol]

Unnamed: 0,Gaji,No,Nama,Usia,Kota
0,11000000,1,Andi,22,Jakarta
1,15000000,2,Budi,28,Jakarta
2,15000000,3,Caca,21,Bandung
3,12000000,4,Deni,29,Semarang
4,14000000,5,Euis,35,Bandung


# 6. Mengekspor Data ke CSV dan Xlsx

In [90]:
# Ekstrak to file baru csv
df.to_csv('filebaru.csv')

In [93]:
df.to_csv('filebaru.csv', index=False)

In [97]:
df.to_excel('filebaru.xlsx', index=False)

In [99]:
# df = pd.read_excel(
#     'PT_ABC.xlsx',
#     'Sheet2'
# )
# df

Unnamed: 0,id,nama,kota
0,20,Ali,Denpasar
1,21,Bambang,Palu
2,22,Cindy,Medan
3,23,Didi,Yogyakarta
4,24,Ello,Kediri


In [100]:
df = pd.read_excel(
    'PT_ABC.xlsx',
    'TestPandas' #nama sheet sudah diubah
)
df

Unnamed: 0,id,nama,kota
0,20,Ali,Denpasar
1,21,Bambang,Palu
2,22,Cindy,Medan
3,23,Didi,Yogyakarta
4,24,Ello,Kediri


## 7. Membaca Data Html

In [109]:
df = pd.read_html('a.html')
df

[   No  Nama  Usia
 0   1  Andi    21
 1   2  Budi    21
 2   3  Caca    21]

In [108]:
df[0]

Unnamed: 0,No,Nama,Usia
0,1,Andi,21
1,2,Budi,21
2,3,Caca,21


In [106]:
type(df)

list

In [107]:
type(df[0])

pandas.core.frame.DataFrame

In [131]:
df = pd.read_html('http://digidb.io/digimon-list/')
df = df[0]
df

Unnamed: 0,#,Digimon,Stage,Type,Attribute,Memory,Equip Slots,HP,SP,Atk,Def,Int,Spd
0,1,Kuramon,Baby,Free,Neutral,2,0,590,77,79,69,68,95
1,2,Pabumon,Baby,Free,Neutral,2,0,950,62,76,76,69,68
2,3,Punimon,Baby,Free,Neutral,2,0,870,50,97,87,50,75
3,4,Botamon,Baby,Free,Neutral,2,0,690,68,77,95,76,61
4,5,Poyomon,Baby,Free,Neutral,2,0,540,98,54,59,95,86
...,...,...,...,...,...,...,...,...,...,...,...,...,...
336,337,Alphamon NX,Mega,Vaccine,Neutral,6,1,980,104,109,114,109,116
337,338,Crusadermon NX,Mega,Virus,Dark,6,1,1030,100,118,99,109,121
338,339,Leopardmon NX,Mega,Data,Light,6,1,960,109,112,91,116,126
339,340,Omnimon NX,Mega,Vaccine,Light,6,1,1010,98,116,111,111,113


In [132]:
df.to_csv('digimonku.csv', index=False)

In [125]:
df.to_json('digimonku.json')

In [135]:
df.to_json('digimonku.json', orient = 'records')

## Webscraping

In [212]:
import requests
url = 'https://pokemondb.net/pokedex/all'
x = requests.get(url)
df = pd.read_html(x.text)
df[0]

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,1,Bulbasaur,Grass Poison,318,45,49,49,65,65,45
1,2,Ivysaur,Grass Poison,405,60,62,63,80,80,60
2,3,Venusaur,Grass Poison,525,80,82,83,100,100,80
3,3,Venusaur Mega Venusaur,Grass Poison,625,80,100,123,122,120,80
4,4,Charmander,Fire,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...,...
921,805,Stakataka,Rock Steel,570,61,131,211,53,101,13
922,806,Blacephalon,Fire Ghost,570,53,127,53,151,79,107
923,807,Zeraora,Electric,600,88,112,75,102,80,143
924,808,Meltan,Steel,300,46,65,65,55,35,34


In [213]:
df = df[0].set_index('#')
df

Unnamed: 0_level_0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Bulbasaur,Grass Poison,318,45,49,49,65,65,45
2,Ivysaur,Grass Poison,405,60,62,63,80,80,60
3,Venusaur,Grass Poison,525,80,82,83,100,100,80
3,Venusaur Mega Venusaur,Grass Poison,625,80,100,123,122,120,80
4,Charmander,Fire,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...
805,Stakataka,Rock Steel,570,61,131,211,53,101,13
806,Blacephalon,Fire Ghost,570,53,127,53,151,79,107
807,Zeraora,Electric,600,88,112,75,102,80,143
808,Meltan,Steel,300,46,65,65,55,35,34


In [214]:
df[df['Attack'] == df['Attack'].max()]

Unnamed: 0_level_0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
150,Mewtwo Mega Mewtwo X,Psychic Fighting,780,106,190,100,154,100,130


In [215]:
# digimon yang memiliki kecepatan maksimal
df[df['Speed'] == df['Speed'].max()]

Unnamed: 0_level_0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
386,Deoxys Speed Forme,Psychic,600,50,95,90,95,90,180


In [216]:
#mencari nama dan tipe digimon yang memiliki kecepatan diatas rata-rata
df[df['Speed'] > df['Speed'].mean()][['Name','Type','Speed']]

Unnamed: 0_level_0,Name,Type,Speed
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Venusaur,Grass Poison,80
3,Venusaur Mega Venusaur,Grass Poison,80
5,Charmeleon,Fire,80
6,Charizard,Fire Flying,100
6,Charizard Mega Charizard X,Fire Dragon,100
...,...,...,...
802,Marshadow,Fighting Ghost,125
803,Poipole,Poison,73
804,Naganadel,Poison Dragon,121
806,Blacephalon,Fire Ghost,107


In [217]:
#Hanya menampilkan karakter unik
df['Type'].unique()

array(['Grass Poison', 'Fire', 'Fire Flying', 'Fire Dragon', 'Water',
       'Bug', 'Bug Flying', 'Bug Poison', 'Normal Flying', 'Normal',
       'Dark Normal', 'Poison', 'Electric', 'Electric Psychic', 'Ground',
       'Ice Steel', 'Poison Ground', 'Fairy', 'Ice', 'Ice Fairy',
       'Normal Fairy', 'Poison Flying', 'Bug Grass', 'Ground Steel',
       'Dark', 'Fighting', 'Water Fighting', 'Psychic', 'Water Poison',
       'Rock Ground', 'Rock Electric', 'Water Psychic', 'Electric Steel',
       'Water Ice', 'Poison Dark', 'Ghost Poison', 'Grass Psychic',
       'Grass Dragon', 'Fire Ghost', 'Ground Rock', 'Grass',
       'Psychic Fairy', 'Ice Psychic', 'Water Flying', 'Water Dark',
       'Rock Water', 'Rock Flying', 'Ice Flying', 'Electric Flying',
       'Dragon', 'Dragon Flying', 'Psychic Fighting', 'Water Electric',
       'Fairy Flying', 'Psychic Flying', 'Electric Dragon', 'Water Fairy',
       'Rock', 'Grass Flying', 'Water Ground', 'Dark Flying', 'Ghost',
       'Normal Psychi

## Mengisi Data Kosong

In [232]:
df = pd.read_csv('PT_ABC.csv')
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21,15000000.0
1,2,Budi,,16000000.0
2,3,-,25,10000000.0
3,4,Deni,,12000000.0
4,5,Euis,n.a,


In [219]:
#Mengecek data yang kosong
df.isnull()

Unnamed: 0,no,nama,usia,gaji
0,False,False,False,False
1,False,False,True,False
2,False,False,False,False
3,False,False,True,False
4,False,False,False,True


In [220]:
df.isnull().sum()

no      0
nama    0
usia    2
gaji    1
dtype: int64

In [234]:
df = pd.read_csv(
    'PT_ABC.csv',
    na_values = ['-', 'n.a']
)
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000.0
1,2,Budi,,16000000.0
2,3,,25.0,10000000.0
3,4,Deni,,12000000.0
4,5,Euis,,


In [222]:
df.isnull()

Unnamed: 0,no,nama,usia,gaji
0,False,False,False,False
1,False,False,True,False
2,False,True,False,False
3,False,False,True,False
4,False,False,True,True


In [223]:
#mengisi data kosong
# df = df.fillna('Haha')
# df

#mengisi data kosong
df = df.fillna({
    'nama':'Anonim',
    'usia': 24,
    'gaji': 5000000
})


# mengisi data kosong dengan data sebelumnya, berdasarkan axis index (baris vertikal)
df = df.fillna(method = 'ffill', axis=0)
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000.0
1,2,Budi,24.0,16000000.0
2,3,Anonim,25.0,10000000.0
3,4,Deni,24.0,12000000.0
4,5,Euis,24.0,5000000.0


In [227]:
# mengisi data kosong dengan data setelahnya (bfill), berdasarkan axis index (baris vertikal)
df = df.fillna(method = 'bfill', axis='index')
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000.0
1,2,Budi,25.0,16000000.0
2,3,Deni,25.0,10000000.0
3,4,Deni,,12000000.0
4,5,Euis,,


In [237]:
# mengisi data kosong dengan data sebelumnya (ffil), berdasarkan axis column/1 (baris horizontal)
# df = df.fillna(method = 'ffill', axis=1)
df = df.fillna(0)
df['gaji'] = df['gaji'].astype('int64')
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000
1,2,Budi,0.0,16000000
2,3,0,25.0,10000000
3,4,Deni,0.0,12000000
4,5,Euis,0.0,0


In [251]:
# forward filling
df = pd.read_csv(
    'PT_ABC.csv',
    na_values = ['-', 'n.a']
)

df = pd.read_csv(
    'PT_ABC.csv',
    na_values = ['-', 'n.a']
)


df = df.interpolate().fillna({
    'nama': 'ANONIM'
})

# MENAMPILKAN SISA 
# df = df.dropna(thresh = 3) #ada 4 kolom, tapi minimal ada 3 yang terisi

# MENAMPILKAN DATA YANG TIDAK BOLEH NaN
df = df.dropna(subset = ['nama', 'usia'])

df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000.0
3,4,Deni,31.0,12000000.0


In [246]:
# Menghapus Data yang kosong
df = df.dropna()
df

Unnamed: 0,no,nama,usia,gaji
0,1,Andi,21.0,15000000.0
