# --Indexing Series--

Untuk melakukan indexing pada objek series, kita perlu menempatkan objek series tersebut sebagai dictionary dan sebagai array

## Series as Dictionary

In [3]:
import pandas as pd

In [4]:
listSeries = [12,33,42]

In [5]:
pd_listSeries = pd.Series(listSeries, index=['a','b','c'])

In [6]:
pd_listSeries

a    12
b    33
c    42
dtype: int64

#### Indexing Key

In [9]:
pd_listSeries.keys()

Index(['a', 'b', 'c'], dtype='object')

#### Indexing Values 

In [7]:
pd_listSeries.values

array([12, 33, 42], dtype=int64)

#### Checking Existing Keys 

In [8]:
'a' in pd_listSeries

True

## Series as Array 

#### slicing dengan explicit index 

In [11]:
pd_listSeries['a':'c']

a    12
b    33
c    42
dtype: int64

#### slicing dengan implicit index 

In [12]:
pd_listSeries[:3]

a    12
b    33
c    42
dtype: int64

## indexing menggunakan .loc dan .iloc objek series

#### .loc indexing 

In [12]:
pd_listSeries.loc['a']

12

In [14]:
pd_listSeries.loc['a':'c']

a    12
b    33
c    42
dtype: int64

#### .iloc indexing 

In [15]:
pd_listSeries.iloc['a']

TypeError: Cannot index by location index with a non-integer key

In [16]:
pd_listSeries.iloc[:2]

a    12
b    33
dtype: int64

## Filtering objek Series 

In [17]:
pd_listSeries[pd_listSeries>15]

b    33
c    42
dtype: int64

In [20]:
pd_listSeries[['b','c']]

b    33
c    42
dtype: int64

# --Indexing DataFrame--

In [34]:
kapal_dataFrame = pd.read_csv("kapal_titanic.csv")

In [44]:
print(kapal_dataFrame)

     survived  pclass     sex   age  sibsp  parch     fare embarked deck
0           0       3    male  22.0      1      0   7.2500        S  NaN
1           1       1  female  38.0      1      0  71.2833        C    C
2           1       3  female  26.0      0      0   7.9250        S  NaN
3           1       1  female  35.0      1      0  53.1000        S    C
4           0       3    male  35.0      0      0   8.0500        S  NaN
..        ...     ...     ...   ...    ...    ...      ...      ...  ...
886         0       2    male  27.0      0      0  13.0000        S  NaN
887         1       1  female  19.0      0      0  30.0000        S    B
888         0       3  female   NaN      1      2  23.4500        S  NaN
889         1       1    male  26.0      0      0  30.0000        C    C
890         0       3    male  32.0      0      0   7.7500        Q  NaN

[891 rows x 9 columns]


In [23]:
kapal_dataFrame.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### indexing operator as series 

In [30]:
kapal_dataFrame["sex"]

0        male
1      female
2      female
3      female
4        male
        ...  
886      male
887    female
888    female
889      male
890      male
Name: sex, Length: 891, dtype: object

#### indexing operator as dataframe

In [38]:
aa = kapal_dataFrame["sex"].head()
print(type(aa))
print(aa)

<class 'pandas.core.series.Series'>
0      male
1    female
2    female
3    female
4      male
Name: sex, dtype: object


In [39]:
bb = kapal_dataFrame[["sex"]].head()
print(type(bb))
print(bb)

<class 'pandas.core.frame.DataFrame'>
      sex
0    male
1  female
2  female
3  female
4    male


#### indexing using attribute 

In [40]:
kapal_dataFrame.sex

0        male
1      female
2      female
3      female
4        male
        ...  
886      male
887    female
888    female
889      male
890      male
Name: sex, Length: 891, dtype: object

#### indexing using .loc

In [48]:
kapal_dataFrame.loc[:]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


In [50]:
kapal_dataFrame.loc[:,["sex","fare"]].head()

Unnamed: 0,sex,fare
0,male,7.25
1,female,71.2833
2,female,7.925
3,female,53.1
4,male,8.05


In [52]:
kapal_dataFrame.loc[["sex","fare"]].head()

KeyError: "None of [Index(['sex', 'fare'], dtype='object')] are in the [index]"

 : yang ada didalam [ : , ["sex","fare"]]  untuk menandakan index karena method loc memerlukan nomor index

### loc[row_label, column_label]

### iloc[row_posisition, column_position] 

#### indexing using .iloc

In [55]:
kapal_dataFrame.iloc[:,[4,6]].head() # column 4 dan column 6

Unnamed: 0,sibsp,fare
0,1,7.25
1,1,71.2833
2,0,7.925
3,1,53.1
4,0,8.05


# Titanic Case

In [16]:
import pandas as pd

In [169]:
df = pd.read_csv('titanic.csv')

In [148]:
data_titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,1,3,"Braund, Mr. Owen Harris",female,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,1,3,"Allen, Mr. William Henry",female,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,1,2,"Montvila, Rev. Juozas",female,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,1,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",female,26.0,0,0,111369,30.0000,C148,C


### 1. Data orang-orang yang selamat

In [151]:
# boolean series orang orang yang selamat

survive = data_titanic.Survived == 1
survive

0      True
1      True
2      True
3      True
4      True
       ... 
886    True
887    True
888    True
889    True
890    True
Name: Survived, Length: 891, dtype: bool

In [152]:
# print(survive)
survive_all = data_titanic[data_titanic['Survived'] == True]
survive_all.head(20)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,1,3,"Braund, Mr. Owen Harris",female,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,1,3,"Allen, Mr. William Henry",female,35.0,0,0,373450,8.05,,S
5,6,1,3,"Moran, Mr. James",female,,0,0,330877,8.4583,,Q
6,7,1,1,"McCarthy, Mr. Timothy J",female,54.0,0,0,17463,51.8625,E46,S
7,8,1,3,"Palsson, Master. Gosta Leonard",female,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


### 2. Data laki-laki yang selamat

In [153]:
survive_men = survive_all.Sex == "male"

survive_men

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Sex, Length: 891, dtype: bool

In [154]:
men2 = data_titanic[data_titanic["Sex"] == "male"]

men2

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked


In [155]:
survive_men2 = men2["Survived"] == 1

survive_men2

Series([], Name: Survived, dtype: bool)

In [156]:
survive_men2.shape

(0,)

##### Cara kedua 

In [157]:
# cari penumpang berkelamin pria

In [158]:
pria = data_titanic.Sex == "male"

In [159]:
pria

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Sex, Length: 891, dtype: bool

In [160]:
# cari penumpang yang selamat

In [161]:
penumpang_selamat = data_titanic.Survived == 1

In [162]:
penumpang_selamat

0      True
1      True
2      True
3      True
4      True
       ... 
886    True
887    True
888    True
889    True
890    True
Name: Survived, Length: 891, dtype: bool

In [163]:
# tampilkan hasil keduanya

In [164]:
pria_selamat = data_titanic[(pria) & (penumpang_selamat)]

In [165]:
pria_selamat

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked


In [166]:
pria_selamat.shape

(0, 12)

### 3. Data Perempuan yang tidak selamat dengan umur lebih dari 40 tahun atau kurang dari 20 tahun 

In [172]:
data_titanic = pd.read_csv('titanic.csv')
data_not = data_titanic.Survived == 0
data_not

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888     True
889    False
890     True
Name: Survived, Length: 891, dtype: bool

In [180]:
data_not_female = data_titanic.Sex== "female"

lebih_40 = data_titanic.Age > 40
kurang_20 = data_titanic.Age < 20
k = data_titanic[(data_not_female) & (data_not) & (lebih_40 | kurang_20)]
k

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
14,15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S
24,25,0,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909,21.075,,S
38,39,0,3,"Vander Planke, Miss. Augusta Maria",female,18.0,2,0,345764,18.0,,S
49,50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18.0,1,0,349237,17.8,,S
71,72,0,3,"Goodwin, Miss. Lillian Amy",female,16.0,5,2,CA 2144,46.9,,S
111,112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C
114,115,0,3,"Attalah, Miss. Malake",female,17.0,0,0,2627,14.4583,,C
119,120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2.0,4,2,347082,31.275,,S
132,133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,A/5. 3337,14.5,,S
147,148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9.0,2,2,W./C. 6608,34.375,,S


In [176]:
data_not_female_age = (data_titanic.Age >= 40) | (data_titanic.Age <= 20)
# data_not_female_age


lebih_40 = data_titanic.Age > 40

lebih_40

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool

In [175]:
kurang_20 = data_titanic.Age < 20

kurang_20

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887     True
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool

In [179]:
# print(data_titanic[(data_not) & (data_not_female) & (data_not_female_age)])



data_titanic[(data_not)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.0500,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S


In [178]:
data_titanic[(data_not) & (data_not_female) & (lebih_40 | kurang_20)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
14,15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S
24,25,0,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909,21.075,,S
38,39,0,3,"Vander Planke, Miss. Augusta Maria",female,18.0,2,0,345764,18.0,,S
49,50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18.0,1,0,349237,17.8,,S
71,72,0,3,"Goodwin, Miss. Lillian Amy",female,16.0,5,2,CA 2144,46.9,,S
111,112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C
114,115,0,3,"Attalah, Miss. Malake",female,17.0,0,0,2627,14.4583,,C
119,120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2.0,4,2,347082,31.275,,S
132,133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47.0,1,0,A/5. 3337,14.5,,S
147,148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9.0,2,2,W./C. 6608,34.375,,S


# Data Transformation 

Data transformasi adalah mengubah format data kepada suatu format yang kita inginkan. didalam matematika dikenal dengan istilah mapping. dalam melakukan transformasi kita memerlukan terlebih dahulu suatu fungsi yang dapat memetakan bentuk data awal kita menjadi bentuk data akhir yang kita inginkan. 

In [187]:
import pandas as pd

data_titanic = pd.read_csv('titanic.csv')

In [191]:
def minus_minimum(x):
    return x - x.min()

setelah kita mendefinisikan fungsinya. sekarang kita akan menggunakan method apply() dari pandas. method apply ini akan menerima suatu fungsi kemudian ‘meng-apply’ fungsi tersebut ke dalam setiap sumbu dari dataframe, secara default sumbu yang ‘apply’ adalah sumbu column. 

In [198]:
# data_titanic['Age'].apply(minus_minimum)
(data_titanic[['Age']]).describe()

Unnamed: 0,Age
count,714.0
mean,29.699118
std,14.526497
min,0.42
25%,20.125
50%,28.0
75%,38.0
max,80.0
