In [1]:
import numpy as np
import pandas as pd

# Series

## How to make Pandas Series with customized Index

In [2]:
x = np.array([1,2,3,4,5,6,7,8,9,10])
x_series = pd.Series(x, index= list('abcdefghij'))
x_series

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
dtype: int32

In [3]:
x_df = pd.DataFrame(x.reshape(-1,2), columns=['A', 'B'], index=list('abcde'))
x_df

Unnamed: 0,A,B
a,1,2
b,3,4
c,5,6
d,7,8
e,9,10


## How to make a Pandas Series with column name

In [4]:
x = 'data_scientist marketing sales finance'.split()
y = pd.Series(
    data = np.array(x),
    name = 'Occupancy'
)
y

0    data_scientist
1         marketing
2             sales
3           finance
Name: Occupancy, dtype: object

# Dataframe

In [5]:
x = np.array(list(range(2,11,2)))
y = np.array(list(range(1,11,2)))
df = pd.DataFrame(np.vstack([x,y]).transpose(), columns=['Nilai', 'Begitulah'])
print(df)
print(df.shape)

   Nilai  Begitulah
0      2          1
1      4          3
2      6          5
3      8          7
4     10          9
(5, 2)


In [6]:
z = list(map(lambda a,b: [a,b], x,y))
df = pd.DataFrame(z, columns=['Nilai', 'Begitulah'])
df

Unnamed: 0,Nilai,Begitulah
0,2,1
1,4,3
2,6,5
3,8,7
4,10,9


In [7]:
df = pd.DataFrame()
df['X'] = x
df['Y'] = y
df['Z'] = list('david')
df

Unnamed: 0,X,Y,Z
0,2,1,d
1,4,3,a
2,6,5,v
3,8,7,i
4,10,9,d


In [8]:
data = [
    ['Andi', 'Jakarta', 'PNS'],
    ['Budi', 'Jakarta', 'DS'],
    ['Caca', 'Jakarta', 'BI'],
    ['Nama', 'Kota', 'Pekerjaan']
]
df_data = pd.DataFrame(data[0:3], index = range(1,len(data)), columns=data[-1])
df_data

Unnamed: 0,Nama,Kota,Pekerjaan
1,Andi,Jakarta,PNS
2,Budi,Jakarta,DS
3,Caca,Jakarta,BI


In [9]:
data = {
    'x' : [123],
    'y' : [123],
    'z' : [123]
}

df = pd.DataFrame(data)
df

Unnamed: 0,x,y,z
0,123,123,123


In [10]:
data = [
    {'nama': 'Andi', 'kota': 'Jakarta'},
    {'nama': 'Budi', 'kota': 'Jakarta'},
    {'nama': 'Caca', 'kota': 'Jakarta', 'job': 'NaN'},
    {'nama': 'Deni', 'kota': 'Jakarta', 'job': 'PNS'},
]
df = pd.DataFrame(data)
df

Unnamed: 0,nama,kota,job
0,Andi,Jakarta,
1,Budi,Jakarta,
2,Caca,Jakarta,
3,Deni,Jakarta,PNS


In [11]:
print(type(df.iloc[1,2]))
print(type(df.iloc[2,2]))

<class 'float'>
<class 'str'>


In [12]:
x = pd.Series(list(range(6)), name='Test')
y = pd.Series(list(range(6)), name='Try')
df = pd.DataFrame({
    x.name : x,
    y.name : y
})
df

Unnamed: 0,Test,Try
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4
5,5,5


In [18]:
df = pd.DataFrame({
    'A': np.arange(20,26),
    'B': [32, 4, 12, 54, 21, 18],
    'C': np.random.rand(6),
    'D': np.ones(6)
}, index=list('abcdef'))
df

Unnamed: 0,A,B,C,D
a,20,32,0.322643,1.0
b,21,4,0.139466,1.0
c,22,12,0.230732,1.0
d,23,54,0.972307,1.0
e,24,21,0.218774,1.0
f,25,18,0.030733,1.0


In [19]:
df.head()

Unnamed: 0,A,B,C,D
a,20,32,0.322643,1.0
b,21,4,0.139466,1.0
c,22,12,0.230732,1.0
d,23,54,0.972307,1.0
e,24,21,0.218774,1.0


In [20]:
df.head(3)

Unnamed: 0,A,B,C,D
a,20,32,0.322643,1.0
b,21,4,0.139466,1.0
c,22,12,0.230732,1.0


In [21]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [22]:
df.index

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [26]:
df.values

array([[2.00000000e+01, 3.20000000e+01, 3.22642878e-01, 1.00000000e+00],
       [2.10000000e+01, 4.00000000e+00, 1.39465909e-01, 1.00000000e+00],
       [2.20000000e+01, 1.20000000e+01, 2.30732236e-01, 1.00000000e+00],
       [2.30000000e+01, 5.40000000e+01, 9.72307202e-01, 1.00000000e+00],
       [2.40000000e+01, 2.10000000e+01, 2.18774269e-01, 1.00000000e+00],
       [2.50000000e+01, 1.80000000e+01, 3.07328212e-02, 1.00000000e+00]])

In [24]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,22.5,23.5,0.319109,1.0
std,1.870829,17.615334,0.334661,0.0
min,20.0,4.0,0.030733,1.0
25%,21.25,13.5,0.159293,1.0
50%,22.5,19.5,0.224753,1.0
75%,23.75,29.25,0.299665,1.0
max,25.0,54.0,0.972307,1.0


In [29]:
df.sort_index(ascending=False, axis=1)

Unnamed: 0,D,C,B,A
a,1.0,0.322643,32,20
b,1.0,0.139466,4,21
c,1.0,0.230732,12,22
d,1.0,0.972307,54,23
e,1.0,0.218774,21,24
f,1.0,0.030733,18,25


In [30]:
df.sort_values(by='C')

Unnamed: 0,A,B,C,D
f,25,18,0.030733,1.0
b,21,4,0.139466,1.0
e,24,21,0.218774,1.0
c,22,12,0.230732,1.0
a,20,32,0.322643,1.0
d,23,54,0.972307,1.0


In [32]:
df.sort_values(by=['A','B'], ascending=[True,True])

Unnamed: 0,A,B,C,D
a,20,32,0.322643,1.0
b,21,4,0.139466,1.0
c,22,12,0.230732,1.0
d,23,54,0.972307,1.0
e,24,21,0.218774,1.0
f,25,18,0.030733,1.0


## df.loc get element using index name and column name

In [35]:
df.loc['a', 'A']

20

## df.iloc (int-loc) using indexing

In [36]:
df.iloc[0,0]

20

In [None]:
df.iloc[::2].sort_values()