# Pandas - DataFrame Basics

## Import Libraries

In [1]:
# pip install pandas
# pip install numpy
# pip install openpyxl   (required by to_excel / read_excel for .xlsx files)

# Import libraries

import pandas as pd
import numpy as np

In [2]:
# Create a DataFrame from a dictionary: each key becomes a column name
# and each list holds that column's values.
stock_columns = {
    "kode": ["P1", "P2", "P3", "P4", "P5"],
    "stok": [12, 8, 15, 20, 5],
    "lokasi": ["A1", "B2", "C3", "D4", "E5"],
}
df_dictionary = pd.DataFrame(stock_columns)
df_dictionary

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [3]:
# Create a DataFrame from a list of rows (plain Python lists, not a NumPy array).
# Each inner list is one row; the column names are supplied separately.
stock_rows = [
    ["P1", 12, "A1"],
    ["P2", 8, "B2"],
    ["P3", 15, "C3"],
    ["P4", 20, "D4"],
    ["P5", 5, "E5"],
]
df_numpy = pd.DataFrame(stock_rows, columns=["kode", "stok", "lokasi"])
df_numpy

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [4]:
# Save the DataFrame to disk as a CSV file and as an Excel workbook.
# index=False keeps the row index out of both files.
df_numpy.to_csv("Stok_barang.csv", index=False)
df_numpy.to_excel("Stok_barang.xlsx", index=False)

In [5]:
# Load the CSV and Excel files back into separate DataFrames
df_csv = pd.read_csv("Stok_barang.csv")
df_excel = pd.read_excel("Stok_barang.xlsx")

In [6]:
# Display the DataFrame that was read from the CSV file
df_csv

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [7]:
# Display the DataFrame that was read from the Excel file
df_excel

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [8]:
# Summary info: index range, column names, non-null counts, dtypes and memory usage
df_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   kode    5 non-null      object
 1   stok    5 non-null      int64 
 2   lokasi  5 non-null      object
dtypes: int64(1), object(2)
memory usage: 252.0+ bytes


In [9]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numerical columns
df_csv.describe()

Unnamed: 0,stok
count,5.0
mean,12.0
std,5.87367
min,5.0
25%,8.0
50%,12.0
75%,15.0
max,20.0


In [10]:
# Column names of the DataFrame
df_csv.columns

Index(['kode', 'stok', 'lokasi'], dtype='object')

In [11]:
# DataFrame dimensions as a (number of rows, number of columns) tuple
df_csv.shape

(5, 3)

In [13]:
# Values of a single column, selected by name (the result is a Series)
df_csv["stok"]

0    12
1     8
2    15
3    20
4     5
Name: stok, dtype: int64

In [14]:
# Values of several columns at once: pass a list of column names
# (the result is a DataFrame containing only those columns)
df_csv[["kode", "lokasi"]]

Unnamed: 0,kode,lokasi
0,P1,A1
1,P2,B2
2,P3,C3
3,P4,D4
4,P5,E5


In [16]:
# First N rows; head() called without an argument returns the first 5 rows
df_csv.head()

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [18]:
# Last N rows; tail() called without an argument returns the last 5 rows
df_csv.tail()

Unnamed: 0,kode,stok,lokasi
0,P1,12,A1
1,P2,8,B2
2,P3,15,C3
3,P4,20,D4
4,P5,5,E5


In [20]:
# Random N rows; sample() called without an argument draws a single row.
# NOTE: no random_state is passed, so the selected row differs between runs.
df_csv.sample()

Unnamed: 0,kode,stok,lokasi
4,P5,5,E5


In [21]:
# Reorder columns by selecting them in the desired order.
# df_csv is rebound here, so the following cells see the new column order.
df_csv = df_csv[["kode", "lokasi", "stok"]]
df_csv

Unnamed: 0,kode,lokasi,stok
0,P1,A1,12
1,P2,B2,8
2,P3,C3,15
3,P4,D4,20
4,P5,E5,5


In [22]:
# Reorder columns with reindex(), listing the columns in the desired order.
# df_csv is rebound again; the positional (iloc) lookups below rely on this order.
df_csv = df_csv.reindex(columns=["lokasi", "kode", "stok"])
df_csv

Unnamed: 0,lokasi,kode,stok
0,A1,P1,12
1,B2,P2,8
2,C3,P3,15
3,D4,P4,20
4,E5,P5,5


In [23]:
# Get a single value by label: row label 0, column name "lokasi"
df_csv.loc[0, "lokasi"]

'A1'

In [24]:
# Get a single value by integer position: first row (0), third column (2)
df_csv.iloc[0, 2]

12

In [25]:
# Another positional lookup: fourth row (3), second column (1)
df_csv.iloc[3, 1]

'P4'

In [26]:
# Get a range of rows and a set of columns by label.
# A label slice with loc includes both endpoints, so 0:2 returns rows 0, 1 and 2.
df_csv.loc[0:2, ["lokasi", "stok"]]

Unnamed: 0,lokasi,stok
0,A1,12
1,B2,8
2,C3,15


In [27]:
# Get a range of rows and columns by integer position.
# Positional slices exclude the end, so 0:3 gives rows 0-2 and 0:2 gives the first two columns.
df_csv.iloc[0:3, 0:2]

Unnamed: 0,lokasi,kode
0,A1,P1
1,B2,P2
2,C3,P3


In [29]:
# Filter the DataFrame with one criterion: keep rows where "stok" is greater than 8
df_csv[df_csv["stok"] > 8]

Unnamed: 0,lokasi,kode,stok
0,A1,P1,12
2,C3,P3,15
3,D4,P4,20


In [30]:
# Filter the DataFrame with several criteria combined with AND (&).
# Each condition needs its own parentheses.
df_csv[
    (df_csv["stok"] > 8)
    & (df_csv["lokasi"] != "A1")
]

Unnamed: 0,lokasi,kode,stok
2,C3,P3,15
3,D4,P4,20


In [31]:
# Filter the DataFrame with several criteria combined with OR (|).
# A row is kept when at least one of the conditions holds.
df_csv[
    (df_csv["stok"] > 8)
    | (df_csv["lokasi"] != "A1")
]

Unnamed: 0,lokasi,kode,stok
0,A1,P1,12
1,B2,P2,8
2,C3,P3,15
3,D4,P4,20
4,E5,P5,5
