# Membuat Data Dummy

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Simulated daily sales data, one row per calendar day of 2023.
# The row count is 365 so the frame agrees with the outputs recorded in this
# notebook (365 entries, index 0..364, last date 2023-12-31); the previous
# value of 200 could not have produced them.
n_rows = 365
data = {
    'Tanggal': pd.date_range('2023-01-01', periods=n_rows),     # consecutive daily dates
    'Produk': np.random.choice(['A', 'B', 'C'], size=n_rows),   # random product label
    'Jumlah': np.random.randint(1, 100, size=n_rows),           # random integer in [1, 100)
    'Harga': np.random.uniform(10, 100, size=n_rows)            # random float in [10, 100)
}

df = pd.DataFrame(data)

## Simple Exploration

In [None]:
df.head(n=5)  # preview the first five rows

Unnamed: 0,Tanggal,Produk,Jumlah,Harga
0,2023-01-01,C,14,43.474794
1,2023-01-02,B,25,59.960183
2,2023-01-03,B,38,24.906364
3,2023-01-04,B,19,41.583017
4,2023-01-05,C,3,65.267219


In [None]:
df.head(n=2)  # preview the first two rows

Unnamed: 0,Tanggal,Produk,Jumlah,Harga
0,2023-01-01,C,14,43.474794
1,2023-01-02,B,25,59.960183


In [None]:
df.tail(n=5)  # preview the last five rows

Unnamed: 0,Tanggal,Produk,Jumlah,Harga
360,2023-12-27,A,78,26.844277
361,2023-12-28,C,66,18.742321
362,2023-12-29,C,7,74.29335
363,2023-12-30,C,99,84.030741
364,2023-12-31,B,28,28.012613


In [None]:
df.sample(n=1)  # draw one random row

Unnamed: 0,Tanggal,Produk,Jumlah,Harga
113,2023-04-24,A,41,49.572982


In [None]:
df.info() # frame summary: index range, column names, non-null counts, dtypes and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Tanggal  365 non-null    datetime64[ns]
 1   Produk   365 non-null    object        
 2   Jumlah   365 non-null    int64         
 3   Harga    365 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 11.5+ KB


In [None]:
df.shape # (number of rows, number of columns)

(365, 4)

In [None]:
# Unpack the (rows, columns) tuple and report each dimension separately
row_count, col_count = df.shape
print('Jumlah baris:', row_count)
print('Jumlah kolom:', col_count)

Jumlah baris: 365
Jumlah kolom: 4


In [None]:
# # display options that show an entire DataFrame without truncation
# pd.set_option('display.max_rows', None) # default 60; longer frames are truncated to 10 rows (head 5 & tail 5)
# pd.set_option('display.max_columns', None) # default 20 columns
# pd.set_option('display.width', None) # default 80 characters

Cara di atas digunakan untuk menampilkan semua baris dan kolom. Cara tersebut tidak direkomendasikan untuk DataFrame berukuran besar karena output yang ditampilkan akan sangat panjang dan memakan memori yang besar.

In [None]:
df.describe() # summary statistics (count, mean, min, quartiles, max, std) for the date and numeric columns

Unnamed: 0,Tanggal,Jumlah,Harga
count,365,365.0,365.0
mean,2023-07-02 00:00:00,51.021918,54.480055
min,2023-01-01 00:00:00,1.0,10.52962
25%,2023-04-02 00:00:00,25.0,33.442714
50%,2023-07-02 00:00:00,52.0,52.641156
75%,2023-10-01 00:00:00,77.0,74.969509
max,2023-12-31 00:00:00,99.0,99.628012
std,,30.098092,24.888523


In [None]:
# check the data type of every column
df.dtypes

Unnamed: 0,0
Tanggal,datetime64[ns]
Produk,object
Jumlah,int64
Harga,float64


In [None]:
# check the data type of the Tanggal column only
df['Tanggal'].dtype

dtype('<M8[ns]')

`dtype('<M8[ns]')` adalah kode NumPy untuk tipe data `datetime64[ns]`. Ini adalah tipe data yang digunakan Pandas untuk merepresentasikan tanggal dan waktu dengan presisi nanodetik (nanosecond).

Breakdown:

- <: Ini menunjukkan urutan byte little-endian.
- M8: Ini adalah kode untuk tipe data datetime (M) berukuran 8 byte.
- [ns]: Ini menunjukkan bahwa presisi waktu adalah nanosecond.

In [None]:
# check the column names
df.columns

Index(['Tanggal', 'Produk', 'Jumlah', 'Harga'], dtype='object')

In [None]:
# count the missing values in each column
df.isnull().sum(axis=0)

Unnamed: 0,0
Tanggal,0
Produk,0
Jumlah,0
Harga,0


In [None]:
df.isna().sum() # missing values per column, this time via isna()

Unnamed: 0,0
Tanggal,0
Produk,0
Jumlah,0
Harga,0


In [None]:
df.nunique() # number of distinct values in each column

Unnamed: 0,0
Tanggal,365
Produk,3
Jumlah,98
Harga,365


In [None]:
# Distinct product labels, and how many distinct labels there are
produk_col = df['Produk']
print('Jenis Produk (Unik):', produk_col.unique())
print('Jumlah Produk yang Unik:', produk_col.nunique())

Jenis Produk (Unik): ['C' 'B' 'A']
Jumlah Produk yang Unik: 3


In [None]:
# frequency of each unique value in the Produk column
df['Produk'].value_counts()

Unnamed: 0_level_0,count
Produk,Unnamed: 1_level_1
C,124
A,123
B,118


In [None]:
# number of rows whose product is C (True counts as 1 when summed)
print('Jumlah Produk C:', df['Produk'].eq('C').sum())

Jumlah Produk C: 124


# Data Loading

### From CSV File

In [None]:
# Download teams.csv into the current working directory
!wget https://raw.githubusercontent.com/FTDS-learning-materials/phase-0/main/src/teams.csv

--2024-12-12 05:41:26--  https://raw.githubusercontent.com/FTDS-learning-materials/phase-0/main/src/teams.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 24958 (24K) [text/plain]
Saving to: ‘teams.csv’


2024-12-12 05:41:26 (2.50 MB/s) - ‘teams.csv’ saved [24958/24958]



In [None]:
# Load the downloaded CSV from the working directory and preview it
teams = pd.read_csv('teams.csv', sep=',')
teams.head(n=5)

Unnamed: 0,division,conference,school_name,roster_url,id
0,FBS (Division I-A Teams),American Athletic,Cincinnati,http://espn.go.com/ncf/teams/roster?teamId=2132,1
1,FBS (Division I-A Teams),American Athletic,Connecticut,http://espn.go.com/ncf/teams/roster?teamId=41,2
2,FBS (Division I-A Teams),American Athletic,Houston,http://espn.go.com/ncf/teams/roster?teamId=248,3
3,FBS (Division I-A Teams),American Athletic,Louisville,http://espn.go.com/ncf/teams/roster?teamId=97,4
4,FBS (Division I-A Teams),American Athletic,Memphis,http://espn.go.com/ncf/teams/roster?teamId=235,5


In [None]:
# pandas can read a CSV straight from a URL
house_url = 'https://raw.githubusercontent.com/FTDS-learning-materials/phase-0/main/src/kc_house_data.csv'
house = pd.read_csv(house_url)
house.head(n=5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


### From TSV File

In [None]:
# A TSV file is read with read_csv by declaring the tab character as the field separator
tsv_url = 'https://raw.githubusercontent.com/FTDS-learning-materials/phase-0/main/src/cities_world.tsv'
tsv_df = pd.read_csv(tsv_url, sep='\t')
tsv_df.head(n=5)

Unnamed: 0,City,Latitude,Longitude,Country,Population,Land_area,Density,Number
0,Tokyo/Yokohama,35.6895,139.6917,Japan,33200000,6993,4750,1
1,New York Metro,40.7128,-74.0059,USA,17800000,8683,2050,2
2,Sao Paulo,-23.5505,-46.6333,Brazil,17700000,1968,9000,3
3,Seoul/Incheon,37.5665,126.978,South Korea,17500000,1049,16700,4
4,Mexico City,23.6345,-102.5528,Mexico,17400000,2072,8400,5


### From Excel File

In [None]:
# Read the workbook from its URL; sheet_name=0 selects the first sheet, which is also the default
file = 'https://github.com/FTDS-learning-materials/phase-0/raw/main/src/data_penjualan.xlsx'
sales = pd.read_excel(file, sheet_name=0)
sales.head(n=5)

Unnamed: 0,Bulan,Penjualan
0,Januari,1000
1,Februari,200
2,Maret,5000
3,April,654
4,Mei,1200


Ingatlah bahwa file excel dapat memiliki lebih dari satu lembar (sheet). Jadi, bagaimana cara memeriksa lembar-lembar dalam file excel?

In [None]:
# List the worksheet names of the workbook. The ExcelFile handle is opened in a
# `with` block so it is closed as soon as the names have been read, instead of
# being left open for the rest of the session.
with pd.ExcelFile(file) as workbook:
    sheet_names = workbook.sheet_names

sheet_names

['2020', '2021']

Untuk membuka lembar tertentu, Anda dapat menambahkan argumen `sheet_name` saat menggunakan `pd.read_excel`. Perhatikan bahwa secara default, `pd.read_excel` akan membuka lembar pertama.

In [None]:
# Read only the worksheet named '2021'
sales = pd.read_excel(io=file, sheet_name='2021')
sales.head(n=5)

Unnamed: 0,Bulan,Penjualan
0,Januari,1200
1,Februari,1500
2,Maret,4000
3,April,7600
4,Mei,1200


# Latihan - Dataset Superstore

Pada latihan ini kita akan melakukan eksplorasi dengan dataset Superstore yang didapatkan dari Kaggle. Berikut adalah link datasetnya: https://www.kaggle.com/datasets/vivek468/superstore-dataset-final/data

In [None]:
#Buat folder di kaggle root
!mkdir ~/.kaggle
# copy-paster kaggle.json ke folder kaggle
# !cp '/content/drive/MyDrive/DTS Resources/kaggle/kaggle.json' ~/.kaggle

# !chmod 600 ~/.kaggle/kaggle.json #perizinan akses, hanya pemilik file yang memiliki akses baca dan tulis ke file
# !ls ~/.kaggle #cek list data yang ada di folder kaggle

In [None]:
# Download the Superstore dataset archive with the kaggle command-line tool
!kaggle datasets download vivek468/superstore-dataset-final

Dataset URL: https://www.kaggle.com/datasets/vivek468/superstore-dataset-final
License(s): other
Downloading superstore-dataset-final.zip to /content
100% 550k/550k [00:00<00:00, 956kB/s]
100% 550k/550k [00:00<00:00, 955kB/s]


In [None]:
!unzip superstore-dataset-final.zip

Archive:  superstore-dataset-final.zip
  inflating: Sample - Superstore.csv  


In [None]:
# Read the CSV with pandas' default encoding.
# For this file the read fails with a UnicodeDecodeError. The error is caught
# and printed so the message is still shown while the rest of the notebook can
# keep running (Restart & Run All) instead of stopping at this cell.
try:
    df = pd.read_csv('Sample - Superstore.csv')
except UnicodeDecodeError as err:
    # same text as the last line of the traceback: "<ErrorName>: <message>"
    print(type(err).__name__ + ':', err)
else:
    # only preview the frame when the read actually succeeded
    display(df.head())

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 2944: invalid start byte

Di sini terjadi error karena encoding file yang digunakan bukan `utf-8`, sehingga kita perlu mendeteksi encoding yang sebenarnya dan menyesuaikannya terlebih dahulu.

In [None]:
!pip -q install chardet #library untuk cek unicode



In [None]:
import chardet

# Read the file as raw bytes, then let chardet guess its character encoding
file_path = 'Sample - Superstore.csv'
with open(file_path, mode='rb') as raw_file:
    raw_bytes = raw_file.read()

result = chardet.detect(raw_bytes)
encoding = result['encoding']

# show the detected encoding
print('Encoding file:', encoding)

Encoding file: Windows-1252


In [None]:
# Read the CSV again, this time decoding it as Windows-1252
df = pd.read_csv(filepath_or_buffer=file_path, encoding='Windows-1252')
df.head(n=2)

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582


In [None]:
# Column names, non-null counts and dtypes of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Row ID         9994 non-null   int64  
 1   Order ID       9994 non-null   object 
 2   Order Date     9994 non-null   object 
 3   Ship Date      9994 non-null   object 
 4   Ship Mode      9994 non-null   object 
 5   Customer ID    9994 non-null   object 
 6   Customer Name  9994 non-null   object 
 7   Segment        9994 non-null   object 
 8   Country        9994 non-null   object 
 9   City           9994 non-null   object 
 10  State          9994 non-null   object 
 11  Postal Code    9994 non-null   int64  
 12  Region         9994 non-null   object 
 13  Product ID     9994 non-null   object 
 14  Category       9994 non-null   object 
 15  Sub-Category   9994 non-null   object 
 16  Product Name   9994 non-null   object 
 17  Sales          9994 non-null   float64
 18  Quantity

## Rename, Set Index, Reset Index

In [None]:
# Rename three columns to snake_case; the mapping is {old name: new name}
new_names = {
    'Row ID': 'row_id',
    'Order ID': 'order_id',
    'Order Date': 'order_date',
}
df.rename(columns=new_names, inplace=True)
df.head(n=2)

Unnamed: 0,row_id,order_id,order_date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582


In [None]:
# Normalise every column name in one chain: lower-case it, then turn both
# spaces and '-' into '_'
df.columns = (
    df.columns
    .str.lower()
    .str.replace(' ', '_')
    .str.replace('-', '_')
)
df.head(n=2)

Unnamed: 0,row_id,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,...,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582


In [None]:
# Use row_id as the index (df itself is modified in place)
df.set_index(keys='row_id', inplace=True)
df.head(n=2)

Unnamed: 0_level_0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582


In [None]:
# Discard the current index and keep the result in a new variable, df_re
df_re = df.reset_index(drop=True, inplace=False)
df_re.head(n=5)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [None]:
# Modify the original df itself with inplace=True instead of creating a new DataFrame
df.reset_index(drop=True, inplace=True)
df.head(n=5)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


## Slicing - Akses baris dan Kolom

In [None]:
df['city'] # select a single column by name

Unnamed: 0,city
0,Henderson
1,Henderson
2,Los Angeles
3,Fort Lauderdale
4,Fort Lauderdale
...,...
9989,Miami
9990,Costa Mesa
9991,Costa Mesa
9992,Costa Mesa


In [None]:
df[['country','city']] # select multiple columns by passing a list of names

Unnamed: 0,country,city
0,United States,Henderson
1,United States,Henderson
2,United States,Los Angeles
3,United States,Fort Lauderdale
4,United States,Fort Lauderdale
...,...,...
9989,United States,Miami
9990,United States,Costa Mesa
9991,United States,Costa Mesa
9992,United States,Costa Mesa


Akses baris dan kolom dengan iloc dan loc:

- `iloc`: iloc adalah singkatan dari "integer location" dan digunakan untuk mengakses data berdasarkan indeks baris dan kolom dalam DataFrame atau Series secara eksplisit menggunakan indeks integer. Indeks dimulai dari angka 0 untuk baris/kolom pertama.

- `loc`: loc adalah singkatan dari "label location" dan digunakan untuk mengakses data berdasarkan label baris dan kolom dalam DataFrame atau Series. Ini berarti Anda dapat menggunakan label yang ditetapkan untuk baris/kolom untuk mengakses elemen yang sesuai.

In [None]:
df.head(n=2)  # show the first two rows

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582


In [None]:
# Access the element at row position 0, column position 1
print(df.iloc[0, 1]) # by integer position
print(df.loc[0,'order_date']) # by row label and column name

11/8/2016
11/8/2016


In [None]:
# Select columns order_id through ship_date by label with loc
# (a label slice includes its end label)
df.loc[:, 'order_id':'ship_date'].head(n=5)

Unnamed: 0,order_id,order_date,ship_date
0,CA-2016-152156,11/8/2016,11/11/2016
1,CA-2016-152156,11/8/2016,11/11/2016
2,CA-2016-138688,6/12/2016,6/16/2016
3,US-2015-108966,10/11/2015,10/18/2015
4,US-2015-108966,10/11/2015,10/18/2015


In [None]:
# Select columns order_id through ship_date by position with iloc
# (positions 0-2; the end position 3 is excluded)
df.iloc[:, :3].head(n=5)

Unnamed: 0,order_id,order_date,ship_date
0,CA-2016-152156,11/8/2016,11/11/2016
1,CA-2016-152156,11/8/2016,11/11/2016
2,CA-2016-138688,6/12/2016,6/16/2016
3,US-2015-108966,10/11/2015,10/18/2015
4,US-2015-108966,10/11/2015,10/18/2015


In [None]:
df.iloc[0:5, :]  # rows 1-5 (positions 0-4), every column

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [None]:
df.loc[:4, :]  # rows labelled up to and including 4, every column

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


Kita juga dapat menggunakan `np.r_` untuk menggabungkan dua daerah indeks, seperti contoh berikut ini:

In [None]:
# Position-based selection: rows at positions 1-2, columns at positions 0-1
# plus 4-6. np.r_ concatenates the two position ranges into a single array.
column_positions = np.r_[0:2, 4:7]
df.iloc[1:3, column_positions]

Unnamed: 0,order_id,order_date,customer_id,customer_name,segment
1,CA-2016-152156,11/8/2016,CG-12520,Claire Gute,Consumer
2,CA-2016-138688,6/12/2016,DV-13045,Darrin Van Huff,Corporate


In [None]:
# Build a second frame that uses the 'segment' column as its row index, then
# preview its first five rows.
df_new = df.set_index(keys='segment')
df_new.head(n=5)

Unnamed: 0_level_0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Consumer,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
Consumer,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
Corporate,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
Consumer,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
Consumer,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [None]:
# Show the distinct labels present in the (non-unique) 'segment' index.
segment_index = df_new.index
segment_index.unique()

Index(['Consumer', 'Corporate', 'Home Office'], dtype='object', name='segment')

Menggunakan `loc` akan memudahkan kita jika index yang digunakan berbentuk label atau nama.

In [None]:
# Label lookup on the 'segment' index: take every row labelled 'Corporate'
# and preview the first five of them.
corporate_rows = df_new.loc['Corporate']
corporate_rows.head()

Unnamed: 0_level_0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Corporate,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
Corporate,CA-2016-137330,12/9/2016,12/13/2016,Standard Class,KB-16585,Ken Black,United States,Fremont,Nebraska,68025,Central,OFF-AR-10000246,Office Supplies,Art,Newell 318,19.46,7,0.0,5.0596
Corporate,CA-2016-137330,12/9/2016,12/13/2016,Standard Class,KB-16585,Ken Black,United States,Fremont,Nebraska,68025,Central,OFF-AP-10001492,Office Supplies,Appliances,"Acco Six-Outlet Power Strip, 4' Cord Length",60.34,7,0.0,15.6884
Corporate,CA-2016-117590,12/8/2016,12/10/2016,First Class,GH-14485,Gene Hale,United States,Richardson,Texas,75080,Central,TEC-PH-10004977,Technology,Phones,GE 30524EE4,1097.544,7,0.2,123.4737
Corporate,CA-2016-117590,12/8/2016,12/10/2016,First Class,GH-14485,Gene Hale,United States,Richardson,Texas,75080,Central,FUR-FU-10003664,Furniture,Furnishings,"Electrix Architect's Clamp-On Swing Arm Lamp, ...",190.92,5,0.6,-147.963


## Query/Filtering

In [None]:
# Boolean-mask filter: keep only the rows whose customer_name is
# `Darrin Van Huff`.
is_darrin = df['customer_name'] == 'Darrin Van Huff'
df.loc[is_darrin]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
2976,CA-2017-139773,12/4/2017,12/4/2017,Same Day,DV-13045,Darrin Van Huff,Corporate,United States,Philadelphia,Pennsylvania,19143,East,FUR-CH-10001797,Furniture,Chairs,"Safco Chair Connectors, 6/Carton",188.552,7,0.3,-2.6936
5298,CA-2016-106656,9/23/2016,9/26/2016,First Class,DV-13045,Darrin Van Huff,Corporate,United States,San Diego,California,92037,West,OFF-EN-10001509,Office Supplies,Envelopes,Poly String Tie Envelopes,4.08,2,0.0,1.9176
5299,CA-2016-106656,9/23/2016,9/26/2016,First Class,DV-13045,Darrin Van Huff,Corporate,United States,San Diego,California,92037,West,OFF-LA-10003148,Office Supplies,Labels,Avery 51,18.9,3,0.0,8.694
5617,CA-2017-125745,6/30/2017,7/4/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Memphis,Tennessee,38109,South,FUR-FU-10001591,Furniture,Furnishings,Advantus Panel Wall Certificate Holder - 8.5x11,19.52,2,0.2,5.368
7344,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,FUR-TA-10004289,Furniture,Tables,BoxOffice By Design Rectangular and Half-Moon ...,721.875,6,0.45,-420.0
7345,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,TEC-PH-10003555,Technology,Phones,Motorola HK250 Universal Bluetooth Headset,73.568,4,0.2,-16.5528
7346,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,OFF-AR-10001958,Office Supplies,Art,Stanley Bostitch Contemporary Electric Pencil ...,13.584,1,0.2,1.3584
7347,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,FUR-CH-10000225,Furniture,Chairs,"Global Geo Office Task Chair, Gray",64.784,1,0.2,-12.147


In [None]:
# Same customer filter as the boolean-mask version, expressed as a
# DataFrame.query() expression string.
customer_expr = 'customer_name == "Darrin Van Huff"'
df.query(customer_expr)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
2976,CA-2017-139773,12/4/2017,12/4/2017,Same Day,DV-13045,Darrin Van Huff,Corporate,United States,Philadelphia,Pennsylvania,19143,East,FUR-CH-10001797,Furniture,Chairs,"Safco Chair Connectors, 6/Carton",188.552,7,0.3,-2.6936
5298,CA-2016-106656,9/23/2016,9/26/2016,First Class,DV-13045,Darrin Van Huff,Corporate,United States,San Diego,California,92037,West,OFF-EN-10001509,Office Supplies,Envelopes,Poly String Tie Envelopes,4.08,2,0.0,1.9176
5299,CA-2016-106656,9/23/2016,9/26/2016,First Class,DV-13045,Darrin Van Huff,Corporate,United States,San Diego,California,92037,West,OFF-LA-10003148,Office Supplies,Labels,Avery 51,18.9,3,0.0,8.694
5617,CA-2017-125745,6/30/2017,7/4/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Memphis,Tennessee,38109,South,FUR-FU-10001591,Furniture,Furnishings,Advantus Panel Wall Certificate Holder - 8.5x11,19.52,2,0.2,5.368
7344,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,FUR-TA-10004289,Furniture,Tables,BoxOffice By Design Rectangular and Half-Moon ...,721.875,6,0.45,-420.0
7345,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,TEC-PH-10003555,Technology,Phones,Motorola HK250 Universal Bluetooth Headset,73.568,4,0.2,-16.5528
7346,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,OFF-AR-10001958,Office Supplies,Art,Stanley Bostitch Contemporary Electric Pencil ...,13.584,1,0.2,1.3584
7347,CA-2017-168389,12/11/2017,12/17/2017,Standard Class,DV-13045,Darrin Van Huff,Corporate,United States,Jacksonville,Florida,32216,South,FUR-CH-10000225,Furniture,Chairs,"Global Geo Office Task Chair, Gray",64.784,1,0.2,-12.147


In [None]:
# Loss-making rows: profit strictly below zero.
has_loss = df['profit'] < 0
df.loc[has_loss]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
14,US-2015-118983,11/22/2015,11/26/2015,Standard Class,HP-14815,Harold Pawlan,Home Office,United States,Fort Worth,Texas,76106,Central,OFF-AP-10002311,Office Supplies,Appliances,Holmes Replacement Filter for HEPA Air Cleaner...,68.8100,5,0.80,-123.8580
15,US-2015-118983,11/22/2015,11/26/2015,Standard Class,HP-14815,Harold Pawlan,Home Office,United States,Fort Worth,Texas,76106,Central,OFF-BI-10000756,Office Supplies,Binders,Storex DuraTech Recycled Plastic Frosted Binders,2.5440,3,0.80,-3.8160
23,US-2017-156909,7/16/2017,7/18/2017,Second Class,SF-20065,Sandra Flanagan,Consumer,United States,Philadelphia,Pennsylvania,19140,East,FUR-CH-10002774,Furniture,Chairs,"Global Deluxe Stacking Chair, Gray",71.3720,2,0.30,-1.0196
27,US-2015-150630,9/17/2015,9/21/2015,Standard Class,TB-21520,Tracy Blumstein,Consumer,United States,Philadelphia,Pennsylvania,19140,East,FUR-BO-10004834,Furniture,Bookcases,"Riverside Palais Royal Lawyers Bookcase, Royal...",3083.4300,7,0.50,-1665.0522
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9920,CA-2016-149272,3/15/2016,3/19/2016,Standard Class,MY-18295,Muhammed Yedwab,Corporate,United States,Bryan,Texas,77803,Central,OFF-BI-10004233,Office Supplies,Binders,"GBC Pre-Punched Binding Paper, Plastic, White,...",22.3860,7,0.80,-35.8176
9921,CA-2014-111360,11/24/2014,11/30/2014,Standard Class,AT-10435,Alyssa Tate,Home Office,United States,Akron,Ohio,44312,East,OFF-BI-10003350,Office Supplies,Binders,Acco Expandable Hanging Binders,5.7420,3,0.70,-4.5936
9931,CA-2015-104948,11/13/2015,11/17/2015,Standard Class,KH-16510,Keith Herrera,Consumer,United States,San Bernardino,California,92404,West,FUR-BO-10004357,Furniture,Bookcases,O'Sullivan Living Dimensions 3-Shelf Bookcases,683.3320,4,0.15,-40.1960
9937,CA-2016-164889,6/3/2016,6/6/2016,Second Class,CP-12340,Christine Phan,Corporate,United States,Los Angeles,California,90049,West,FUR-TA-10001676,Furniture,Tables,Hon 61000 Series Interactive Training Tables,71.0880,2,0.20,-1.7772


In [None]:
# Combine two conditions with & (element-wise AND): profit below zero
# and state equal to Texas.
negative_profit = df['profit'] < 0
in_texas = df['state'] == 'Texas'
df.loc[negative_profit & in_texas]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
14,US-2015-118983,11/22/2015,11/26/2015,Standard Class,HP-14815,Harold Pawlan,Home Office,United States,Fort Worth,Texas,76106,Central,OFF-AP-10002311,Office Supplies,Appliances,Holmes Replacement Filter for HEPA Air Cleaner...,68.8100,5,0.80,-123.8580
15,US-2015-118983,11/22/2015,11/26/2015,Standard Class,HP-14815,Harold Pawlan,Home Office,United States,Fort Worth,Texas,76106,Central,OFF-BI-10000756,Office Supplies,Binders,Storex DuraTech Recycled Plastic Frosted Binders,2.5440,3,0.80,-3.8160
36,CA-2016-117590,12/8/2016,12/10/2016,First Class,GH-14485,Gene Hale,Corporate,United States,Richardson,Texas,75080,Central,FUR-FU-10003664,Furniture,Furnishings,"Electrix Architect's Clamp-On Swing Arm Lamp, ...",190.9200,5,0.60,-147.9630
38,CA-2015-117415,12/27/2015,12/31/2015,Standard Class,SN-20710,Steve Nguyen,Home Office,United States,Houston,Texas,77041,Central,FUR-BO-10002545,Furniture,Bookcases,"Atlantic Metals Mobile 3-Shelf Bookcases, Cust...",532.3992,3,0.32,-46.9764
39,CA-2015-117415,12/27/2015,12/31/2015,Standard Class,SN-20710,Steve Nguyen,Home Office,United States,Houston,Texas,77041,Central,FUR-CH-10004218,Furniture,Chairs,"Global Fabric Manager's Chair, Dark Gray",212.0580,3,0.30,-15.1470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9835,CA-2016-126627,10/10/2016,10/12/2016,First Class,WB-21850,William Brown,Consumer,United States,La Porte,Texas,77571,Central,OFF-BI-10001597,Office Supplies,Binders,"Wilson Jones Ledger-Size, Piano-Hinge Binder, ...",16.3920,2,0.80,-26.2272
9903,CA-2014-122609,11/12/2014,11/18/2014,Standard Class,DP-13000,Darren Powers,Consumer,United States,Carrollton,Texas,75007,Central,FUR-FU-10004587,Furniture,Furnishings,"GE General Use Halogen Bulbs, 100 Watts, 1 Bul...",25.1280,3,0.60,-6.9102
9919,CA-2016-149272,3/15/2016,3/19/2016,Standard Class,MY-18295,Muhammed Yedwab,Corporate,United States,Bryan,Texas,77803,Central,FUR-CH-10000863,Furniture,Chairs,Novimex Swivel Fabric Task Chair,528.4300,5,0.30,-143.4310
9920,CA-2016-149272,3/15/2016,3/19/2016,Standard Class,MY-18295,Muhammed Yedwab,Corporate,United States,Bryan,Texas,77803,Central,OFF-BI-10004233,Office Supplies,Binders,"GBC Pre-Punched Binding Paper, Plastic, White,...",22.3860,7,0.80,-35.8176


In [None]:
# Membership filter: rows whose state is either Texas or California.
df.loc[df['state'].isin(['Texas', 'California']), :]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.620,2,0.0,6.8714
5,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.860,7,0.0,14.1694
6,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.280,4,0.0,1.9656
7,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,TEC-PH-10002275,Technology,Phones,Mitel 5320 IP Phone VoIP phone,907.152,6,0.2,90.7152
8,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-BI-10003910,Office Supplies,Binders,DXL Angle-View Binders with Locking Rings by S...,18.504,3,0.2,5.7825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9986,CA-2016-125794,9/29/2016,10/3/2016,Standard Class,ML-17410,Maris LaWare,Consumer,United States,Los Angeles,California,90008,West,TEC-AC-10003399,Technology,Accessories,Memorex Mini Travel Drive 64 GB USB 2.0 Flash ...,36.240,1,0.0,15.2208
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.960,2,0.0,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.600,4,0.0,13.3200


In [None]:
# Same membership filter (state is Texas or California), written with the
# `in` operator of DataFrame.query().
state_expr = 'state in ["Texas", "California"]'
df.query(state_expr)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.620,2,0.0,6.8714
5,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.860,7,0.0,14.1694
6,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.280,4,0.0,1.9656
7,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,TEC-PH-10002275,Technology,Phones,Mitel 5320 IP Phone VoIP phone,907.152,6,0.2,90.7152
8,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-BI-10003910,Office Supplies,Binders,DXL Angle-View Binders with Locking Rings by S...,18.504,3,0.2,5.7825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9986,CA-2016-125794,9/29/2016,10/3/2016,Standard Class,ML-17410,Maris LaWare,Consumer,United States,Los Angeles,California,90008,West,TEC-AC-10003399,Technology,Accessories,Memorex Mini Travel Drive 64 GB USB 2.0 Flash ...,36.240,1,0.0,15.2208
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.960,2,0.0,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.600,4,0.0,13.3200


In [None]:
# Membership filter via query(), this time taking the allowed states from a
# Python variable: the @ prefix makes query() read `list_state` from the
# surrounding namespace instead of treating it as a column name.
list_state = ['Texas', 'California']
df.query(expr='state in @list_state')

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.620,2,0.0,6.8714
5,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.860,7,0.0,14.1694
6,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.280,4,0.0,1.9656
7,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,TEC-PH-10002275,Technology,Phones,Mitel 5320 IP Phone VoIP phone,907.152,6,0.2,90.7152
8,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-BI-10003910,Office Supplies,Binders,DXL Angle-View Binders with Locking Rings by S...,18.504,3,0.2,5.7825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9986,CA-2016-125794,9/29/2016,10/3/2016,Standard Class,ML-17410,Maris LaWare,Consumer,United States,Los Angeles,California,90008,West,TEC-AC-10003399,Technology,Accessories,Memorex Mini Travel Drive 64 GB USB 2.0 Flash ...,36.240,1,0.0,15.2208
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.960,2,0.0,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.600,4,0.0,13.3200


In [None]:
# Inverse membership filter: ~ negates the mask, leaving the rows whose state
# is neither Texas nor California.
in_tx_or_ca = df['state'].isin(['Texas', 'California'])
df.loc[~in_tx_or_ca]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.9600,2,0.00,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.3680,2,0.20,2.5164
12,CA-2017-114412,4/15/2017,4/20/2017,Standard Class,AA-10480,Andrew Allen,Consumer,United States,Concord,North Carolina,28027,South,OFF-PA-10002365,Office Supplies,Paper,Xerox 1967,15.5520,3,0.20,5.4432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9984,CA-2015-100251,5/17/2015,5/23/2015,Standard Class,DV-13465,Dianna Vittorini,Consumer,United States,Long Beach,New York,11561,East,OFF-LA-10003766,Office Supplies,Labels,Self-Adhesive Removable Labels,31.5000,10,0.00,15.1200
9985,CA-2015-100251,5/17/2015,5/23/2015,Standard Class,DV-13465,Dianna Vittorini,Consumer,United States,Long Beach,New York,11561,East,OFF-SU-10000898,Office Supplies,Supplies,Acme Hot Forged Carbon Steel Scissors with Nic...,55.6000,4,0.00,16.1240
9987,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-AC-10001539,Technology,Accessories,Logitech G430 Surround Sound Gaming Headset wi...,79.9900,1,0.00,28.7964
9988,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-PH-10004006,Technology,Phones,Panasonic KX - TS880B Telephone,206.1000,5,0.00,55.6470


In [None]:
# Inverse membership filter (state is neither Texas nor California), written
# with the `not in` operator of DataFrame.query().
excluded_expr = 'state not in ["Texas", "California"]'
df.query(excluded_expr)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.9600,2,0.00,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.3680,2,0.20,2.5164
12,CA-2017-114412,4/15/2017,4/20/2017,Standard Class,AA-10480,Andrew Allen,Consumer,United States,Concord,North Carolina,28027,South,OFF-PA-10002365,Office Supplies,Paper,Xerox 1967,15.5520,3,0.20,5.4432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9984,CA-2015-100251,5/17/2015,5/23/2015,Standard Class,DV-13465,Dianna Vittorini,Consumer,United States,Long Beach,New York,11561,East,OFF-LA-10003766,Office Supplies,Labels,Self-Adhesive Removable Labels,31.5000,10,0.00,15.1200
9985,CA-2015-100251,5/17/2015,5/23/2015,Standard Class,DV-13465,Dianna Vittorini,Consumer,United States,Long Beach,New York,11561,East,OFF-SU-10000898,Office Supplies,Supplies,Acme Hot Forged Carbon Steel Scissors with Nic...,55.6000,4,0.00,16.1240
9987,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-AC-10001539,Technology,Accessories,Logitech G430 Surround Sound Gaming Headset wi...,79.9900,1,0.00,28.7964
9988,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-PH-10004006,Technology,Phones,Panasonic KX - TS880B Telephone,206.1000,5,0.00,55.6470


In [None]:
# Rows whose quantity lies between 3 and 7. Series.between includes both
# endpoints by default, so this is the compact form of
# (quantity >= 3) & (quantity <= 7).
quantity_in_range = df['quantity'].between(left=3, right=7)
df.loc[quantity_in_range]

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
5,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.8600,7,0.00,14.1694
6,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.2800,4,0.00,1.9656
7,CA-2014-115812,6/9/2014,6/14/2014,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,TEC-PH-10002275,Technology,Phones,Mitel 5320 IP Phone VoIP phone,907.1520,6,0.20,90.7152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9982,US-2016-157728,9/22/2016,9/28/2016,Standard Class,RC-19960,Ryan Crowe,Consumer,United States,Grand Rapids,Michigan,49505,Central,OFF-PA-10002195,Office Supplies,Paper,"RSVP Cards & Envelopes, Blank White, 8-1/2"" X ...",35.5600,7,0.00,16.7132
9985,CA-2015-100251,5/17/2015,5/23/2015,Standard Class,DV-13465,Dianna Vittorini,Consumer,United States,Long Beach,New York,11561,East,OFF-SU-10000898,Office Supplies,Supplies,Acme Hot Forged Carbon Steel Scissors with Nic...,55.6000,4,0.00,16.1240
9988,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-PH-10004006,Technology,Phones,Panasonic KX - TS880B Telephone,206.1000,5,0.00,55.6470
9989,CA-2014-110422,1/21/2014,1/23/2014,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,Florida,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.2480,3,0.20,4.1028


## Menambahkan Kolom dan menghapus kolom

Anggaplah kolom `sales` adalah harga total sebelum diskon, sehingga kita dapat menghitung harga per unit dan menyimpannya sebagai kolom baru.

In [None]:
# Derive a per-unit price (sales divided by quantity) and store it on `df`
# as a new column named unit_price, then preview the result.
per_unit = df['sales'] / df['quantity']
df['unit_price'] = per_unit
df.head()

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,...,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit,unit_price
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,...,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136,130.98
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,...,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582,243.98
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,...,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714,7.31
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,...,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031,191.5155
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,...,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164,11.184


In [None]:
# Remove the unit_price column again.
# `df` is rebound to the frame returned by drop() instead of using
# inplace=True, and errors='ignore' is passed so the cell can be re-run
# safely: on a second run unit_price is already gone and the frame is left
# unchanged instead of raising KeyError.
df = df.drop(columns=['unit_price'], errors='ignore')
df.head()

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


## Menambahkan baris dan menghapus baris

In [None]:
# Record the (rows, columns) size of `df` before a new row is appended.
(len(df.index), len(df.columns))

(9994, 20)

In [None]:
# Draw one row from df and convert it to a dict; orient='list' maps each
# column name to a list of that column's values.
# random_state pins the draw so Restart & Run All reproduces the same row.
data_dict = df.sample(random_state=42).to_dict(orient='list')
data_dict

{'order_id': ['CA-2016-143805'],
 'order_date': ['12/1/2016'],
 'ship_date': ['12/3/2016'],
 'ship_mode': ['Second Class'],
 'customer_id': ['JD-15895'],
 'customer_name': ['Jonathan Doherty'],
 'segment': ['Corporate'],
 'country': ['United States'],
 'city': ['Richmond'],
 'state': ['Virginia'],
 'postal_code': [23223],
 'region': ['South'],
 'product_id': ['OFF-AP-10002945'],
 'category': ['Office Supplies'],
 'sub_category': ['Appliances'],
 'product_name': ["Honeywell Enviracaire Portable HEPA Air Cleaner for 17' x 22' Room"],
 'sales': [2104.55],
 'quantity': [7],
 'discount': [0.0],
 'profit': [694.5015]}

In [None]:
# Turn data_dict back into a DataFrame: each key becomes a column and each
# list supplies that column's values.
new_data = pd.DataFrame(data=data_dict)
new_data

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-143805,12/1/2016,12/3/2016,Second Class,JD-15895,Jonathan Doherty,Corporate,United States,Richmond,Virginia,23223,South,OFF-AP-10002945,Office Supplies,Appliances,Honeywell Enviracaire Portable HEPA Air Cleane...,2104.55,7,0.0,694.5015


In [None]:
# Append the row(s) held in new_data to the end of df.
# pd.concat matches columns by name, and ignore_index=True renumbers the
# result 0..n-1, so the appended row can never overwrite an existing label
# (a risk with label-based enlargement when the index has gaps).
df = pd.concat([df, new_data], ignore_index=True)
df.tail()

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.96,2,0.0,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6,4,0.0,13.32
9993,CA-2017-119914,5/4/2017,5/9/2017,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,California,92683,West,OFF-AP-10002684,Office Supplies,Appliances,"Acco 7-Outlet Masterpiece Power Center, Wihtou...",243.16,2,0.0,72.948
9994,CA-2016-143805,12/1/2016,12/3/2016,Second Class,JD-15895,Jonathan Doherty,Corporate,United States,Richmond,Virginia,23223,South,OFF-AP-10002945,Office Supplies,Appliances,Honeywell Enviracaire Portable HEPA Air Cleane...,2104.55,7,0.0,694.5015


In [None]:
# check the (rows, columns) shape again now that a row has been added
df.shape

(9995, 20)

In [None]:
# Remove the final row of df in place, then preview the new tail.
# df.index[-1:] is the label of the last row (empty if df has no rows).
df.drop(index=df.index[-1:], inplace=True)
df.tail()

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
9989,CA-2014-110422,1/21/2014,1/23/2014,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,Florida,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.248,3,0.2,4.1028
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.96,2,0.0,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6,4,0.0,13.32
9993,CA-2017-119914,5/4/2017,5/9/2017,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,California,92683,West,OFF-AP-10002684,Office Supplies,Appliances,"Acco 7-Outlet Masterpiece Power Center, Wihtou...",243.16,2,0.0,72.948


In [None]:
# Show df without the row labelled 9993. The result is a new DataFrame;
# with no inplace=True and no assignment, df itself is left unchanged.
df.drop(labels=9993, axis=0)

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,state,postal_code,region,product_id,category,sub_category,product_name,sales,quantity,discount,profit
0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.9600,2,0.00,41.9136
1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
2,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.6200,2,0.00,6.8714
3,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.3680,2,0.20,2.5164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9988,CA-2017-163629,11/17/2017,11/21/2017,Standard Class,RA-19885,Ruben Ausman,Corporate,United States,Athens,Georgia,30605,South,TEC-PH-10004006,Technology,Phones,Panasonic KX - TS880B Telephone,206.1000,5,0.00,55.6470
9989,CA-2014-110422,1/21/2014,1/23/2014,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,Florida,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.2480,3,0.20,4.1028
9990,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.9600,2,0.00,15.6332
9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,California,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.5760,2,0.20,19.3932


## Grouping & Aggregation

In [None]:
# mean profit for each segment
(
    df
    .groupby(by="segment")["profit"]
    .mean()
)

Unnamed: 0_level_0,profit
segment,Unnamed: 1_level_1
Consumer,25.836873
Corporate,30.456667
Home Office,33.818664


In [None]:
# mean profit for each segment, this time expressed through .agg
(
    df
    .groupby(by="segment")
    .agg({"profit": "mean"})
)

Unnamed: 0_level_0,profit
segment,Unnamed: 1_level_1
Consumer,25.836873
Corporate,30.456667
Home Office,33.818664


In [None]:
# mean and median profit for each segment (one output column per statistic)
(
    df
    .groupby(by="segment")
    .agg({"profit": ["mean", "median"]})
)

Unnamed: 0_level_0,profit,profit
Unnamed: 0_level_1,mean,median
segment,Unnamed: 1_level_2,Unnamed: 2_level_2
Consumer,25.836873,8.3754
Corporate,30.456667,8.694
Home Office,33.818664,9.102


In [None]:
# for each segment: the number of distinct profit values (nunique)
# and the mean profit
(
    df
    .groupby(by="segment")
    .agg({"profit": ["nunique", "mean"]})
)

Unnamed: 0_level_0,profit,profit
Unnamed: 0_level_1,nunique,mean
segment,Unnamed: 1_level_2,Unnamed: 2_level_2
Consumer,4273,25.836873
Corporate,2644,30.456667
Home Office,1610,33.818664


In [None]:
# mean profit for each (segment, city) combination
(
    df
    .groupby(by=["segment", "city"])["profit"]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,profit
segment,city,Unnamed: 2_level_1
Consumer,Aberdeen,6.630000
Consumer,Abilene,-3.758400
Consumer,Akron,-11.610200
Consumer,Albuquerque,9.628450
Consumer,Alexandria,10.528933
...,...,...
Home Office,Wilmington,133.970367
Home Office,Wilson,-5.412000
Home Office,Woodstock,0.910000
Home Office,Yonkers,47.312600


**Sekarang cobalah lakukan proses untuk "Mencari city (kota) yang memiliki profit paling tinggi pada setiap segment."**