In [2]:
import pandas as pd
import math

# Load dataset
file_path = "../data/bank-additional-full.csv"
df = pd.read_csv(file_path, sep=';')

# Tarif tertinggi per menit
tarif_per_menit = 0.30

# Fungsi hitung biaya panggilan
def hitung_biaya(row):
    if row['contact'] == 'unknown' or row['duration'] == 0:
        return 0.0
    else:
        menit = math.ceil(row['duration'] / 60)
        return round(menit * tarif_per_menit, 2)

# Tambahkan kolom biaya panggilan
df['call_fee'] = df.apply(hitung_biaya, axis=1)

# Simpan ke file baru (opsional)
df.to_csv("bank-additional-biaya.csv", index=False, sep=';')

# Tampilkan contoh
print(df[['contact', 'duration', 'call_fee']].head(10))


     contact  duration  call_fee
0  telephone       261       1.5
1  telephone       149       0.9
2  telephone       226       1.2
3  telephone       151       0.9
4  telephone       307       1.8
5  telephone       198       1.2
6  telephone       139       0.9
7  telephone       217       1.2
8  telephone       380       2.1
9  telephone        50       0.3


In [21]:
import pandas as pd
import math

# Load dataset
file_path = "../data/bank-additional-full.csv"
df = pd.read_csv(file_path, sep=';')

# Tarif per menit
tarif_telephone = 0.10   # landline
tarif_cellular = 0.30    # mobile

# Fungsi hitung biaya panggilan satu kolom
def hitung_call_fee(row):
    if row['duration'] == 0 or row['contact'] == 'unknown':
        return 0.0
    menit = math.ceil(row['duration'] / 60)
    if row['contact'] == 'telephone':
        return round(menit * tarif_telephone, 2)
    elif row['contact'] == 'cellular':
        return round(menit * tarif_cellular, 2)
    else:
        return 0.0

def hitung_call_fee_cut_off(row):
    if row['duration_cut_off'] == 0 or row['contact'] == 'unknown':
        return 0.0
    menit = math.ceil(row['duration_cut_off'] / 60)
    if row['contact'] == 'telephone':
        return round(menit * tarif_telephone, 2)
    elif row['contact'] == 'cellular':
        return round(menit * tarif_cellular, 2)
    else:
        return 0.0

# Tambahkan kolom biaya panggilan tunggal
df['call_fee'] = df.apply(hitung_call_fee, axis=1)

# Simpan ke file baru (opsional)
df.to_csv("../data/revision/bank-additional-call-fee.csv", index=False, sep=';')

# Tampilkan contoh
print(df[['contact', 'duration', 'call_fee']].head(10))


     contact  duration  call_fee
0  telephone       261       0.5
1  telephone       149       0.3
2  telephone       226       0.4
3  telephone       151       0.3
4  telephone       307       0.6
5  telephone       198       0.4
6  telephone       139       0.3
7  telephone       217       0.4
8  telephone       380       0.7
9  telephone        50       0.1


In [22]:
def calc_duration_cutoff(row):
    if row['y'] == 'yes':
        return row['duration'] if row['duration'] < 300 else 300
    else:  # y == 'no'
        if row['duration'] <= 60:
            return 60
        elif row['duration'] <= 120:
            return 90
        elif row['duration'] <= 170:
            return 120
        else:
            return 170


In [23]:
df['duration_cut_off'] = df.apply(calc_duration_cutoff, axis=1)

In [24]:
print(df[['contact', 'duration', 'call_fee', 'duration_cut_off']].head(10))

     contact  duration  call_fee  duration_cut_off
0  telephone       261       0.5               170
1  telephone       149       0.3               120
2  telephone       226       0.4               170
3  telephone       151       0.3               120
4  telephone       307       0.6               170
5  telephone       198       0.4               170
6  telephone       139       0.3               120
7  telephone       217       0.4               170
8  telephone       380       0.7               170
9  telephone        50       0.1                60


In [28]:
# Tambahkan kolom biaya panggilan tunggal
df['call_fee_cut_off'] = df.apply(hitung_call_fee_cut_off, axis=1)

# Simpan ke file baru (opsional)
df.to_csv("../data/revision/bank-additional-call-fee.csv", index=False, sep=';')

# Tampilkan contoh
print(df[['contact', 'duration', 'call_fee', 'duration_cut_off', 'call_fee_cut_off']].head(10))

     contact  duration  call_fee  duration_cut_off  call_fee_cut_off
0  telephone       261       0.5               170               0.3
1  telephone       149       0.3               120               0.2
2  telephone       226       0.4               170               0.3
3  telephone       151       0.3               120               0.2
4  telephone       307       0.6               170               0.3
5  telephone       198       0.4               170               0.3
6  telephone       139       0.3               120               0.2
7  telephone       217       0.4               170               0.3
8  telephone       380       0.7               170               0.3
9  telephone        50       0.1                60               0.1


In [29]:
# Tampilkan baris di mana call_fee dan call_fee_cut_off berbeda
beda_fee = df[df['call_fee'] != df['call_fee_cut_off']]

# Tampilkan hasil
print(beda_fee[['contact', 'duration', 'duration_cut_off', 'call_fee', 'call_fee_cut_off']].head(10))

      contact  duration  duration_cut_off  call_fee  call_fee_cut_off
0   telephone       261               170       0.5               0.3
1   telephone       149               120       0.3               0.2
2   telephone       226               170       0.4               0.3
3   telephone       151               120       0.3               0.2
4   telephone       307               170       0.6               0.3
5   telephone       198               170       0.4               0.3
6   telephone       139               120       0.3               0.2
7   telephone       217               170       0.4               0.3
8   telephone       380               170       0.7               0.3
11  telephone       222               170       0.4               0.3


In [30]:
# Hitung total biaya call_fee dan call_fee_cut_off
total_call_fee = df['call_fee'].sum()
total_call_fee_cut_off = df['call_fee_cut_off'].sum()

# Tampilkan hasil
print(f"Total biaya aktual (call_fee): €{total_call_fee:,.2f}")
print(f"Total biaya setelah cut-off (call_fee_cut_off): €{total_call_fee_cut_off:,.2f}")
print(f"Penghematan biaya: €{(total_call_fee - total_call_fee_cut_off):,.2f}")


Total biaya aktual (call_fee): €45,359.00
Total biaya setelah cut-off (call_fee_cut_off): €24,857.60
Penghematan biaya: €20,501.40


In [31]:
# Hitung total durasi aktual dan cut-off (dalam detik)
total_duration = df['duration'].sum()
total_duration_cutoff = df['duration_cut_off'].sum()

# Tampilkan hasil dalam detik dan juga menit
print(f"Total durasi aktual (duration): {total_duration:,} detik ({total_duration/60:,.2f} menit)")
print(f"Total durasi setelah cut-off (duration_cut_off): {total_duration_cutoff:,} detik ({total_duration_cutoff/60:,.2f} menit)")
print(f"Penghematan waktu: {total_duration - total_duration_cutoff:,} detik ({(total_duration - total_duration_cutoff)/60:,.2f} menit)")


Total durasi aktual (duration): 10,638,243 detik (177,304.05 menit)
Total durasi setelah cut-off (duration_cut_off): 5,991,325 detik (99,855.42 menit)
Penghematan waktu: 4,646,918 detik (77,448.63 menit)
