<a href="https://colab.research.google.com/github/rmhyps1/statistics/blob/main/TUGAS4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def P_AgB(A, B, S):
    """
    Compute the conditional probability P(A|B) on a finite sample space.

    All outcomes are treated as equally likely, so each probability is a
    ratio of set sizes.

    Parameters:
    A : set - Event A
    B : set - Event B (the condition)
    S : set - Sample space

    Returns:
    float - P(A|B), or None (after printing a message) when the sample
            space is empty or P(B) = 0, because P(A|B) is then undefined.
    """
    # An empty sample space previously raised ZeroDivisionError; report it the
    # same way as the P(B) = 0 case so callers only have one failure mode.
    if len(S) == 0:
        print("Ruang sampel S kosong, tidak dapat menghitung P(A|B)")
        return None

    # P(B) = |B| / |S| is zero exactly when B is empty.
    if len(B) == 0:
        print("P(B) = 0, tidak dapat menghitung P(A|B)")
        return None

    # P(A|B) = P(A ∩ B) / P(B) = (|A ∩ B| / |S|) / (|B| / |S|) = |A ∩ B| / |B|.
    # Dividing once avoids the rounding of two intermediate float divisions.
    return len(A & B) / len(B)

# Banner for the conditional-probability demo.
print("=" * 60)
print("ALGORITMA PELUANG BERSYARAT (Conditional Probability)")
print("=" * 60)

# Example 1: a single roll of a six-sided die.
# The heading starts with "\n" (a blank line); the previous "\C" was an
# invalid escape sequence that printed a literal backslash and raised a
# SyntaxWarning.
print("\nCONTOH 1: Pelemparan Dadu")
print("-" * 60)

# Sample space: the six faces of the die.
S_dadu = {1, 2, 3, 4, 5, 6}

# Event A: the roll is even.
A_genap = {2, 4, 6}

# Event B (the condition): the roll is greater than 3.
B_lebih3 = {4, 5, 6}

print(f"Ruang Sampel S = {S_dadu}")
print(f"A (angka genap) = {A_genap}")
print(f"B (angka > 3) = {B_lebih3}")

P_result_1 = P_AgB(A_genap, B_lebih3, S_dadu)
print(f"\n✓ P(A|B) = P(genap | lebih dari 3) = {P_result_1:.4f}")
# Show the same value as the fraction |A ∩ B| / |B|.
print(f"✓ Dalam fraksi: {len(A_genap & B_lebih3)}/{len(B_lebih3)}")

# Example 2: drawing one card from a 52-card deck.
print("\n" + "=" * 60)
print("CONTOH 2: Pengambilan Kartu Bridge")
print("-" * 60)

# Cards are numbered 1..52 suit by suit: card = suit * 13 + rank, with
# suit in 0..3 and rank in 1..13 (1 = ace, 11..13 = J, Q, K).
S_kartu = set(range(1, 53))

# Event A: the card is a heart. Hearts are taken to be the first suit (1..13).
A_hati = set(range(1, 14))

# Event B (the condition): the card's rank is <= 10, in any suit.
# The previous range(1, 41) swept in every heart including J, Q and K, which
# made A a subset of B and gave 13/40 instead of the correct 10/40.
B_angka = {suit * 13 + rank for suit in range(4) for rank in range(1, 11)}

print(f"Total kartu S = {len(S_kartu)}")
print(f"A (kartu hati) = {len(A_hati)} kartu")
print(f"B (kartu angka ≤ 10) = {len(B_angka)} kartu")

P_result_2 = P_AgB(A_hati, B_angka, S_kartu)
print(f"\n✓ P(A|B) = P(hati | angka ≤ 10) = {P_result_2:.4f}")
# Show the same value as the fraction |A ∩ B| / |B|.
print(f"✓ Dalam fraksi: {len(A_hati & B_angka)}/{len(B_angka)}")

# Example 3: students passing an exam, conditioned on attendance.
print(f"\n{'=' * 60}")
print("CONTOH 3: Siswa Lulus Ujian", "-" * 60, sep="\n")

# Students are identified by the numbers 1..100.
S_siswa = set(range(1, 101))
# Event A: students 1..60 passed.
A_lulus = set(range(1, 61))
# Event B (the condition): students 1..80 attended.
B_hadir = set(range(1, 81))

print(
    f"Total siswa S = {len(S_siswa)}",
    f"A (siswa lulus) = {len(A_lulus)} siswa",
    f"B (siswa hadir) = {len(B_hadir)} siswa",
    sep="\n",
)

P_result_3 = P_AgB(A_lulus, B_hadir, S_siswa)
# Report the result as a decimal, as the fraction |A ∩ B| / |B|, and as a
# percentage.
print(
    f"\n✓ P(A|B) = P(lulus | hadir) = {P_result_3:.4f}",
    f"✓ Dalam fraksi: {len(A_lulus & B_hadir)}/{len(B_hadir)}",
    f"✓ Dalam persen: {P_result_3 * 100:.2f}%",
    sep="\n",
)

print(f"\n{'=' * 60}")

ALGORITMA PELUANG BERSYARAT (Conditional Probability)
\CONTOH 1: Pelemparan Dadu
------------------------------------------------------------
Ruang Sampel S = {1, 2, 3, 4, 5, 6}
A (angka genap) = {2, 4, 6}
B (angka > 3) = {4, 5, 6}

✓ P(A|B) = P(genap | lebih dari 3) = 0.6667
✓ Dalam fraksi: 2/3

CONTOH 2: Pengambilan Kartu Bridge
------------------------------------------------------------
Total kartu S = 52
A (kartu hati) = 13 kartu
B (kartu angka ≤ 10) = 40 kartu

✓ P(A|B) = P(hati | angka ≤ 10) = 0.3250
✓ Dalam fraksi: 13/40

CONTOH 3: Siswa Lulus Ujian
------------------------------------------------------------
Total siswa S = 100
A (siswa lulus) = 60 siswa
B (siswa hadir) = 80 siswa

✓ P(A|B) = P(lulus | hadir) = 0.7500
✓ Dalam fraksi: 60/80
✓ Dalam persen: 75.00%



  print("\CONTOH 1: Pelemparan Dadu")


In [None]:
import pandas as pd
import numpy as np

# Read the Spotify CSV from the Colab working directory, decoding it as latin-1.
df = pd.read_csv(
    '/content/Most Streamed Spotify Songs 2024.csv',
    encoding='latin-1',
)

# Report banner.
print(
    "=" * 80,
    "ANALISIS SPOTIFY DENGAN PELUANG BERSYARAT (CONDITIONAL PROBABILITY)",
    "=" * 80,
    sep="\n",
)

# Basic dataset info: the number of rows (one row per song).
print("\nDataset Info:")
print(f"Total lagu: {len(df)}")

# ===== FUNGSI PELUANG BERSYARAT =====
def P_AgB_df(condition_A, condition_B, data):
    """
    Compute the conditional probability P(A|B) from two boolean row masks.

    Parameters:
    condition_A : boolean Series - True on rows where A holds
    condition_B : boolean Series - True on rows where B holds (the condition)
    data : the dataframe the masks were built from. Kept for interface
           compatibility; the result only depends on the two masks because
           P(A|B) = |A and B| / |B|.

    Returns:
    tuple (P(A|B), number of rows in A and B, number of rows in B), or
    None (after printing a warning) when no row satisfies B.
    """
    count_B = condition_B.sum()

    # Guard on the count itself rather than on count_B / len(data): with an
    # empty dataframe that ratio is 0/0 = nan, nan == 0 is False, and the
    # function used to return (nan, 0, 0) instead of signalling the failure.
    if count_B == 0:
        print("⚠️ P(B) = 0, tidak dapat menghitung P(A|B)")
        return None

    # Rows where both A and B hold.
    count_A_and_B = (condition_A & condition_B).sum()

    return count_A_and_B / count_B, count_A_and_B, count_B


# ===== ANALYSIS 1: P(Popular | Explicit) =====
print(
    "\n" + "=" * 80,
    "\nANALISIS 1: Peluang Populer dengan Syarat Lagu Explicit",
    "=" * 80,
    sep="\n",
)

# A song counts as "popular" when its Spotify Popularity is at or above the
# 75th percentile of that column; "explicit" means Explicit Track equals 1.
popular_threshold = df['Spotify Popularity'].quantile(0.75)
is_popular = df['Spotify Popularity'].ge(popular_threshold)
is_explicit = df['Explicit Track'].eq(1)

result = P_AgB_df(is_popular, is_explicit, df)
if result:
    prob, count_both, count_explicit = result
    print(f"\nLagu Explicit yang Popular: {count_both} dari {count_explicit} lagu explicit")
    print(f"✓ P(Popular | Explicit) = {prob:.4f} ({prob*100:.2f}%)")

    # Unconditional P(Popular) over all rows, used as the baseline for the
    # ratio printed on the last line.
    P_popular = is_popular.sum() / len(df)
    print(
        f"✓ P(Popular) = {P_popular:.4f} ({P_popular*100:.2f}%)",
        f"→ Lagu explicit {prob/P_popular:.2f}x lebih mungkin popular",
        sep="\n",
    )


# ===== ANALYSIS 2: P(High Streams | High YouTube Views) =====
print("\n" + "=" * 80)
print("\nANALISIS 2: Peluang Spotify Streams Tinggi dengan Syarat YouTube Views Tinggi")
print("=" * 80)

# Convert the count columns to numbers. The raw values appear to be strings
# with thousands separators (e.g. "1,234,567"): coercing them directly turned
# every row into NaN and left df_clean empty ("0 dari 0", nan). Strip the
# commas first so only genuinely missing/invalid entries become NaN.
df['Spotify Streams'] = pd.to_numeric(
    df['Spotify Streams'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)
df['YouTube Views'] = pd.to_numeric(
    df['YouTube Views'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# Keep only rows where both counts are known.
df_clean = df.dropna(subset=['Spotify Streams', 'YouTube Views'])

# "High" means at or above the 75th percentile of the cleaned column.
stream_threshold = df_clean['Spotify Streams'].quantile(0.75)
view_threshold = df_clean['YouTube Views'].quantile(0.75)

high_streams = df_clean['Spotify Streams'] >= stream_threshold
high_views = df_clean['YouTube Views'] >= view_threshold

result2 = P_AgB_df(high_streams, high_views, df_clean)
if result2:
    prob2, count_both2, count_high_views = result2
    print(f"\nLagu dengan High Streams dan High YouTube Views: {count_both2} dari {count_high_views}")
    print(f"✓ P(High Streams | High YouTube Views) = {prob2:.4f} ({prob2*100:.2f}%)")

    # Unconditional P(High Streams), the baseline for the ratio below.
    P_high_streams = high_streams.sum() / len(df_clean)
    print(f"✓ P(High Streams) = {P_high_streams:.4f} ({P_high_streams*100:.2f}%)")
    print(f"→ Lagu dengan high YouTube views {prob2/P_high_streams:.2f}x lebih mungkin high streams")


# ===== ANALYSIS 3: P(TikTok Viral | Explicit) =====
print("\n" + "=" * 80)
print("ANALISIS 3: Peluang Viral di TikTok dengan Syarat Explicit")
print("=" * 80)

# Convert TikTok Posts to numbers. The raw values appear to be strings with
# thousands separators (e.g. "1,234,567"); coercing them directly discarded
# every value containing a comma and left only a handful of rows. Strip the
# commas first so only genuinely missing/invalid entries become NaN.
df['TikTok Posts'] = pd.to_numeric(
    df['TikTok Posts'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)
df_tiktok = df.dropna(subset=['TikTok Posts'])

# "Viral" means at or above the 75th percentile of TikTok Posts;
# "explicit" means Explicit Track equals 1.
tiktok_threshold = df_tiktok['TikTok Posts'].quantile(0.75)
is_tiktok_viral = df_tiktok['TikTok Posts'] >= tiktok_threshold
is_explicit_tiktok = df_tiktok['Explicit Track'] == 1

result3 = P_AgB_df(is_tiktok_viral, is_explicit_tiktok, df_tiktok)
if result3:
    prob3, count_both3, count_explicit_tiktok = result3
    print(f"\nLagu Explicit yang Viral di TikTok: {count_both3} dari {count_explicit_tiktok}")
    print(f"✓ P(Viral TikTok | Explicit) = {prob3:.4f} ({prob3*100:.2f}%)")

    # Unconditional P(Viral TikTok), the baseline for the ratio below.
    P_viral = is_tiktok_viral.sum() / len(df_tiktok)
    print(f"✓ P(Viral TikTok) = {P_viral:.4f} ({P_viral*100:.2f}%)")
    print(f"→ Lagu explicit {prob3/P_viral:.2f}x lebih mungkin viral di TikTok")

# ===== ANALYSIS 4: P(High Shazam | High Popularity) =====
print("\n" + "=" * 80)
print("ANALISIS 4: Peluang Shazam Tinggi dengan Syarat Popularity Tinggi")
print("=" * 80)

# Convert Shazam Counts to numbers. The raw values appear to be strings with
# thousands separators (e.g. "1,234,567"); coercing them directly discarded
# every value containing a comma and left only a handful of rows. Strip the
# commas first so only genuinely missing/invalid entries become NaN.
df['Shazam Counts'] = pd.to_numeric(
    df['Shazam Counts'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)
# Keep only rows where both the Shazam count and the popularity are known.
df_shazam = df.dropna(subset=['Shazam Counts', 'Spotify Popularity'])

# "High" means at or above the 75th percentile of the cleaned column.
shazam_threshold = df_shazam['Shazam Counts'].quantile(0.75)
pop_threshold = df_shazam['Spotify Popularity'].quantile(0.75)

high_shazam = df_shazam['Shazam Counts'] >= shazam_threshold
high_popularity = df_shazam['Spotify Popularity'] >= pop_threshold

result4 = P_AgB_df(high_shazam, high_popularity, df_shazam)
if result4:
    prob4, count_both4, count_high_pop = result4
    print(f"\nLagu High Shazam dengan High Popularity: {count_both4} dari {count_high_pop}")
    print(f"✓ P(High Shazam | High Popularity) = {prob4:.4f} ({prob4*100:.2f}%)")

    # Unconditional P(High Shazam), the baseline for the ratio below.
    P_high_shazam = high_shazam.sum() / len(df_shazam)
    print(f"✓ P(High Shazam) = {P_high_shazam:.4f} ({P_high_shazam*100:.2f}%)")
    print(f"→ Lagu dengan high popularity {prob4/P_high_shazam:.2f}x lebih mungkin high shazam")

# Closing banner for the whole report.
print("\n" + "=" * 80)
print("Analisis Selesai!")
print("=" * 80)

ANALISIS SPOTIFY DENGAN PELUANG BERSYARAT (CONDITIONAL PROBABILITY)

Dataset Info:
Total lagu: 4600


ANALISIS 1: Peluang Populer dengan Syarat Lagu Explicit

Lagu Explicit yang Popular: 347 dari 1651 lagu explicit
✓ P(Popular | Explicit) = 0.2102 (21.02%)
✓ P(Popular) = 0.2122 (21.22%)
→ Lagu explicit 0.99x lebih mungkin popular


ANALISIS 2: Peluang Spotify Streams Tinggi dengan Syarat YouTube Views Tinggi

Lagu dengan High Streams dan High YouTube Views: 0 dari 0
✓ P(High Streams | High YouTube Views) = nan (nan%)
✓ P(High Streams) = nan (nan%)
→ Lagu dengan high YouTube views nanx lebih mungkin high streams

ANALISIS 3: Peluang Viral di TikTok dengan Syarat Explicit

Lagu Explicit yang Viral di TikTok: 11 dari 49
✓ P(Viral TikTok | Explicit) = 0.2245 (22.45%)
✓ P(Viral TikTok) = 0.2529 (25.29%)
→ Lagu explicit 0.89x lebih mungkin viral di TikTok

ANALISIS 4: Peluang Shazam Tinggi dengan Syarat Popularity Tinggi

Lagu High Shazam dengan High Popularity: 5 dari 9
✓ P(High Shazam | Hi

  P_B = condition_B.sum() / len(data)
  P_AgB_result = A_and_B / condition_B.sum()
  P_high_streams = high_streams.sum() / len(df_clean)
