# Mengumpulkan Data Review Play Store Google

In [None]:
pip install google_play_scraper

In [None]:
pip install yfinance

In [None]:
pip install openpyxl

In [1]:
from google_play_scraper import app, reviews, Sort
import pandas as pd

# === Ambil detail aplikasi ===
package_name = "com.tokopedia.tkpd"

print("[INFO] Mengambil informasi aplikasi...")
app_info = app(package_name)

print("\n=== INFO APLIKASI ===")
print(f"Nama       : {app_info['title']}")
print(f"Developer  : {app_info['developer']}")
print(f"Rating     : {app_info['score']} ({app_info['ratings']} rating)")
print(f"Instalasi  : {app_info['installs']}")
print(f"Deskripsi  : {app_info['description'][:150]}...")

# === Ambil ulasan pengguna ===
print("\n[INFO] Mengambil ulasan pengguna...")
reviews_list, _ = reviews(
    package_name,
    lang='id',
    country='id',
    count=100,
    sort=Sort.NEWEST  # <-- Sekarang sudah benar
)

# Ubah ke DataFrame
df = pd.DataFrame(reviews_list)
df = df[['userName', 'score', 'content', 'at']]
df.rename(columns={
    'userName': 'User',
    'score': 'Rating',
    'content': 'Review',
    'at': 'Tanggal'
}, inplace=True)

# Simpan ke file
nama_file = f'ulasan_{package_name}.xlsx'
df.to_excel(nama_file, index=False)
print(f"\n[INFO] Ulasan disimpan ke file: {nama_file}")

[INFO] Mengambil informasi aplikasi...

=== INFO APLIKASI ===
Nama       : Tokopedia
Developer  : PT. Tokopedia
Rating     : 4.356745 (7353179 rating)
Instalasi  : 100,000,000+
Deskripsi  : Want to get more than just a hassle free shopping experience? Can't get enough of promos? You can get it in Tokopedia! Time to reveal the surprise:
...

[INFO] Mengambil ulasan pengguna...

[INFO] Ulasan disimpan ke file: ulasan_com.tokopedia.tkpd.xlsx


In [2]:
import pandas as pd

In [3]:
df = pd.read_excel('ulasan_com.tokopedia.tkpd.xlsx')
df

Unnamed: 0,User,Rating,Review,Tanggal
0,Sainurin Ratna Devi,1,Pengiriman lelet gak bisa pilih ekspedisi juga...,2025-10-15 10:42:08
1,Arobi Robet,5,ok,2025-10-15 10:35:47
2,tri rahmadi,5,👍bagus,2025-10-15 10:18:42
3,Miftahul Huda,1,saya sebenarnya dulu suka sekali dengan Tokope...,2025-10-15 10:02:23
4,Budi Santoso,5,keren,2025-10-15 09:53:26
...,...,...,...,...
95,Twenty Seven,2,klaim asuransi kerusakan dari solusiutama pemb...,2025-10-14 07:16:29
96,Cristiano Ronaldo,4,"sekarang ada error di bagian feed folowwing,,t...",2025-10-14 06:00:42
97,Doni Salman,5,sangat membantu,2025-10-14 05:56:54
98,Candra Andika,1,"pesanan di batalkan, dan uang refund saya belu...",2025-10-14 05:48:51


# Mengumpulkan Data Saham dari Yahoo Finance

In [4]:
import yfinance as yf
import pandas as pd

# Daftar saham Indonesia (gunakan kode Yahoo Finance .JK)
tickers = ["BBCA.JK", "TLKM.JK", "GOTO.JK"]
start_date = "2024-01-01"
end_date = "2024-12-31"

# Buat Excel writer
with pd.ExcelWriter("data_saham_per_saham.xlsx", engine='openpyxl') as writer:
    for ticker in tickers:
        print(f"[INFO] Mengambil data {ticker}...")
        data = yf.download(ticker, start=start_date, end=end_date)

        # Reset index (tanggal jadi kolom)
        data = data.reset_index()

        # Flatten MultiIndex kolom jika ada
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = [' '.join(col).strip() for col in data.columns.values]

        # Simpan ke sheet dengan nama saham (tanpa .JK)
        sheet_name = ticker.replace(".JK", "")
        data.to_excel(writer, sheet_name=sheet_name, index=False)
        print(f"[INFO] Disimpan ke sheet: {sheet_name}")

print("[✅] Semua data saham berhasil disimpan ke 'data_saham_per_saham.xlsx'")

[INFO] Mengambil data BBCA.JK...


  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)


[INFO] Disimpan ke sheet: BBCA
[INFO] Mengambil data TLKM.JK...


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)


[INFO] Disimpan ke sheet: TLKM
[INFO] Mengambil data GOTO.JK...


[*********************100%***********************]  1 of 1 completed


[INFO] Disimpan ke sheet: GOTO
[✅] Semua data saham berhasil disimpan ke 'data_saham_per_saham.xlsx'


In [5]:
df = pd.read_excel('data_saham_per_saham.xlsx')
df

Unnamed: 0,Date,Close BBCA.JK,High BBCA.JK,Low BBCA.JK,Open BBCA.JK,Volume BBCA.JK
0,2024-01-02,8893.341797,8893.341797,8822.572499,8869.752031,30545200
1,2024-01-03,8822.573242,8893.342546,8798.983474,8893.342546,34603300
2,2024-01-04,8940.521484,8940.521484,8846.162419,8869.752185,56501800
3,2024-01-05,9034.880859,9058.470627,8940.521790,8964.111558,85290100
4,2024-01-08,9034.880859,9082.060394,8940.521790,9058.470627,50896000
...,...,...,...,...,...,...
232,2024-12-20,9361.940430,9458.955356,9337.686698,9361.940430,91576900
233,2024-12-23,9483.208984,9555.970179,9410.447790,9410.447790,43292100
234,2024-12-24,9458.955078,9604.477464,9458.955078,9555.970002,32415700
235,2024-12-27,9507.462891,9531.716622,9434.701695,9507.462891,24016700


# Mengumpulkan Data dari Database MySQL

In [6]:
import pandas as pd
import mysql.connector

conn = mysql.connector.connect(
    host="185.229.118.48",
    user="u1605427_analyst",
    password="BI-Pusat-2025%",
    database="u1605427_dataanalyst"
)

cursor = conn.cursor()
cursor.execute("SELECT * FROM penjualan")

# Ambil nama kolom
columns = [col[0] for col in cursor.description]

# Ambil data
rows = cursor.fetchall()

# Buat DataFrame
df = pd.DataFrame(rows, columns=columns)

df.head()

Unnamed: 0,id,nama_produk,harga,tanggal_transaksi
0,1,Produk A,10000.0,2025-06-01
1,2,Produk B,12000.0,2025-06-01
2,3,Produk C,15000.0,2025-06-01
3,4,Produk A,11000.0,2025-06-02
4,5,Produk B,13000.0,2025-06-02


# Mengumpulkan Data dari API

In [7]:
import requests
import pandas as pd

# Endpoint API untuk semua negara
url = "https://restcountries.com/v3.1/region/europe"

# Kirim permintaan GET
response = requests.get(url)
# Ubah hasil ke JSON
data = response.json()
# Ambil beberapa kolom penting
negara_list = []
for negara in data:
    nama = negara.get("name", {}).get("common", "N/A")
    ibu_kota = negara.get("capital", ["N/A"])[0]
    wilayah = negara.get("region", "N/A")
    populasi = negara.get("population", 0)
    area = negara.get("area", 0)
    mata_uang = ", ".join(negara.get("currencies", {}).keys())
    
    negara_list.append({
        "nama": nama,
        "ibu_kota": ibu_kota,
        "wilayah": wilayah,
        "populasi": populasi,
        "luas_area": area,
        "mata_uang": mata_uang
    })
# Buat DataFrame
df = pd.DataFrame(negara_list)
# Tampilkan 10 data pertama
df.head()
# Simpan ke file
nama_file = f'data_final.xlsx'
df.to_excel(nama_file, index=False)
print(f"\n[INFO] Ulasan disimpan ke file: {nama_file}")


[INFO] Ulasan disimpan ke file: data_final.xlsx


In [8]:
import pandas

In [9]:
data = pandas.read_excel("data_final.xlsx")

In [10]:
data.head(10000)

Unnamed: 0,nama,ibu_kota,wilayah,populasi,luas_area,mata_uang
0,Lithuania,Vilnius,Europe,2794700,65300.0,EUR
1,Croatia,Zagreb,Europe,4047200,56594.0,EUR
2,Denmark,Copenhagen,Europe,5831404,43094.0,DKK
3,Albania,Tirana,Europe,2837743,28748.0,ALL
4,Malta,Valletta,Europe,525285,316.0,EUR
5,Liechtenstein,Vaduz,Europe,38137,160.0,CHF
6,Monaco,Monaco,Europe,39244,2.02,EUR
7,Slovakia,Bratislava,Europe,5458827,49037.0,EUR
8,Greece,Athens,Europe,10715549,131990.0,EUR
9,Netherlands,Amsterdam,Europe,16655799,41850.0,EUR


## WEB Crawling 

# Web Sederhana

In [12]:
import requests
from bs4 import BeautifulSoup

# URL dari halaman web yang ditargetkan
url = 'http://quotes.toscrape.com/'

try:
    # Mengirim permintaan GET ke URL
    response = requests.get(url)
    
    # Memeriksa jika permintaan sukses (kode status 200)
    if response.status_code == 200:
        # Membuat objek BeautifulSoup dari konten HTML yang diterima
        soup = BeautifulSoup(response.text, 'html.parser')

        # Mencari semua elemen <div> dengan kelas 'quote'
        # Setiap <div> ini berisi satu kutipan dan informasinya
        quotes = soup.find_all('div', class_='quote')

        # Melakukan perulangan untuk setiap kutipan yang ditemukan
        for quote in quotes:
            # Mengambil teks kutipan, yang berada di dalam elemen <span> dengan kelas 'text'
            text = quote.find('span', class_='text').text.strip()
            
            # Mengambil nama penulis, yang berada di dalam elemen <small> dengan kelas 'author'
            author = quote.find('small', class_='author').text.strip()
            
            # Mencetak hasil
            print(f'"{text}"')
            print(f'- {author}\n')

    else:
        print(f"Gagal mengambil halaman. Kode status: {response.status_code}")

except requests.exceptions.RequestException as e:
    print(f"Error saat mencoba koneksi: {e}")

"“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”"
- Albert Einstein

"“It is our choices, Harry, that show what we truly are, far more than our abilities.”"
- J.K. Rowling

"“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”"
- Albert Einstein

"“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”"
- Jane Austen

"“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”"
- Marilyn Monroe

"“Try not to become a man of success. Rather become a man of value.”"
- Albert Einstein

"“It is better to be hated for what you are than to be loved for what you are not.”"
- André Gide

"“I have not failed. I've just found 10,000 ways that won't work.”"
- Thomas A. Edison

"“A woman is like a tea bag; you never know how strong it is until it's in h