In [68]:
import os
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
from supabase import create_client
import time

load_dotenv()



True

In [69]:
# Supabase client, configured from the SUPABASE_URL / SUPABASE_KEY environment variables
url_supabase = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")
supabase = create_client(url_supabase, key)

In [70]:
# Query the "idx_key_stats" table and load the returned rows into a DataFrame.
# NOTE(review): select("") passes an empty column list; the displayed preview shows
# every column, but select("*") may have been intended — confirm.
db_data = supabase.table("idx_key_stats").select("").execute()
df_db_data = pd.DataFrame(db_data.data)

# Preview the first rows of the loaded table
df_db_data.head()

Unnamed: 0,symbol,recommendation_mean,point_summaries,updated_on,employee_num,holders_breakdown,intrinsic_value,forward_eps
0,ACRO.JK,,,2024-06-10T14:32:22+00:00,25.0,,,
1,RMBA.JK,,"[{'name': 'value', 'point': 5, 'maxpoint': 20}...",2024-05-24T06:24:51+00:00,978.0,"{'% of Float Held by Institutions': '0.00%', '...",234.772165,
2,AALI.JK,4.3,"[{'name': 'value', 'point': 16, 'maxpoint': 21...",2024-06-10T14:32:22+00:00,30025.0,,8158.263096,635.05
3,DGNS.JK,,"[{'name': 'value', 'point': 11.5, 'maxpoint': ...",2024-06-10T14:32:22+00:00,64.0,,8.90811,
4,EPAC.JK,,"[{'name': 'value', 'point': 13.5, 'maxpoint': ...",2024-06-10T14:32:22+00:00,160.0,,,


In [71]:
# Placeholder (NaN) columns that will hold the rating data
for rating_column in ('technical_rating', 'analyst_rating'):
    df_db_data[rating_column] = np.nan

# Snapshot of the column names after the two rating columns were appended
cols = list(df_db_data.columns)
df_db_data.head()

Unnamed: 0,symbol,recommendation_mean,point_summaries,updated_on,employee_num,holders_breakdown,intrinsic_value,forward_eps,technical_rating,analyst_rating
0,ACRO.JK,,,2024-06-10T14:32:22+00:00,25.0,,,,,
1,RMBA.JK,,"[{'name': 'value', 'point': 5, 'maxpoint': 20}...",2024-05-24T06:24:51+00:00,978.0,"{'% of Float Held by Institutions': '0.00%', '...",234.772165,,,
2,AALI.JK,4.3,"[{'name': 'value', 'point': 16, 'maxpoint': 21...",2024-06-10T14:32:22+00:00,30025.0,,8158.263096,635.05,,
3,DGNS.JK,,"[{'name': 'value', 'point': 11.5, 'maxpoint': ...",2024-06-10T14:32:22+00:00,64.0,,8.90811,,,
4,EPAC.JK,,"[{'name': 'value', 'point': 13.5, 'maxpoint': ...",2024-06-10T14:32:22+00:00,160.0,,,,,


In [72]:
# Symbols from the table, with the ".JK" part removed
symbol_list = [ticker.replace(".JK", "") for ticker in df_db_data['symbol'].tolist()]

symbol_list[:5]

['ACRO', 'RMBA', 'AALI', 'DGNS', 'EPAC']

In [187]:
# Scraping configuration
# Chart page prefix; the symbol is appended directly after the encoded "IDX:" part
BASE_URL = "https://www.tradingview.com/chart/?symbol=IDX%3A"
# Keys used for the technical counters, in the order they are read from the page
TECHNICAL_ENUM = [
    'sell',
    'neutral',
    'buy',
]
# Keys used for the analyst values, in the order they are read from the page
ANALYST_ENUM = [
    'strong_buy',
    'buy',
    'hold',
    'sell',
    'strong_sell',
]


def get_url_page(symbol:str) -> str:
    """Return the chart page URL for ``symbol`` (``BASE_URL`` followed by the symbol)."""
    return "{}{}".format(BASE_URL, symbol)

def scrap_page(url: str) :
    """Open ``url`` in a new Chrome session and wait for the page buttons to appear.

    Args:
        url: Page to load.

    Returns:
        The live webdriver instance once an element with class
        ``button-vll9ujXF`` is present. The caller owns the driver and must
        call ``quit()`` on it. Returns ``None`` when loading the page or waiting
        for the element fails; in that case the browser has already been closed.
    """
    driver = webdriver.Chrome()
    try:
        # Navigation is inside the try block so a failing driver.get() cannot
        # leave an orphaned browser process behind.
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "button-vll9ujXF"))
        )
    except Exception:
        print(f"Fail scraping from URL: {url}")
        print("Loader did not disappear in time")
        driver.quit()
        return None
    except BaseException:
        # KeyboardInterrupt / SystemExit: release the browser, but do not
        # swallow the interruption.
        driver.quit()
        raise

    print(f"Successfully get element from URL: {url}")
    return driver
    
def _read_technical_rating(driver, rating: dict) -> None:
    """Fill ``rating`` in place with the counters of the summary technical gauge."""
    gauge_class = "speedometerWrapper-kg4MJrFB"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, gauge_class))
    )
    gauges = driver.find_elements(By.CLASS_NAME, gauge_class)
    # Explicit check rather than `assert`, so it is not stripped under `python -O`.
    if len(gauges) != 3:
        raise ValueError("Difference in technical data wrapper detected")

    # Summary should be the middle one
    counters = gauges[1].find_element(By.CLASS_NAME, "countersWrapper-kg4MJrFB")
    counter_lines = counters.text.split("\n")

    # Values are read from text lines 1, 3 and 5, in TECHNICAL_ENUM order.
    for position, enum in enumerate(TECHNICAL_ENUM):
        rating[enum] = counter_lines[1 + 2 * position]


def _read_analyst_rating(driver, rating: dict) -> None:
    """Fill ``rating`` in place with the values shown in the analyst forecast container."""
    container_class = "container-zZSa1SHt"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, container_class))
    )
    container = driver.find_element(By.CLASS_NAME, container_class)
    values = container.find_elements(By.CLASS_NAME, "value-GNeDL9vy")

    # The n-th value element is stored under the n-th ANALYST_ENUM key.
    for idx, enum in enumerate(ANALYST_ENUM):
        rating[enum] = values[idx].text


def scrap_rating_data(symbol: str) -> dict:
    """Scrape the technical and analyst rating counters for one symbol.

    Args:
        symbol: Symbol appended to the chart URL by ``get_url_page``.

    Returns:
        A dict with the keys ``symbol``, ``updated_on`` (local time formatted as
        ``%Y-%m-%d %H:%M:%S``), ``technical_rating`` and ``analyst_rating``.
        A rating is ``None`` when its button ("More technicals" /
        "See forecast") was not found or the page could not be loaded. When the
        button was found but reading the data failed, the rating is the dict
        filled so far (possibly empty) and a failure message is printed.

    Raises:
        Any exception raised while listing, reading, or clicking the page buttons
        propagates to the caller; the browser is closed first.
    """
    url = get_url_page(symbol)
    driver = scrap_page(url)
    result_data = {
        'symbol': symbol,
        'updated_on': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'technical_rating': None,
        'analyst_rating': None,
    }

    # The page failed to load: still return the record, with both ratings
    # left as None, so the declared dict return type always holds.
    if driver is None:
        return result_data

    try:
        for item in driver.find_elements(By.CLASS_NAME, "button-vll9ujXF"):
            # Read the label once, before clicking: the element may no longer
            # be readable after the click changes the page.
            label = item.text

            # Getting technical
            if label == "More technicals":
                technical_rating_dict = dict()
                result_data['technical_rating'] = technical_rating_dict
                item.click()
                try:
                    _read_technical_rating(driver, technical_rating_dict)
                except Exception:
                    print("Failed to get Technical Data")

            # Getting Analyst Rating
            elif label == "See forecast":
                analyst_rating_dict = dict()
                result_data['analyst_rating'] = analyst_rating_dict
                item.click()
                try:
                    _read_analyst_rating(driver, analyst_rating_dict)
                except Exception:
                    print("Failed to get Analyst Data")
    finally:
        # Always release the browser, including when an error propagates.
        driver.quit()

    return result_data
       


In [188]:
scrap_rating_data("AMMN")

Successfully get element from URL: https://www.tradingview.com/chart/?symbol=IDX%3AAMMN
{'symbol': 'AMMN', 'updated_on': '2024-06-26 17:00:04', 'technical_rating': {'sell': '6', 'neutral': '7', 'buy': '13'}, 'analyst_rating': {'strong_buy': '1', 'buy': '0', 'hold': '0', 'sell': '0', 'strong_sell': '0'}}


{'symbol': 'AMMN',
 'updated_on': '2024-06-26 17:00:04',
 'technical_rating': {'sell': '6', 'neutral': '7', 'buy': '13'},
 'analyst_rating': {'strong_buy': '1',
  'buy': '0',
  'hold': '0',
  'sell': '0',
  'strong_sell': '0'}}