<a href="https://colab.research.google.com/github/ozturkergin/ozturkergin/blob/main/TEFAS_PowerBI_Import_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install bs4 --quiet

In [2]:
pip install urllib3 --quiet

In [3]:
pip install marshmallow --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/49.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m41.0/49.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m793.2 kB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
!pip install pandas_ta==0.3.14b --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m71.7/115.1 kB[0m [31m89.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone


In [5]:
pip install free-proxy --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for free-proxy (setup.py) ... [?25l[?25hdone


In [9]:
import requests
import pandas as pd
import pandas_ta as ta
import math
import concurrent.futures
import time

from datetime import datetime, timedelta, date
from typing import Dict, List, Optional, Union
from marshmallow import Schema, fields, EXCLUDE, pre_load, post_load
from bs4 import BeautifulSoup
from fp.fp import FreeProxy

# Special thanks to https://github.com/burakyilmaz321

class InfoSchema(Schema):
    code = fields.String(data_key="FONKODU", allow_none=True)
    fonunvantip = fields.String(data_key="FONUNVANTIP", allow_none=True)
    date = fields.Date(data_key="TARIH", allow_none=True)
    price = fields.Float(data_key="FIYAT", allow_none=True)
    title = fields.String(data_key="FONUNVAN", allow_none=True)
    market_cap = fields.Float(data_key="PORTFOYBUYUKLUK", allow_none=True)
    number_of_shares = fields.Float(data_key="TEDPAYSAYISI", allow_none=True)
    number_of_investors = fields.Float(data_key="KISISAYISI", allow_none=True)

    @pre_load
    def pre_load_hook(self, input_data, **kwargs):
        seconds_timestamp = int(input_data["TARIH"]) / 1000
        input_data["TARIH"] = date.fromtimestamp(seconds_timestamp).isoformat()
        return input_data

    @post_load
    def post_load_hool(self, output_data, **kwargs):
        output_data = {f: output_data.setdefault(f) for f in self.fields}
        return output_data

    class Meta:
        unknown = EXCLUDE

class tefas_get:
    root_url = "https://www.tefas.gov.tr"
    info_endpoint = "/api/DB/BindHistoryInfo"
    concurrently = False
    use_Proxy = False
    fon_type = "YAT"
    proxies = None

    @staticmethod
    def get_combobox_items(url, select_id):
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch the URL: {response.status_code}")

        soup = BeautifulSoup(response.content, 'html.parser')
        select_element = soup.find('select', id=select_id)

        if not select_element:
            raise Exception(f"Select element with id '{select_id}' not found")

        options = select_element.find_all('option')
        options = list(filter(None, options))

        items = []
        for option in options:
            value = option.get('value')
            items.append(value)

        items.remove('')

        return items

    def fetch_info(self, fonunvantip, start_date_initial, end_date_initial):
        counter = 1
        start_date = start_date_initial
        end_date = end_date_initial
        range_date = end_date_initial - start_date_initial
        range_interval = 90
        info_schema = InfoSchema(many=True)
        info_result = pd.DataFrame()

        if range_date.days > range_interval :
            counter = range_date.days / range_interval
            counter = math.ceil(counter)
            end_date = start_date + timedelta(days=range_interval)

        while counter > 0:
            counter -= 1

            data = {
                    "fontip": self.fon_type,
                    "bastarih": self._parse_date(start_date),
                    "bittarih": self._parse_date(end_date),
                    "fonunvantip": fonunvantip,
                    "fonkod": "",
                  }

            info = self._do_post(data)
            info = info_schema.load(info)
            info = pd.DataFrame(info, columns=info_schema.fields.keys())
            info['fonunvantip'] = fonunvantip
            info = info[info['price'] != 0]
            info_result = pd.concat([info_result, info])
            info_result = info_result.reset_index(drop=True)
            info = info.reset_index(drop=True)

            if counter > 0 :
                start_date = end_date + timedelta(days=1)
                end_date = end_date + timedelta(days=range_interval)
                if end_date > end_date_initial :
                    end_date = end_date_initial

        return info_result

    def fetch_info_serial(self, fonunvantips, start_date_initial, end_date_initial):
        merged = pd.DataFrame()

        for fonunvantip in fonunvantips:
            info = self.fetch_info(fonunvantip, start_date_initial, end_date_initial)
            if not info.empty :
                merged = pd.concat([merged, info])
            print(f"{fonunvantip} - {len(info)} records added total records: {len(merged)} " )

        return merged

    def fetch_info_concurrently(self, fonunvantips, start_date_initial, end_date_initial):
        with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
            merged = pd.DataFrame()
            # Submit all tasks to the executor
            self.concurrently = True
            futures = {executor.submit(self.fetch_info, fonunvantip, start_date_initial, end_date_initial): fonunvantip for fonunvantip in fonunvantips}

            # Retrieve results as they complete
            for future in concurrent.futures.as_completed(futures):
                info = future.result()
                merged = pd.concat([merged, info])
                print(f"{fonunvantip} - {len(info)} records added total records: {len(merged)} " )

            return merged

    def fetch(
        self,
        start: Union[str, datetime],
        end: Optional[Union[str, datetime]] = None,
        columns: Optional[List[str]] = None,
        unvantip: bool = False,
    ) -> pd.DataFrame:

        start_date_initial = datetime.strptime(start, "%Y-%m-%d")
        end_date_initial = datetime.strptime(end or start, "%Y-%m-%d")

        merged = pd.DataFrame()

        if unvantip :
            fonunvantips = self.get_combobox_items(url="https://www.tefas.gov.tr/TarihselVeriler.aspx", select_id="DropDownListFundTypeExplanationYAT")
        else :
            fonunvantips = [""]

        if self.use_Proxy :
            proxy_address = self.get_free_proxy()
            self.proxies = {"http": proxy_address, "https": proxy_address}
            print(self.proxies)
        else :
            self.proxies = None

        if self.concurrently :
            merged = self.fetch_info_concurrently(fonunvantips, start_date_initial, end_date_initial)
        else :
            merged = self.fetch_info_serial(fonunvantips, start_date_initial, end_date_initial)

        merged = merged[columns] if columns and not merged.empty else merged

        return merged

    def get_free_proxy(self):
        proxy_address = FreeProxy(timeout=1, rand=True, https=True).get()
        return proxy_address

    def _do_post(self, data: Dict[str, str]) -> Dict[str, str]:
        timestamp = int(time.time() * 1000)  # Get current timestamp in milliseconds
        headers = {
         "Connection": "keep-alive",
         "Cache-Control": "no-cache",
         "Pragma": "no-cache",
         "X-Requested-With": "XMLHttpRequest",
         "Sec-Fetch-Mode": "cors",
         "Sec-Fetch-Site": "same-origin",
         "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
         "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
         "Accept": "application/json, text/javascript, */*; q=0.01",
         "Origin": "https://www.tefas.gov.tr",
         "Referer": f"https://www.tefas.gov.tr/TarihselVeriler.aspx?timestamp={timestamp}" ,
         }

        response = requests.post(
             url=f"{self.root_url}/{self.info_endpoint}",
             data=data,
             proxies=self.proxies,
             headers=headers,
         )
        # Check the response status code and content
        if response.status_code != 200:
            print(f"Request failed with status code: {response.status_code}")
            print(f"Response content: {response.text}")
            return {}  # Return an empty dictionary if the request failed
        try:
            return response.json().get("data", {})
        except ValueError as e:
            print(f"Error decoding JSON response: {e}")
            print(f"Response content: {response.text}")
            return {}

    def _parse_date(self, date: Union[str, datetime]) -> str:
        if isinstance(date, datetime):
            formatted = datetime.strftime(date, "%d.%m.%Y")
        elif isinstance(date, str):
            try:
                parsed = datetime.strptime(date, "%Y-%m-%d")
            except ValueError as exc:
                raise ValueError(
                    "Date string format is incorrect. " "It should be `YYYY-MM-DD`"
                ) from exc
            else:
                formatted = datetime.strftime(parsed, "%d.%m.%Y")
        else:
            raise ValueError(
                "`date` should be a string like 'YYYY-MM-DD' "
                "or a `datetime.datetime` object."
            )
        return formatted

tefas = tefas_get()

time_delta = 366
start_date_calc = date.today() - timedelta(days=time_delta)
date_start = start_date_calc.strftime("%Y-%m-%d")
date_end = date.today().strftime("%Y-%m-%d")

fetched_data = pd.DataFrame()
fetched_data = tefas.fetch(start=date_start, end=date_end, columns=["code", "date", "price", "market_cap", "number_of_shares", "number_of_investors"], unvantip=False)
fetched_data['date'] = pd.to_datetime(fetched_data['date'], errors='coerce')
fetched_data['date'].dt.strftime('%Y-%m-%d')
fetched_data['date'] = fetched_data['date'].dt.date
fetched_data['price'].astype(float,False)
fetched_data.rename(columns={'price': 'close'}, inplace=True)
fetched_data['market_cap'].astype(float,False)
fetched_data['number_of_shares'].astype(float,False)
fetched_data['number_of_investors'].astype(float,False)
fetched_data[(fetched_data!=0)&(pd.isnull(fetched_data))]

fetched_data_agg = tefas.fetch(start=date_end, end=date_end, columns=["code", "date", "price", "fonunvantip", "title"], unvantip=True)
fetched_data_agg = fetched_data_agg.groupby(['code', 'title', 'fonunvantip'])['price'].mean().reset_index()
fon_table = fetched_data_agg.pivot(index=['code','title'], columns='fonunvantip', values='price').notnull()
del fetched_data_agg

def calculate_rsi(group, window_length):
    rsi_values = ta.rsi(close=group['close'], length=window_length)
    group['rsi'] = rsi_values  # Assign RSI values directly
    return group

# Group by stock and apply the RSI calculation
window_length = 14
fetched_data = fetched_data.reset_index(drop=True)
fetched_data.sort_values(by=['code', 'date'], inplace=True)
fetched_data = fetched_data.groupby(['code']).apply(calculate_rsi, window_length=window_length)
fetched_data = fetched_data.fillna(0)
fetched_data['rsi'].astype(float,False)
#fetched_data.to_excel("fetched_data.xlsx", index=False)

 - 345633 records added total records: 345633 
Agresif Değişken - 4 records added total records: 4 
Alternatif - 3 records added total records: 7 
Altın - 28 records added total records: 35 
Atak Dinamik Büyüme Değişken - 0 records added total records: 35 
Borçlanma Araçları - 74 records added total records: 109 
Çalışanlarına Yönelik - 2 records added total records: 111 
Çoklu Varlık - 11 records added total records: 122 
Değişken - 127 records added total records: 249 
Dengeli Değişken - 3 records added total records: 252 
Diğer Fon Sepeti - 2 records added total records: 254 
Döviz - 472 records added total records: 726 
Döviz Cinsinden İhraç (Dolar) - 423 records added total records: 1149 
Döviz Cinsinden İhraç (Euro) - 46 records added total records: 1195 
Emtia - 8 records added total records: 1203 
Endeks - 16 records added total records: 1219 
Endeks Hisse Senedi - 4 records added total records: 1223 
Eurobond - 9 records added total records: 1232 
Fon Sepeti - 76 records added

  fetched_data = fetched_data.groupby(['code']).apply(calculate_rsi, window_length=window_length)


code        
AAK   80568      0.000000
      79308      0.000000
      78048      0.000000
      76787      0.000000
      75526      0.000000
                  ...    
ZZL   253852    79.333223
      345632    76.767816
      344123    80.362879
      342613    80.795507
      341101    79.723258
Name: rsi, Length: 345633, dtype: float64

In [10]:
fon_table

Unnamed: 0_level_0,fonunvantip,Agresif Değişken,Alternatif,Altın,Borçlanma Araçları,Dengeli Değişken,Değişken,Diğer Fon Sepeti,Döviz,Döviz Cinsinden İhraç (Dolar),Döviz Cinsinden İhraç (Euro),...,Sektör,Serbest,Sürdürülebilirlik Fonları,Uzun Vadeli,Yabancı,Yabancı Fon Sepeti,Çalışanlarına Yönelik,Çoklu Varlık,Özel,İştirak
code,title,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAK,ATA PORTFÖY ÇOKLU VARLIK DEĞİŞKEN FON,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
AAL,ATA PORTFÖY PARA PİYASASI (TL) FONU,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AAS,ATA PORTFÖY FON SEPETİ SERBEST FONU,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
AAV,ATA PORTFÖY İKİNCİ HİSSE SENEDİ (TL) FONU (HİSSE SENEDİ YOĞUN FON),False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
ABG,ATLAS PORTFÖY DÖRDÜNCÜ SERBEST (TL) FON,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZVB,ATLAS PORTFÖY SERBEST (TL) FON,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
ZVO,ZİRAAT PORTFÖY ÜÇÜNCÜ SERBEST (TL) FON,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
ZYC,ZİRAAT PORTFÖY DÖRDÜNCÜ SERBEST (TL) FON,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
ZYD,ZİRAAT PORTFÖY BEŞİNCİ SERBEST (TL) FON,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False


In [11]:
fetched_data

Unnamed: 0_level_0,Unnamed: 1_level_0,code,date,close,market_cap,number_of_shares,number_of_investors,rsi
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAK,80568,AAK,2023-07-31,13.960016,87043644.53,6235211.0,583.0,0.000000
AAK,79308,AAK,2023-08-01,14.138141,88166281.90,6236059.0,596.0,0.000000
AAK,78048,AAK,2023-08-02,14.055781,87784817.37,6245460.0,598.0,0.000000
AAK,76787,AAK,2023-08-03,14.130122,87946088.26,6224015.0,612.0,0.000000
AAK,75526,AAK,2023-08-04,14.164886,88619221.88,6256261.0,617.0,0.000000
...,...,...,...,...,...,...,...,...
ZZL,253852,ZZL,2024-07-24,45.605468,23227731.47,509319.0,15.0,79.333223
ZZL,345632,ZZL,2024-07-25,45.547069,23197987.84,509319.0,15.0,76.767816
ZZL,344123,ZZL,2024-07-26,45.854076,23354351.95,509319.0,15.0,80.362879
ZZL,342613,ZZL,2024-07-29,45.895577,23375489.60,509319.0,15.0,80.795507
