In [1]:
import pandas as pd
import yfinance as yf
import json
import time
from collections import Counter

In [2]:

def get_all_info():
    """Download the yfinance ``info`` record for every symbol in ./nasdaq.csv.

    Reads the ``Symbol`` column of ``./nasdaq.csv`` and, for each symbol in file
    order, prints a progress line, sleeps one second, and requests
    ``yf.Ticker(symbol).info``.  After every successful request the complete set
    of records collected so far is written to ``./all_info.json``, so an
    interrupted run still leaves a usable file behind.

    Any exception raised while fetching or saving a symbol is printed and the
    loop moves on to the next symbol.

    Returns:
        None.  The result is the ``./all_info.json`` file.
    """
    all_info = {}

    tickers = pd.read_csv('./nasdaq.csv')["Symbol"].tolist()
    total = len(tickers)

    # enumerate() gives the true position even when a symbol is listed twice
    # and avoids a linear list search on every iteration.
    for position, ticker in enumerate(tickers):
        print(f" || {position} / {total} || {float(position)/float(total)*100:.4f}% || {ticker}")
        time.sleep(1)
        try:
            all_info[ticker] = yf.Ticker(ticker).info
            try:
                # Serialize before touching the file: opening with "w"
                # truncates it, so a failure mid-dump would destroy the
                # previous checkpoint.
                payload = json.dumps(all_info)
            except (TypeError, ValueError):
                # Drop the record that cannot be serialized, otherwise every
                # later checkpoint would fail on it as well.
                del all_info[ticker]
                raise
            with open('./all_info.json', "w") as checkpoint:
                checkpoint.write(payload)
        except Exception as e:
            print(f'{ticker} falied becasue of {e}')

In [3]:
def get_ratios():
    """Extract three per-ticker metrics from ./all_info.json into separate files.

    For every ticker in ``./all_info.json`` the fields
    ``priceToSalesTrailing12Months``, ``priceToBook`` and ``profitMargins`` are
    converted to ``float`` and collected into ``./ratio_sales.json``,
    ``./ratio_book.json`` and ``./ratio_margins.json`` respectively (each a
    ``{ticker: value}`` mapping in the order tickers appear in the input).

    A ticker whose field is missing or cannot be converted to a float is left
    out of that particular file only.  All three files are always written, even
    when empty, so later stages never read a missing or stale file.

    Returns:
        None.
    """
    outputs = {
        "priceToSalesTrailing12Months": './ratio_sales.json',
        "priceToBook": './ratio_book.json',
        "profitMargins": './ratio_margins.json',
    }

    with open('./all_info.json') as source:
        all_info = json.load(source)

    ratios = {field: {} for field in outputs}
    for ticker, record in all_info.items():
        for field in outputs:
            try:
                ratios[field][ticker] = float(record[field])
            except (KeyError, TypeError, ValueError, OverflowError):
                # Field absent, null/non-numeric, or the record itself is not
                # a mapping: skip this metric for this ticker.
                continue

    # Write each file once, after the scan, instead of once per ticker.
    for field, path in outputs.items():
        with open(path, "w") as sink:
            json.dump(ratios[field], sink)

In [4]:
def sort_ratios():
    """Rewrite each ratio file with its entries ordered by ascending value.

    Processes ``./ratio_book.json``, ``./ratio_sales.json`` and
    ``./ratio_margins.json`` in that order.  Each file is loaded, its
    ``{ticker: value}`` items are sorted by value (ties keep their existing
    relative order), and the result is written back to the same path.

    Returns:
        None.

    Raises:
        FileNotFoundError: if one of the three files does not exist; files
            earlier in the list have already been rewritten at that point.
    """
    for path in ('./ratio_book.json', './ratio_sales.json', './ratio_margins.json'):
        # Close the read handle before reopening the same path for writing.
        with open(path) as source:
            ratios = json.load(source)

        ordered = dict(sorted(ratios.items(), key=lambda item: item[1]))

        with open(path, "w") as sink:
            json.dump(ordered, sink)


In [5]:
def get_filtered_margins():
    """Apply the screening thresholds to the three ratio files.

    Despite its name this filters all three metrics:

    * ``./ratio_book.json``    -> ``./filtered_book.json``    keeping values ``< 1``
    * ``./ratio_sales.json``   -> ``./filtered_sales.json``   keeping values ``< 1``
    * ``./ratio_margins.json`` -> ``./filtered_margins.json`` keeping values ``> 0``

    Entry order within each file is preserved.

    Returns:
        None.
    """
    rules = (
        ('./ratio_book.json', './filtered_book.json', lambda value: value < 1),
        ('./ratio_sales.json', './filtered_sales.json', lambda value: value < 1),
        ('./ratio_margins.json', './filtered_margins.json', lambda value: value > 0),
    )

    for source_path, target_path, keep in rules:
        with open(source_path) as source:
            ratios = json.load(source)

        filtered = {ticker: value for ticker, value in ratios.items() if keep(value)}

        with open(target_path, "w") as sink:
            json.dump(filtered, sink)

In [6]:
def get_super_filtered():
    """Keep only the tickers that passed all three filters.

    Loads ``./filtered_book.json``, ``./filtered_sales.json`` and
    ``./filtered_margins.json`` and writes to ``./super_filtered_results.json``
    the ``{ticker: margin}`` entries of the margins file whose ticker is also a
    key of both the book and the sales file.  Order follows the margins file.

    Returns:
        None.
    """
    with open('./filtered_book.json') as source:
        book = json.load(source)
    with open('./filtered_sales.json') as source:
        sales = json.load(source)
    with open('./filtered_margins.json') as source:
        margins = json.load(source)

    # Dict membership tests are O(1); no intermediate ticker list is needed.
    super_filtered_results = {
        ticker: margin
        for ticker, margin in margins.items()
        if ticker in book and ticker in sales
    }

    with open('./super_filtered_results.json', "w") as sink:
        json.dump(super_filtered_results, sink)

In [7]:
def rm_china():
    """Drop tickers whose recorded country is "China" from the result file.

    Loads ``./super_filtered_results.json`` and ``./all_info.json``, keeps the
    result entries whose ``all_info[ticker]["country"]`` is not ``"China"``,
    overwrites ``./super_filtered_results.json`` with the survivors and prints
    how many remain.

    Tickers that have no record in ``./all_info.json``, or whose record has no
    ``country`` field, are dropped as well.

    Returns:
        None.
    """
    with open('./super_filtered_results.json') as source:
        results = json.load(source)
    with open('./all_info.json') as source:
        all_info = json.load(source)

    non_china = {}
    for ticker, value in results.items():
        try:
            country = all_info[ticker]["country"]
        except (KeyError, TypeError):
            # No record / no country field (or a null record): cannot be
            # vetted, so it is excluded.
            continue
        if country != "China":
            non_china[ticker] = value

    with open('./super_filtered_results.json', "w") as sink:
        json.dump(non_china, sink)
    print(len(non_china))


In [8]:

def get_tags():
    """Print how the screened tickers are distributed over sectors and industries.

    For every ticker in ``./super_filtered_results.json`` the ``sector`` and
    ``industry`` fields are looked up in ``./all_info.json``.  Three
    ``Counter`` objects are printed, each followed by a line of 50 asterisks:
    combined ``"<sector>, <industry>"`` tags, sectors, and industries.

    A ticker is counted only when both fields are available, so the three
    tallies always describe the same set of tickers.

    Returns:
        None.
    """
    with open('./super_filtered_results.json') as source:
        results = json.load(source)
    with open('./all_info.json') as source:
        all_info = json.load(source)

    sectors = []
    industries = []
    tags = []
    for ticker in results:
        try:
            # Resolve both fields before recording either one; otherwise a
            # missing industry would leave a sector counted on its own.
            sector = all_info[ticker]["sector"]
            industry = all_info[ticker]['industry']
        except (KeyError, TypeError):
            continue
        sectors.append(sector)
        industries.append(industry)
        tags.append(str(sector) + ", " + str(industry))

    separator = "*" * 50
    for tally in (Counter(tags), Counter(sectors), Counter(industries)):
        print(tally)
        print(separator)

# get_tags()

In [9]:
def get_some_info():
    """Build ./some_info.json: a compact record for every usable non-China ticker.

    A ticker from ``./all_info.json`` is kept when all of the following hold:

    * it has a ``country`` field that is not ``"China"``;
    * ``priceToSalesTrailing12Months``, ``priceToBook`` and ``profitMargins``
      all convert to ``float`` and are strictly greater than zero;
    * ``sector`` and ``industry`` are present and non-empty.

    Kept tickers are written as
    ``{ticker: {"sales", "book", "margins", "sector", "industry"}}``.
    Tickers failing any condition are skipped silently.

    Returns:
        None.
    """
    with open('./all_info.json') as source:
        all_info = json.load(source)

    some_info = {}
    for ticker, record in all_info.items():
        try:
            if record["country"] == "China":
                continue

            sales = float(record["priceToSalesTrailing12Months"])
            book = float(record["priceToBook"])
            margins = float(record["profitMargins"])
            sector = record["sector"]
            industry = record['industry']

            # Plain comparisons rather than ``assert``: assertions are removed
            # when Python runs with -O, which would disable the filter.
            # ``not (x > 0)`` also rejects NaN.
            if not (sales > 0 and book > 0 and margins > 0):
                continue
            if len(sector) == 0 or len(industry) == 0:
                continue
        except (KeyError, TypeError, ValueError, OverflowError):
            # Missing field, null/non-numeric value, or unsized sector/industry.
            continue

        some_info[ticker] = {"sales": sales, 'book': book, 'margins': margins, 'sector': sector, 'industry': industry}

    with open('./some_info.json', "w") as sink:
        json.dump(some_info, sink)

# get_some_info()

In [10]:
def get_industry_averages():
    """Compute the mean sales, book and margins value for each industry.

    Groups the records of ``./some_info.json`` by their ``industry`` field,
    averages the ``sales``, ``book`` and ``margins`` values within each group,
    prints the resulting mapping and writes it to ``./averages.json`` as
    ``{industry: {"sales": mean, "book": mean, "margins": mean}}``.

    Returns:
        None.

    Raises:
        KeyError: if a record lacks ``industry`` or one of the three values.
    """
    values = ["sales", 'book', 'margins']

    with open('./some_info.json') as source:
        info = json.load(source)

    # industry -> {metric: [observations...]}
    industries = {}
    for record in info.values():
        samples = industries.setdefault(record['industry'], {v: [] for v in values})
        for v in values:
            samples[v].append(record[v])

    averages = {
        industry: {v: sum(samples[v]) / len(samples[v]) for v in values}
        for industry, samples in industries.items()
    }

    print(averages)
    with open('./averages.json', "w") as sink:
        json.dump(averages, sink)

# get_industry_averages()

In [11]:
def get_nets():
    """Select the ./some_info.json records with sales and book ratios below 1.

    A record is kept when its ``sales`` and ``book`` values both convert to
    ``float`` and are strictly less than 1, its ``margins`` value converts to
    ``float``, and its ``sector`` and ``industry`` are present and non-empty.
    Kept records are written, with the same five fields, to
    ``./super_filtered_results.json`` (overwriting whatever that file held).
    Records failing any condition are skipped silently.

    Returns:
        None.
    """
    with open('./some_info.json') as source:
        info = json.load(source)

    nets = {}
    for ticker, record in info.items():
        try:
            sales = float(record["sales"])
            book = float(record["book"])
            margins = float(record["margins"])
            sector = record["sector"]
            industry = record['industry']

            # Plain comparisons rather than ``assert``: assertions are removed
            # when Python runs with -O, which would disable the filter.
            # ``not (x < 1)`` also rejects NaN.
            if not (sales < 1 and book < 1):
                continue
            if len(sector) == 0 or len(industry) == 0:
                continue
        except (KeyError, TypeError, ValueError, OverflowError):
            # Missing field, null/non-numeric value, or unsized sector/industry.
            continue

        nets[ticker] = {"sales": sales, 'book': book, 'margins': margins, 'sector': sector, 'industry': industry}

    with open('./super_filtered_results.json', "w") as sink:
        json.dump(nets, sink)

# get_nets()

In [12]:
def get_norm_deviation():
    """Add each ticker's percentage distance below its industry average.

    Loads ``./averages.json`` and ``./super_filtered_results.json``.  For every
    ticker and for each of ``sales`` and ``book`` it stores

        ``<metric>_deviation = 100 - value / industry_average * 100``

    so a positive number means the ticker sits that many percent below its
    industry's mean.  The enriched records are written to
    ``./super_filtered_results_2.json``; the input files are left unchanged.

    Returns:
        None.

    Raises:
        KeyError: if a ticker's industry has no entry in ``./averages.json``.
        ZeroDivisionError: if an industry average is zero.
    """
    values = ["sales", 'book']  # , 'margins']

    with open('./averages.json') as source:
        all_averages = json.load(source)
    with open('./super_filtered_results.json') as source:
        info = json.load(source)

    for record in info.values():
        # Look the industry row up once per ticker, not once per metric.
        averages = all_averages[record['industry']]
        for v in values:
            p = float(record[v]) / float(averages[v]) * 100
            record[f'{v}_deviation'] = 100 - p

    # The output is opened only after all computation succeeded, so an error
    # above never leaves a truncated result file behind.
    with open('./super_filtered_results_2.json', "w") as sink:
        json.dump(info, sink)

# Runs the deviation stage when this cell executes. It reads ./averages.json and
# ./super_filtered_results.json, so both files must already exist in the
# working directory, and it writes ./super_filtered_results_2.json.
get_norm_deviation()