<a href="https://colab.research.google.com/github/suparuek2405/Stock-screening-with-python/blob/main/Group_assignment_Tacocat_set_yfinance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#install libraries
# !pip install pandas
# !pip install numpy
# !pip install yfinance

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
from google.colab import drive

In [4]:
# version 1
def rename_columns_to_year_end(df):
    """Return a copy of ``df`` with every column label moved to 31 December of its year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are dates, or date-like values that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input is not modified) whose column labels are
        ``pd.Timestamp("<year>-12-31")`` for the year of each original label.
        A frame without columns is returned as an unchanged copy.
    """
    df_rt = df.copy()
    # Reading ``.columns.year`` directly only works on a DatetimeIndex and
    # raises AttributeError otherwise -- in particular for an empty frame.
    # Converting first makes the helper safe for those inputs as well.
    years = pd.to_datetime(df_rt.columns).year
    df_rt.columns = [pd.Timestamp(f"{year}-12-31") for year in years]
    return df_rt

def get_balance_sheet(ticker):
  """Download the balance sheet of every ticker and stack them into one frame.

  Parameters
  ----------
  ticker : iterable of str, or str
      Symbols passed to ``yf.Ticker`` (e.g. ``"24CS.BK"``). A single string is
      treated as a one-element list.

  Returns
  -------
  pandas.DataFrame
      Rows indexed by ``(symbol, data)``: ``symbol`` is the ticker text before
      the first ``.`` and ``data`` holds the statement line-item name. Columns
      are the year-end dates produced by ``rename_columns_to_year_end``.
      An empty frame is returned when nothing was collected.
  """
  if isinstance(ticker, str):
    # Iterating a bare string would issue one request per character.
    ticker = [ticker]

  frames = []
  n = len(ticker)
  print("start getting balance sheet")
  for c, i in enumerate(ticker):
    # retrieve data from yfinance
    raw = yf.Ticker(i).balancesheet
    if raw is None or raw.empty:
      # Nothing to reshape. Skipping keeps one ticker without data from
      # aborting the whole download run.
      print(f"{c+1}/{n} {i} (no data, skipped)")
      continue

    # rename columns to the year-end date
    bs = rename_columns_to_year_end(raw)

    # Turn the line-item index into a regular 'data' column. Naming the axis
    # first gives the same result as renaming the default 'index' column and
    # also works if the index already carries a name.
    bs = bs.rename_axis('data').reset_index()
    bs['symbol'] = i.split('.')[0]

    # keep the first occurrence of each line item, then index by (symbol, data);
    # chaining returns new frames instead of mutating a filtered slice in place
    bs = bs.drop_duplicates(subset=['data']).set_index(['symbol', 'data'])

    # store the remaining (date) column labels as datetimes
    bs.columns = pd.to_datetime(bs.columns)

    frames.append(bs)
    print(f"{c+1}/{n} {i}")
  print("finish getting balance sheet")
  # Concatenate once at the end: growing a DataFrame inside the loop re-copies
  # everything collected so far on every iteration.
  return pd.concat(frames) if frames else pd.DataFrame()

def get_income_statement(ticker):
  """Download the income statement of every ticker and stack them into one frame.

  Parameters
  ----------
  ticker : iterable of str, or str
      Symbols passed to ``yf.Ticker`` (e.g. ``"24CS.BK"``). A single string is
      treated as a one-element list.

  Returns
  -------
  pandas.DataFrame
      Rows indexed by ``(symbol, data)``: ``symbol`` is the ticker text before
      the first ``.`` and ``data`` holds the statement line-item name. Columns
      are the year-end dates produced by ``rename_columns_to_year_end``.
      An empty frame is returned when nothing was collected.
  """
  if isinstance(ticker, str):
    # Iterating a bare string would issue one request per character.
    ticker = [ticker]

  frames = []
  n = len(ticker)
  print("start getting income statement")
  for c, i in enumerate(ticker):
    # retrieve data from yfinance
    raw = yf.Ticker(i).financials
    if raw is None or raw.empty:
      # Nothing to reshape. Skipping keeps one ticker without data from
      # aborting the whole download run.
      print(f"{c+1}/{n} {i} (no data, skipped)")
      continue

    # rename columns to the year-end date
    is_df = rename_columns_to_year_end(raw)

    # Turn the line-item index into a regular 'data' column. Naming the axis
    # first gives the same result as renaming the default 'index' column and
    # also works if the index already carries a name.
    is_df = is_df.rename_axis('data').reset_index()
    is_df['symbol'] = i.split('.')[0]

    # keep the first occurrence of each line item, then index by (symbol, data);
    # chaining returns new frames instead of mutating a filtered slice in place
    is_df = is_df.drop_duplicates(subset=['data']).set_index(['symbol', 'data'])

    # store the remaining (date) column labels as datetimes
    is_df.columns = pd.to_datetime(is_df.columns)

    frames.append(is_df)
    print(f"{c+1}/{n} {i}")
  print("finish getting income statement")
  # Concatenate once at the end: growing a DataFrame inside the loop re-copies
  # everything collected so far on every iteration.
  return pd.concat(frames) if frames else pd.DataFrame()

def get_cashflow_statement(ticker):
  """Download the cash-flow statement of every ticker and stack them into one frame.

  Parameters
  ----------
  ticker : iterable of str, or str
      Symbols passed to ``yf.Ticker`` (e.g. ``"24CS.BK"``). A single string is
      treated as a one-element list.

  Returns
  -------
  pandas.DataFrame
      Rows indexed by ``(symbol, data)``: ``symbol`` is the ticker text before
      the first ``.`` and ``data`` holds the statement line-item name. Columns
      are the year-end dates produced by ``rename_columns_to_year_end``.
      An empty frame is returned when nothing was collected.
  """
  if isinstance(ticker, str):
    # Iterating a bare string would issue one request per character.
    ticker = [ticker]

  frames = []
  n = len(ticker)
  print("start getting cashflow statement")
  for c, i in enumerate(ticker):
    # retrieve data from yfinance
    raw = yf.Ticker(i).cashflow
    if raw is None or raw.empty:
      # Nothing to reshape. Skipping keeps one ticker without data from
      # aborting the whole download run.
      print(f"{c+1}/{n} {i} (no data, skipped)")
      continue

    # rename columns to the year-end date
    cf_df = rename_columns_to_year_end(raw)

    # Turn the line-item index into a regular 'data' column. Naming the axis
    # first gives the same result as renaming the default 'index' column and
    # also works if the index already carries a name.
    cf_df = cf_df.rename_axis('data').reset_index()
    cf_df['symbol'] = i.split('.')[0]

    # keep the first occurrence of each line item, then index by (symbol, data);
    # chaining returns new frames instead of mutating a filtered slice in place
    cf_df = cf_df.drop_duplicates(subset=['data']).set_index(['symbol', 'data'])

    # store the remaining (date) column labels as datetimes
    cf_df.columns = pd.to_datetime(cf_df.columns)

    frames.append(cf_df)
    print(f"{c+1}/{n} {i}")
  print("finish getting cashflow statement")
  # Concatenate once at the end: growing a DataFrame inside the loop re-copies
  # everything collected so far on every iteration.
  return pd.concat(frames) if frames else pd.DataFrame()

def get_monthly_stock_prices(ticker, start_date="2022-01-01", end_date="2024-09-30"):
  """Return one closing price per month for ``ticker``.

  Parameters
  ----------
  ticker : str
      Symbol passed to ``yf.Ticker``.
  start_date, end_date : str
      Bounds forwarded to ``Ticker.history`` as ``start`` and ``end``.

  Returns
  -------
  pandas.DataFrame
      Indexed by month-start timestamps, with a ``Close`` column (first close
      found in each month of the ``"1mo"`` history) and a ``symbol`` column
      holding the ticker text before the first ``.``.
  """
  history = yf.Ticker(ticker).history(start=start_date, end=end_date, interval="1mo")
  # make sure the index is datetime-typed before resampling
  history = history.set_index(pd.to_datetime(history.index))

  # bucket by month start and keep the first close of each bucket
  first_close = history['Close'].resample('MS').first()
  prices = pd.DataFrame(first_close)
  prices['symbol'] = ticker.split('.')[0]

  # express every index label as the first day of its month
  prices.index = prices.index.to_period('M').to_timestamp(how='start')
  return prices

def get_dividend_data(ticker, start_date="2022-01-01", end_date="2024-09-30"):
  """Return the dividends of ``ticker`` paid between ``start_date`` and ``end_date``.

  Parameters
  ----------
  ticker : str
      Symbol passed to ``yf.Ticker``.
  start_date, end_date : str
      Inclusive bounds compared against the index of ``Ticker.dividends``.

  Returns
  -------
  pandas.DataFrame or None
      Frame indexed by month-start timestamps with an ``amount`` column (the
      renamed ``Dividends`` values) and a ``symbol`` column holding the ticker
      text before the first ``.``. ``None`` when no dividend falls inside the
      date range.
  """
  payouts = yf.Ticker(ticker).dividends

  # keep only the payments dated inside [start_date, end_date]
  in_window = (payouts.index >= start_date) & (payouts.index <= end_date)
  payouts = payouts[in_window]
  if payouts.empty:
    return None

  frame = pd.DataFrame(payouts)
  frame['symbol'] = ticker.split('.')[0]
  frame = frame.rename(columns={'Dividends':'amount'})
  # express every index label as the first day of its month
  frame.index = frame.index.to_period('M').to_timestamp(how='start')
  return frame

def get_financial_data(ticker):
  """Fetch all three financial statements for ``ticker``.

  ``ticker`` is handed unchanged to ``get_balance_sheet``,
  ``get_income_statement`` and ``get_cashflow_statement``, which are called in
  that order.

  Returns
  -------
  tuple
      ``(balance_sheet, income_statement, cashflow_statement)``.
  """
  return (
    get_balance_sheet(ticker),
    get_income_statement(ticker),
    get_cashflow_statement(ticker),
  )

In [8]:
# Load the scraped listing table (set_scraped.csv) from the project's GitHub repository.
# Alternative source, reading the same file name from a mounted Google Drive path:
#df = pd.read_csv('/content/drive/My Drive/Tools/Group Assignment/set_scraped.csv')
df = pd.read_csv('https://raw.githubusercontent.com/suparuek2405/Stock-screening-with-python/main/set_scraped.csv')

In [10]:
# Build the listing table used for the downloads: keep five columns of the
# scraped data, give them readable names, and add the '.BK'-suffixed ticker
# that is later passed to yfinance. Every step returns a new frame, so `df`
# itself is left untouched.
bkk = (
    df[['symbol', 'text-start', 'text-start 2', 'symbol 2', 'symbol 3']]
    .set_axis(['symbol', 'company_name', 'market', 'industry', 'sector'], axis=1)
    .assign(ticker=lambda listing: listing['symbol'] + '.BK')
)
bkk.head(10)

Unnamed: 0,symbol,company_name,market,industry,sector,ticker
0,24CS,บริษัท ทเวนตี้ โฟร์ คอน แอนด์ ซัพพลาย จำกัด (ม...,mai,PROPCON,,24CS.BK
1,2S,บริษัท 2 เอส เมทัล จำกัด (มหาชน),SET,INDUS,STEEL,2S.BK
2,3K-BAT,บริษัท ไทย เอ็นเนอร์จี สโตเรจ เทคโนโลยี จำกัด ...,SET,INDUS,AUTO,3K-BAT.BK
3,A,บริษัท อารียา พรอพเพอร์ตี้ จำกัด (มหาชน),SET,PROPCON,PROP,A.BK
4,A5,บริษัท แอสเซท ไฟว์ กรุ๊ป จำกัด (มหาชน),mai,PROPCON,,A5.BK
5,AAI,บริษัท เอเชี่ยน อะไลอันซ์ อินเตอร์เนชั่นแนล จำ...,SET,AGRO,FOOD,AAI.BK
6,AAV,บริษัท เอเชีย เอวิเอชั่น จำกัด (มหาชน),SET,SERVICE,TRANS,AAV.BK
7,ABM,บริษัท เอเชีย ไบโอแมส จำกัด (มหาชน),mai,RESOURC,,ABM.BK
8,ACAP,บริษัท เอเชีย แคปปิตอล กรุ๊ป จำกัด (มหาชน),mai,FINCIAL,,ACAP.BK
9,ACC,บริษัท แอดวานซ์ คอนเนคชั่น คอร์ปอเรชั่น จำกัด ...,SET,RESOURC,ENERG,ACC.BK


In [None]:
# Tickers to download: every ticker of `bkk` except the ones listed in
# `ticker_error`.
ticker_error = ['DIMET.BK']  # exclude error ticker
set_list = bkk['ticker']
# `~` negates the boolean mask; comparing a mask with `== False` is non-idiomatic
set_list = set_list[~set_list.isin(ticker_error)]

In [None]:
# Run this cell to start retrieving the data from yfinance.
# get_financial_data returns the balance sheet, income statement and cash-flow
# statement frames (in that order) for the tickers in set_list.
bl_st, ic_st, cf_st = get_financial_data(set_list)

start getting balance sheet
1/911 24CS.BK
2/911 2S.BK
3/911 3K-BAT.BK
4/911 A.BK
5/911 A5.BK
6/911 AAI.BK
7/911 AAV.BK
8/911 ABM.BK
9/911 ACAP.BK
10/911 ACC.BK
11/911 ACE.BK
12/911 ACG.BK
13/911 ADB.BK
14/911 ADD.BK
15/911 ADVANC.BK
16/911 ADVICE.BK
17/911 AE.BK
18/911 AEONTS.BK
19/911 AF.BK
20/911 AFC.BK
21/911 AGE.BK
22/911 AH.BK
23/911 AHC.BK
24/911 AI.BK
25/911 AIE.BK
26/911 AIMCG.BK
27/911 AIMIRT.BK
28/911 AIRA.BK
29/911 AIT.BK
30/911 AJ.BK
31/911 AJA.BK
32/911 AKP.BK
33/911 AKR.BK
34/911 AKS.BK
35/911 ALLA.BK
36/911 ALLY.BK
37/911 ALPHAX.BK
38/911 ALT.BK
39/911 ALUCON.BK
40/911 AMA.BK
41/911 AMANAH.BK
42/911 AMARC.BK
43/911 AMARIN.BK
44/911 AMATA.BK
45/911 AMATAR.BK
46/911 AMATAV.BK
47/911 AMC.BK
48/911 AMR.BK
49/911 ANAN.BK
50/911 ANI.BK
51/911 AOT.BK
52/911 AP.BK
53/911 APCO.BK
54/911 APCS.BK
55/911 APEX.BK
56/911 APO.BK
57/911 APP.BK
58/911 APURE.BK
59/911 AQUA.BK
60/911 ARIN.BK
61/911 ARIP.BK
62/911 ARROW.BK
63/911 AS.BK
64/911 ASAP.BK
65/911 ASEFA.BK
66/911 ASIA.BK
67/911 AS

In [None]:
# Uncomment and run the code below to save the three statements as CSV files and download them
# bl_st.to_csv('balance_sheet.csv')
# ic_st.to_csv('income_statement.csv')
# cf_st.to_csv('cashflow_statement.csv')

# from google.colab import files
# files.download('balance_sheet.csv')
# files.download('income_statement.csv')
# files.download('cashflow_statement.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

------

## After getting the data


In [13]:
# Load the balance sheet, income statement and cash-flow statement CSVs from
# the project's GitHub repository.
# NOTE(review): presumably these are the files exported by the download cell above - confirm.
bl = pd.read_csv('https://raw.githubusercontent.com/suparuek2405/Stock-screening-with-python/refs/heads/main/balance_sheet.csv')
ic = pd.read_csv('https://raw.githubusercontent.com/suparuek2405/Stock-screening-with-python/refs/heads/main/income_statement.csv')
cf = pd.read_csv('https://raw.githubusercontent.com/suparuek2405/Stock-screening-with-python/refs/heads/main/cashflow_statement.csv')

In [14]:
# Show the distinct values of the balance sheet's `data` column (the line-item names).
bl.data.unique()

array(['Treasury Shares Number', 'Ordinary Shares Number', 'Share Issued',
       'Net Debt', 'Total Debt', 'Tangible Book Value',
       'Invested Capital', 'Working Capital', 'Net Tangible Assets',
       'Capital Lease Obligations', 'Common Stock Equity',
       'Total Capitalization', 'Total Equity Gross Minority Interest',
       'Stockholders Equity', 'Other Equity Interest',
       'Gains Losses Not Affecting Retained Earnings',
       'Other Equity Adjustments', 'Retained Earnings',
       'Additional Paid In Capital', 'Capital Stock', 'Common Stock',
       'Total Liabilities Net Minority Interest',
       'Total Non Current Liabilities Net Minority Interest',
       'Other Non Current Liabilities', 'Employee Benefits',
       'Non Current Pension And Other Postretirement Benefit Plans',
       'Long Term Debt And Capital Lease Obligation',
       'Long Term Capital Lease Obligation', 'Long Term Debt',
       'Current Liabilities', 'Other Current Liabilities',
       'Current 

In [15]:
# Show the distinct values of the income statement's `data` column (the line-item names).
ic.data.unique()

array(['Tax Effect Of Unusual Items', 'Tax Rate For Calcs',
       'Normalized EBITDA', 'Total Unusual Items',
       'Total Unusual Items Excluding Goodwill',
       'Net Income From Continuing Operation Net Minority Interest',
       'Reconciled Depreciation', 'Reconciled Cost Of Revenue', 'EBITDA',
       'EBIT', 'Net Interest Income', 'Interest Expense',
       'Interest Income', 'Normalized Income',
       'Net Income From Continuing And Discontinued Operation',
       'Total Expenses', 'Diluted Average Shares', 'Basic Average Shares',
       'Diluted EPS', 'Basic EPS', 'Diluted NI Availto Com Stockholders',
       'Net Income Common Stockholders', 'Net Income',
       'Net Income Including Noncontrolling Interests',
       'Net Income Continuous Operations', 'Tax Provision',
       'Pretax Income', 'Other Income Expense',
       'Other Non Operating Income Expenses', 'Special Income Charges',
       'Gain On Sale Of Ppe', 'Other Special Charges',
       'Gain On Sale Of Security'

In [16]:
# Show the distinct values of the cash-flow statement's `data` column (the line-item names).
cf.data.unique()

array(['Free Cash Flow', 'Repayment Of Debt', 'Issuance Of Debt',
       'Issuance Of Capital Stock', 'Capital Expenditure',
       'End Cash Position', 'Other Cash Adjustment Outside Changein Cash',
       'Beginning Cash Position', 'Changes In Cash',
       'Financing Cash Flow',
       'Cash Flow From Continuing Financing Activities',
       'Net Other Financing Charges', 'Cash Dividends Paid',
       'Common Stock Dividend Paid', 'Net Common Stock Issuance',
       'Common Stock Issuance', 'Net Issuance Payments Of Debt',
       'Net Short Term Debt Issuance', 'Short Term Debt Payments',
       'Short Term Debt Issuance', 'Net Long Term Debt Issuance',
       'Long Term Debt Payments', 'Long Term Debt Issuance',
       'Investing Cash Flow',
       'Cash Flow From Continuing Investing Activities',
       'Net Other Investing Changes', 'Interest Received Cfi',
       'Net Business Purchase And Sale', 'Purchase Of Business',
       'Net Intangibles Purchase And Sale', 'Purchase Of In