In [2]:
import re
from datetime import date, timedelta

import feedparser
import pandas as pd
from sqlalchemy import create_engine, text

# --- Display options: show long text columns (links) and every row ---
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 255)

# --- RSS feed that is parsed for F45 announcements ---
url = "https://feeds.feedburner.com/Setorth-form45-en"

# --- Output directories; the CSV exports below are written to several of these ---
raw_path = "../data/raw/"
clean_path = "../data/clean/"
pdf_path = "../PDF/"  # not referenced by the cells shown in this notebook
csv_path = "\\Users\\User\\iCloudDrive\\"
box_path = "\\Users\\User\\Dropbox\\"
one_path = "\\Users\\User\\OneDrive\\Documents\\Data\\"

# --- Local SQLite database; `conlt` is the connection passed to pd.read_sql ---
engine = create_engine("sqlite:///c:\\ruby\\portlt\\db\\development.sqlite3")
conlt = engine.connect()

# --- Reporting year (fixed) and the MMDD stamp used in output file names ---
year = 2022
today = date.today()
mmdd_str = f"{today:%m%d}"
mmdd_str

'0824'

In [3]:
# Recompute the MMDD stamp. To stamp the files with another date, uncomment
# the override on the next line before running this cell.
#today = date(2022, 8, 11)
mmdd_str = f"{today:%m%d}"
mmdd_str

'0824'

In [4]:
# Fetch and parse the feed, then report how many entries it contains.
rss_source = feedparser.parse(url)
f45_number = len(rss_source.entries)
print(f"Number of F45:  {f45_number}")

Number of F45:  3


In [5]:
def _parse_quarter(period_text):
    """Return the quarter (as a one-character string) from a feed title remainder.

    `period_text` is the part of the entry title that follows the ticker symbol.

    A digit 1-4 following the word "Quarter" (case-insensitive) is preferred.
    NOTE(review): this assumes titles spell the period as "Quarter N" — confirm
    against the live feed. When no such pattern is found, the previous rule is
    used: the last character of the text preceding ' (F45)'. An empty remainder
    yields '' instead of raising IndexError.
    """
    match = re.search(r'quarter\s*([1-4])', period_text, flags=re.IGNORECASE)
    if match:
        return match.group(1)
    head = period_text.partition(' (F45)')[0]
    return head[-1] if head else ''


# Build one dict per feed entry: ticker, reporting year, quarter, link, timestamp.
f45_items = []

for x, f45_content in enumerate(rss_source.entries):
    # The title starts with the ticker symbol, followed by a space and the
    # description of the filing.
    ticker, _sep, period_text = f45_content.title.partition(' ')
    f45_item = {
        'name': ticker.strip(),
        'year': year,
        'quarter': _parse_quarter(period_text),
        'link': f45_content.link,
        'published': f45_content.published,
    }

    print("\n----------------------------------\n")

    print("F45: " + str(x))
    print("Title: ", f45_item['name'])
    print("Year: ", f45_item['year'])
    print("Quarter: ", f45_item['quarter'])
    print("Link: ", f45_item['link'])
    print("Published: ", f45_item['published'])
    f45_items.append(f45_item)


----------------------------------

F45: 0
Title:  UOBKH
Year:  2022
Quarter:  n
Link:  https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974219950
Published:  Wed, 24 Aug 2022 19:36:04 +0700

----------------------------------

F45: 1
Title:  KGI
Year:  2022
Quarter:  n
Link:  https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974218200
Published:  Wed, 24 Aug 2022 18:23:14 +0700

----------------------------------

F45: 2
Title:  UOBKH
Year:  2022
Quarter:  r
Link:  https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974214270
Published:  Wed, 24 Aug 2022 17:27:14 +0700


In [6]:
# One row per feed entry; 'link' stays in df but is left out of this preview.
df = pd.DataFrame(f45_items)
df.loc[:, ['name', 'year', 'quarter', 'published']]

Unnamed: 0,name,year,quarter,published
0,UOBKH,2022,n,"Wed, 24 Aug 2022 19:36:04 +0700"
1,KGI,2022,n,"Wed, 24 Aug 2022 18:23:14 +0700"
2,UOBKH,2022,r,"Wed, 24 Aug 2022 17:27:14 +0700"


In [7]:
# Inspect column dtypes: 'quarter' is still a string at this point and is
# cast to int a few cells below.
df.dtypes

name         object
year          int64
quarter      object
link         object
published    object
dtype: object

In [9]:
# Rows whose quarter came out of the title parser as a letter ('n' or 'r' in
# the recorded run) instead of a digit are assigned by hand to the period
# currently being filed. Any other non-numeric value is left untouched so the
# integer cast in the next cell still fails loudly on it.
# NOTE(review): quarter '2' is hard-coded — update it each reporting period.
unparsed = df['quarter'].isin(['n', 'r'])
# Use the integer `year` constant so the 'year' column keeps a single dtype.
df.loc[unparsed, 'year'] = year
df.loc[unparsed, 'quarter'] = '2'
df.groupby(['year','quarter']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,link,published
year,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,2,3,3,3


In [10]:
# Cast both key columns to int. Earlier cells can leave string values in
# either column, and a 'year' column mixing '2022' and 2022 would be split
# into separate groups by the groupby below.
df['year'] = df['year'].astype(int)
df['quarter'] = df['quarter'].astype(int)
df.groupby(['year','quarter']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,link,published
year,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,2,3,3,3


In [11]:
# Row/column count before duplicate tickers are removed in the next cell.
df.shape

(3, 5)

In [12]:
# Keep only the first row seen for each ticker. In the run recorded above the
# feed lists the most recently published entry first, so the first row is the
# latest filing for that name.
df = df.loc[~df.duplicated(subset='name', keep='first')]
df.shape

(2, 5)

In [13]:
# Build the dated file name and its full path in each output directory.
file_name = f"F45-RAW-{mmdd_str}.csv"
raw_file = raw_path + file_name
output_file = csv_path + file_name
box_file = box_path + file_name
one_file = one_path + file_name

# Write the same four columns to every destination, in the original order.
raw_export = df[['name','year','quarter','published']]
for destination in (output_file, box_file, one_file, raw_file):
    raw_export.to_csv(destination, header=True, index=False, sep=',')

In [14]:
# Load the whole exempts table and show its row count.
sql = '''
SELECT *
FROM exempts
ORDER BY name'''
df_exempts = pd.read_sql(sql, con=conlt)
len(df_exempts)

405

In [15]:
# Outer-join the feed rows with the exempts table on ticker name; the '_merge'
# indicator column records which side(s) each row came from.
df_merge = df.merge(df_exempts, how='outer', on='name', indicator=True)
df_merge.shape

(406, 7)

### Tickers that are in Exempts table

In [16]:
# Feed rows whose ticker also appears in the exempts table. `.copy()` makes
# this an independent frame so the casts below do not act on a slice of
# df_merge.
in_exempts = df_merge.loc[
    df_merge['_merge'] == 'both',
    ['name','year','quarter','published','link']
].copy()
in_exempts['year'] = in_exempts['year'].astype(int)
in_exempts['quarter'] = in_exempts['quarter'].astype(int)
# 'published' holds strings such as "Wed, 24 Aug 2022 19:36:04 +0700"; sorting
# them as text orders by weekday name, so sort on the parsed timestamp instead.
# Values that do not match the format become NaT and are placed last.
in_exempts.sort_values(
    by='published',
    ascending=False,
    key=lambda col: pd.to_datetime(
        col, format='%a, %d %b %Y %H:%M:%S %z', utc=True, errors='coerce'
    ),
)

Unnamed: 0,name,year,quarter,published,link
0,UOBKH,2022,2,"Wed, 24 Aug 2022 19:36:04 +0700",https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974219950


In [17]:
# Number of feed rows found in the exempts table (sorting does not affect it).
len(in_exempts)

1

### Not in exempts table

In [18]:
# Feed rows whose ticker is NOT in the exempts table. `.copy()` makes this an
# independent frame so the casts below do not act on a slice of df_merge.
df_out = df_merge.loc[
    df_merge['_merge'] == 'left_only',
    ['name','year','quarter','published','link']
].copy()
df_out['year'] = df_out['year'].astype(int)
df_out['quarter'] = df_out['quarter'].astype(int)
# 'published' holds strings such as "Wed, 24 Aug 2022 18:23:14 +0700"; sorting
# them as text orders by weekday name, so sort on the parsed timestamp instead.
# Values that do not match the format become NaT and are placed last.
df_out.sort_values(
    by='published',
    ascending=False,
    key=lambda col: pd.to_datetime(
        col, format='%a, %d %b %Y %H:%M:%S %z', utc=True, errors='coerce'
    ),
)

Unnamed: 0,name,year,quarter,published,link
1,KGI,2022,2,"Wed, 24 Aug 2022 18:23:14 +0700",https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974218200


In [19]:
# Optional manual exclusion of a single ticker; uncomment and edit when needed.
#df_out = df_out.drop(df_out.index[df_out['name'] == "SCC"])
len(df_out)

1

In [20]:
# Load the whole tickers table and show its dimensions.
sql = '''
SELECT *
FROM tickers
ORDER BY name'''
df_tickers = pd.read_sql(sql, con=conlt)
df_tickers.shape

(402, 9)

In [21]:
# Outer-join the non-exempt feed rows with the tickers table on ticker name,
# keeping the '_merge' indicator to tell matched from unmatched rows.
df_merge2 = df_out.merge(df_tickers, how='outer', on='name', indicator=True)
df_merge2.shape

(402, 14)

### Filings with no matching record in the tickers table

In [22]:
# Feed rows with no matching row in the tickers table.
df_merge2.loc[
    df_merge2['_merge'].eq('left_only'),
    ['name', 'year', 'quarter', 'published', 'link'],
]

Unnamed: 0,name,year,quarter,published,link


In [23]:
# Dimensions of the same unmatched selection, with the ticker columns included.
df_merge2.loc[
    df_merge2['_merge'].eq('left_only'),
    ['name', 'year', 'quarter', 'published', 'link', 'id', 'market'],
].shape

(0, 7)

### Filings with a matching record in the tickers table

In [24]:
# Feed rows that matched a ticker, together with that ticker's id and market.
df_out2 = df_merge2.loc[
    df_merge2['_merge'].eq('both'),
    ['name', 'year', 'quarter', 'published', 'link', 'id', 'market'],
]
df_out2

Unnamed: 0,name,year,quarter,published,link,id,market
0,KGI,2022.0,2.0,"Wed, 24 Aug 2022 18:23:14 +0700",https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974218200,252,sSET


In [25]:
# Discard rows that have no 'year' value.
df_out2 = df_out2.dropna(subset=['year'])
df_out2.shape

(1, 7)

In [26]:
# 'year' and 'quarter' are floats after the outer merge (see the 2022.0 / 2.0
# values displayed above); cast them back to int. A single astype call
# returns a new frame, which avoids assigning columns on a filtered slice
# (SettingWithCopyWarning).
df_out2 = df_out2.astype({'year': int, 'quarter': int})
df_out2.shape

(1, 7)

In [27]:
# Build the dated file name and its full path in each output directory.
file_name = 'F45-CLEAN-' + mmdd_str + '.csv'
clean_file = clean_path + file_name
output_file = csv_path + file_name
box_file = box_path + file_name
one_file = one_path + file_name

# Select and order the export once. 'published' holds strings such as
# "Wed, 24 Aug 2022 18:23:14 +0700"; sorting them as text orders by weekday
# name, so sort on the parsed timestamp (latest first). Values that do not
# match the format become NaT and are placed last.
clean_export = df_out2[['name','year','quarter','published','market']].sort_values(
    by='published',
    ascending=False,
    key=lambda col: pd.to_datetime(
        col, format='%a, %d %b %Y %H:%M:%S %z', utc=True, errors='coerce'
    ),
)

# Write the same frame to every destination, in the original order.
for destination in (output_file, clean_file, box_file, one_file):
    clean_export.to_csv(destination, header=True, index=False, sep=',')

In [28]:
# Load the epss rows for the reporting year. The year is bound from the
# `year` constant instead of being repeated as a literal, so this query always
# agrees with the 'year' values used as merge keys in the next cell.
sql = text('''
SELECT * 
FROM epss
WHERE year = :year''')
df_epss = pd.read_sql(sql, conlt, params={'year': year})
df_epss.shape

(451, 14)

In [29]:
# Outer-join the new filings with the stored epss rows on (name, year, quarter);
# '_merge' then separates filings already stored from those that are new.
df_merge3 = df_out2.merge(
    df_epss,
    how='outer',
    on=['name', 'year', 'quarter'],
    indicator=True,
)
df_merge3.shape

(452, 19)

### Already entered: display profit amount & EPS to check against the new F45

In [30]:
# Filings that already have an epss row for the same name, year and quarter.
df_merge3.loc[df_merge3['_merge'].eq('both')]

Unnamed: 0,name,year,quarter,published,link,id_x,market,id_y,q_amt,y_amt,aq_amt,ay_amt,q_eps,y_eps,aq_eps,ay_eps,ticker_id,publish_date,_merge


In [31]:
# Dimensions of the already-stored selection.
df_merge3.loc[df_merge3['_merge'].eq('both')].shape

(0, 19)

### New F-45

In [32]:
# Filings with no epss row yet. df_inp3 is an independent copy so the string
# casts in the next cell do not touch df_merge3.
df_inp2 = df_merge3.loc[df_merge3['_merge'].eq('left_only')]
df_inp3 = df_inp2.copy()
df_inp3.shape

(1, 19)

In [33]:
# Turn the key columns into text so they can be concatenated, then build one
# "name year quarter link" line per new filing, ordered by ticker name.
df_inp3['year'] = df_inp3['year'].astype(str)
df_inp3['quarter'] = df_inp3['quarter'].astype(str)
final = df_inp3.sort_values(by='name', ascending=True)
final_str = final['name'] + ' ' + final['year'] + ' ' + final['quarter'] + ' ' + final['link']
final_str

0    KGI 2022 2 https://classic.set.or.th/set/newsdetails.do?language=en&country=US&newsId=16612974218200
dtype: object

In [34]:
# Same listing restricted to quarter '3' (quarter is a string at this point).
df_inp3_q3 = df_inp3.loc[df_inp3['quarter'] == '3']
final_q3 = df_inp3_q3.sort_values(by='name', ascending=True)
final_str_q3 = (
    final_q3['name'] + ' ' + final_q3['year'] + ' ' + final_q3['quarter'] + ' ' + final_q3['link']
)
final_str_q3

Series([], dtype: object)

In [35]:
# Same listing restricted to quarter '1' (quarter is a string at this point).
df_inp3_q1 = df_inp3.loc[df_inp3['quarter'] == '1']
final_q1 = df_inp3_q1.sort_values(by='name', ascending=True)
final_str_q1 = (
    final_q1['name'] + ' ' + final_q1['year'] + ' ' + final_q1['quarter'] + ' ' + final_q1['link']
)
final_str_q1

Series([], dtype: object)