In [677]:
import pandas as pd, numpy as np, sqlite3 as sql, datetime as dt, re, time, yfinance as yf
from dateutil.relativedelta import relativedelta

In [1057]:
with sql.connect('../data/interim/companies.db') as con:
        port = pd.read_sql(f"SELECT * FROM daily ORDER BY Date", con=con).drop_duplicates(subset=['Date', 'symbol'])
        recommends = pd.read_sql(f"SELECT Date date, symbol, Firm, new_grade, prev_grade, Action from recommendations ORDER BY Date", con=con)
        arts =pd.read_sql("SELECT date, symbol, publisher, pos_sent, neu_sent, neg_sent, comp_sent FROM articles ORDER BY date", con=con)
        crypt_arts = pd.read_sql("SELECT date, symbol, publisher,pos_sent, neu_sent, neg_sent, comp_sent  FROM news_sentiment ORDER BY date", con=con)
        articles = pd.concat([arts, crypt_arts], axis=0, ignore_index=True)
        comments = pd.read_sql(f"SELECT DATE(timestamp) date, channel, symbols, pos_sent, neu_sent, neg_sent, comp_sent from symbol_comments ORDER BY timestamp", con=con)
        comments.loc[:, "symbols"] = comments.symbols.apply(lambda x: x.replace('BTC', 'BTC-USD'))
        companies = tuple(port.symbol.unique())
        c_data = pd.read_sql(f"SELECT * from mentions WHERE symbol IN {companies}", con=con, index_col='pk')

In [1058]:
symbols_re = re.compile(r"\[|\]|\'|\'")
last_index = comments.index.max()

In [1059]:
# decompose for single symbol
for i, row in comments.iterrows():
    symbols = re.sub(symbols_re, "", row.symbols)
    symbols = symbols.split(',')
    for sym in symbols:
        last_index+=1
        comments.loc[last_index, ["symbols"]] = sym
        comments.loc[last_index, ["comment_index"]] = i
        comments.loc[last_index, ["date", "channel", "pos_sent", "neu_sent", "neg_sent", "comp_sent"]] = row.date, row.channel, row.pos_sent, row.neu_sent,  row.neg_sent, row.comp_sent


In [1060]:
comments = comments[lambda x:~( x.comment_index.isnull())]

In [1075]:
comments.loc[comments.index, "date"] = comments.date.apply(pd.to_datetime)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [1062]:
comments

Unnamed: 0,date,channel,symbols,pos_sent,neu_sent,neg_sent,comp_sent,comment_index
3394,2019-08-08,wetlqd-ideas,AMD,0.293,0.707,0.000,0.4404,0.0
3395,2019-08-13,wetlqd-ideas,MSFT,0.182,0.818,0.000,0.4404,1.0
3396,2019-08-13,wetlqd-ideas,MSFT,0.294,0.706,0.000,0.3612,2.0
3397,2019-08-14,wetlqd-ideas,ON,0.000,0.597,0.403,-0.4019,3.0
3398,2019-08-14,wetlqd-ideas,MA,0.000,0.597,0.403,-0.4019,3.0
...,...,...,...,...,...,...,...,...
7660,2020-12-08,option-trading,OOOO,0.000,1.000,0.000,0.0000,3389.0
7661,2020-12-08,option-trading,ING,0.900,0.100,0.000,0.8402,3390.0
7662,2020-12-08,trading,FSLY,0.000,1.000,0.000,0.0000,3391.0
7663,2020-12-08,trading,IT,0.000,1.000,0.000,0.0000,3392.0


In [1063]:
articles.loc[articles.index, "date"] = articles.date.apply(pd.to_datetime)

In [1064]:
recommendsDict = {"Very Bearish": 1, "Bearish": 2, "Neutral": 3, "Bullish": 4, "Very Bullish": 5}

In [1065]:
recommends=recommends.assign(new_sent = lambda x: x.new_grade.apply(lambda g: recommendsDict[g]))\
    .assign(prev_sent = lambda x: x.prev_grade.apply(lambda g: recommendsDict[g])).assign(date = lambda x: x.date.apply(pd.to_datetime))

In [1066]:
comments = comments[lambda x: x.symbols.isin(companies)]

In [1067]:
port.loc[port.index, "Date"] = port.Date.apply(pd.to_datetime)

In [1094]:
port = port.rename(columns={'Date': 'date'})

In [None]:
def create_daily_data(ticker):
    tick = yf.ticker.Ticker(ticker)
    historical_data = tick.history("1mo")
    outstanding = tick.info.get("sharesOutstanding")
    if outstanding == None:
        outstanding = 1
    daily_close = historical_data["Close"]
    pct_change = daily_close.pct_change().fillna(0)
    periods = 2
    # calc volatility
    vola = (pct_change.rolling(periods).std() * np.sqrt(periods)).fillna(0)
    historical_data = historical_data.assign(Volatility = vola)
    historical_data = historical_data.assign(Turnover = lambda x: x.Volume / outstanding)
    historical_data = historical_data.assign(symbol = ticker)
    return historical_data.reset_index().round({"Volatility": 6, "Turnover": 6})

In [None]:
full_daily_data = pd.DataFrame()
for i, l in enumerate(companies):
    new_df = create_daily_data(l)
    full_daily_data = pd.concat([full_daily_data, new_df], axis=0, ignore_index=True)
    if i % 5 == 0:
        print(f"Finished with {i}")

Finished with 0
Finished with 5
Finished with 10
Finished with 15
Finished with 20
Finished with 25
Finished with 30


In [None]:
_22 = full_daily_data[lambda x: x.Date > dt.datetime(2021, 12, 31)]

In [None]:
_22 = _22.rename(columns={"Stock Splits": "Stock_Splits"})

In [None]:
_22 = _22.loc[:, port.drop('pk', axis=1).columns]

In [None]:
new_port = pd.concat([port.drop('pk', axis=1), _22], axis=0, ignore_index=True)

In [1095]:
# take aggregations over wanted frequency; make buy decisions based off of the frequency of data points and sentiments
# return port with new information: shares and cost * shares
class EAT():
    def __init__(self, portfolio, articles, comments, recs, start, end):
        self.portfolio = portfolio.copy(deep=True)
        self.postions = []
        self.start = start
        self.end = end
        self.articles = articles[lambda x: (x.date >= start) & (x.date <= end)]
        self.comments =  comments[lambda x: (x.date >= start) & (x.date <= end)]
        self.recs = recs[lambda x: (x.date >= start) & (x.date <= end)]

        self.aggs = {}

    def aggregate(self):
        articles_agg = self.articles.groupby([pd.Grouper(key="date", freq="1Y"), 'symbol'])\
            .agg({'pos_sent': ['mean'], 'neg_sent': ['mean'], 'neu_sent': ['mean'], 'comp_sent': ['mean', 'count']}).assign(type=lambda x: 'News')
        comments_agg = self.comments.groupby([pd.Grouper(key="date", freq="1Y"), 'symbols'])\
            .agg({'pos_sent': ['mean'], 'neg_sent': ['mean'], 'neu_sent': ['mean'], 'comp_sent': ['mean', 'count']}).assign(type=lambda x: 'Chats')
        recommends_agg = self.recs.groupby([pd.Grouper(key="date", freq="1Y"), 'symbol'])\
            .agg({'new_sent': ['mean'], 'prev_sent': ['mean', 'count']}).assign(type=lambda x: 'Analysts')
        recommends_agg = recommends_agg.reset_index()
        comments_agg = comments_agg.reset_index()
        articles_agg = articles_agg.reset_index()
        recommends_agg.columns = recommends_agg.columns.droplevel(1)
        comments_agg.columns = comments_agg.columns.droplevel(1)
        articles_agg.columns = articles_agg.columns.droplevel(1)
        
        articles_agg.columns = ['date', 'symbol', 'pos_sent', 'neg_sent', 'neu_sent', 'comp_sent',
       'counts', 'type']
        comments_agg.columns = ['date', 'symbol', 'pos_sent', 'neg_sent', 'neu_sent', 'comp_sent',
       'counts', 'type']
        recommends_agg.columns = ['date', 'symbol', 'new_sent', 'prev_sent', 'counts', 'type']
        # comments_agg=comments_agg.assign(date = lambda x: x.date.apply(lambda x: x.date))
        self.aggs['recommendations'] = recommends_agg
        self.aggs['articles'] = articles_agg
        self.aggs['comments'] = comments_agg
        return None 


    def tradeSents(self, agg, label, min_samples, min_comp_sent, shares):
        # add action, shares, cost
        returns = self.aggs[agg][lambda x: (x.date >= self.start) & (x[label] >= min_comp_sent) & (x.counts >= min_samples)]
        # query portfolio for first cost add columns
        indexes = pd.Int64Index([])
        for date, sym in returns.loc[:, ['date', 'symbol']].values:
            # ns = returns[lambda x: x.date == date].shape[0]
            if sym not in self.postions:
                self.postions.append(sym)
                f1_date = (date + relativedelta(years=1)).to_pydatetime()
                indexes = self.portfolio[lambda x: ((x.date > date) & (x.symbol == sym) & (x.date <= f1_date))].index
                self.portfolio.loc[indexes, "shares"] = shares
            else:
                self.postions.append(sym)
                f1_date = (date + relativedelta(years=1)).to_pydatetime()
                indexes = self.portfolio[lambda x: ((x.date > date) & (x.symbol == sym) & (x.date <= f1_date))].index
                self.portfolio.loc[indexes, "shares"] = shares * self.postions.count(sym)
            
            i = returns[lambda x: (x.date == date) & (x.symbol == sym)].index
            if not indexes.empty:
                returns.loc[i, 'cost'] = shares * self.portfolio.loc[indexes[0], "Close"]
                returns.loc[i, 'returns'] = shares * self.portfolio.loc[indexes[-1], "Close"]
            else:
                indexes = self.portfolio[lambda x: (x.symbol == sym)].index
                returns.loc[i, 'cost'] = shares * self.portfolio.loc[indexes[-1], "Open"]
                returns.loc[i, 'returns'] = shares * self.portfolio.loc[indexes[-1], "Close"]

        return returns#self.portfolio
        


In [1099]:
eat = EAT(port, articles, comments, recommends, dt.datetime(2018, 1, 1), dt.datetime(2022, 1, 30))

In [1100]:
eat.aggregate()

In [1101]:
ret = eat.tradeSents("comments", "comp_sent", min_samples=1, min_comp_sent=0.15, shares=10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [956]:
ret = eat.tradeSents("articles", "comp_sent", 100, 0.5, 10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [957]:
ret = eat.tradeSents("recommendations", "new_sent", 25, 4, 10)

In [958]:
# eat.tradeSents("articles", "comp_sent", 100, 0.5, 10)
# eat.tradeSents("recommendations", "new_sent", 25, 4, 10)

In [1102]:
ret.groupby('date').sum().assign(r_pct = lambda x: (x.returns - x.cost) / x.cost)

Unnamed: 0_level_0,pos_sent,neg_sent,neu_sent,comp_sent,counts,cost,returns,r_pct
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-12-31,5.265895,0.623934,15.110028,8.133532,93,42329.746435,67754.449959,0.600634
2020-12-31,4.164832,0.921737,24.733701,5.878339,1405,330494.633604,502545.053391,0.520585


In [808]:
with sql.connect('../data/interim/companies.db') as con:
    new_port.to_sql('daily', con=con, index=True, index_label="pk", if_exists='replace')