# In [None]:
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objs as go
from plotly.colors import qualitative as pc
import yfinance as yf
import logging
import os
import json
import time
# Make 'notebook' the default Plotly renderer used when figures are shown.
pio.renderers.default = 'notebook'

class StockResearch:
    def __init__(self,file=None,configs=None):
        self.logger = logging.getLogger(__name__)
        self.file = file
        self.configs = configs
        self.stocks_existing, configs_existing = self._load_stocks(), self._load_configs()
        if self.stocks_existing:
            if not configs_existing:
                self.interval = '1wk'
                self.freq = 'quarterly'
            self._get_data()
        
    def _load_stocks(self):
        if self.file is None:
            return False
        try:
            with open(self.file,'r') as f:
                self.tickers = [i.strip() for i in f.readlines()]
                if self.tickers:
                    return True
                else:
                    return False
        except FileNotFoundError:
            self.logger.error(f'{self.file} not found in cache')
            return False
        
    def _load_configs(self):
        if self.configs is None:
            return False
        try:
            with open(self.configs,'r') as f:
                content = json.load(f)
                try:
                    self.interval = content['interval']
                    self.freq = content['freq']
                    return True
                except KeyError:
                    self.logger.error('Configs is missing required data')
                    return False
        except FileNotFoundError:
            self.logger.warning('Configs file not found in cache')
            return False
        
    def _get_data(self):
        for ticker in self.tickers:
            if not os.path.exists(f'{ticker}_fundamental_data_{self.freq}.csv'):
                self._get_fundamentals(ticker)
                time.sleep(0.5)
            if not os.path.exists(f'{ticker}_price_data_{self.interval}.csv'):
                self._get_ohlc(ticker)
                time.sleep(0.5)
                
    def _get_fundamentals(self,ticker):
        """Download income-statement and cash-flow data for *ticker* and cache it.

        The rows of both statements are stacked into one table; only the
        reporting periods (columns) present in both statements are kept. The
        result is written to ``{ticker}_fundamental_data_{freq}.csv``.

        Any failure is logged as a warning and no file is written.

        Args:
            ticker: Symbol passed to ``yf.Ticker``.
        """
        try:
            obj = yf.Ticker(ticker=ticker)
            # The balance sheet was previously requested here but never used;
            # dropping that call saves one network round trip per ticker.
            income = obj.get_income_stmt(freq=self.freq)
            cashflow = obj.get_cashflow(freq=self.freq)
            df = pd.concat([income,cashflow],join='inner',axis=0)
            # Reverse the column order (presumably newest-first from yfinance,
            # so the latest period ends up last, as the readers using
            # ``iloc[-1]`` / ``iloc[-4:]`` expect - TODO confirm).
            df = df[df.columns[::-1]]
            if self.freq == 'quarterly':
                df.columns = df.columns.to_period('Q').astype('str')
            else:
                df.columns = df.columns.year.astype('str')
            df.to_csv(f'{ticker}_fundamental_data_{self.freq}.csv')
        except Exception as e:
            self.logger.warning(f'Could not fetch Income-Statement for {ticker}: {e}')
    
    def _get_ohlc(self,ticker):
        """Download the full adjusted price history for *ticker* and cache it.

        The result is written to ``{ticker}_price_data_{interval}.csv`` with
        the columns ``Open, High, Low, Close, Volume``. Columns are selected
        by name rather than relabelled by position, so a different field
        order in the downloaded frame cannot swap the labels. An empty
        download is not cached, so a later run can try again.

        Any failure is logged as a warning and no file is written.

        Args:
            ticker: Symbol passed to ``yf.download``.
        """
        try:
            df = yf.download(ticker,interval=self.interval,period='max',auto_adjust=True,progress=False)
            if df is None or df.empty:
                self.logger.warning(f'Could not fetch OHLC for {ticker}: no data returned')
                return
            # Some yfinance versions return (field, ticker) MultiIndex columns
            # even for a single symbol; keep only the field level.
            if isinstance(df.columns,pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)
            df = df[['Open','High','Low','Close','Volume']]
            df.to_csv(f'{ticker}_price_data_{self.interval}.csv')
        except Exception as e:
            self.logger.warning(f'Could not fetch OHLC for {ticker}: {e}')
            
    def price_to_earnings(self):
        if self.stocks_existing:
            pe_df = pd.DataFrame()
            for ticker in self.tickers:
                try:
                    df = pd.read_csv(f'{ticker}_fundamental_data_{self.freq}.csv',index_col=0)
                    if self.freq == 'yearly':
                        diluted_eps = df.loc['DilutedEPS'].dropna().iloc[-1]
                    else:
                        if len(df.loc['DilutedEPS'].dropna()) < 4:
                            fill = (4-len(df.loc['DilutedEPS'].dropna()))* df.loc['DilutedEPS'].dropna().mean()
                            diluted_eps = df.loc['DilutedEPS'].dropna().sum() + fill
                        else:
                            diluted_eps = df.loc['DilutedEPS'].dropna().iloc[-4:].sum()
                        
                    df = pd.read_csv(f'{ticker}_price_data_{self.interval}.csv',index_col=0)
                    price = df['Close'].iloc[-1]
                    pe = price/diluted_eps
                    if pe < 0:
                        pe = np.NaN
                    pe_df.loc[ticker,'PE_ttm'] = round(pe,2)
                    
                except Exception as e:
                    pe = np.NaN
                    pe_df.loc[ticker,'PE_ttm'] = pe
                    self.logger.warning(f'Diluted EPS of {ticker} not available')
                    
                try:
                    time.sleep(0.5)
                    obj = yf.Ticker(ticker=ticker)
                    fpe = obj.info['forwardPE']
                    
                    pe_df.loc[ticker,'Forward_PE'] = round(fpe,2)
                
                except Exception as e:
                    fpe = np.NaN
                    pe_df.loc[ticker,'Forward_PE'] = fpe
                    self.logger.warning(f'No forward EPS for {ticker} available')
                
                discount = (pe/fpe-1)*100 if (pe and fpe and fpe != 0) else np.NaN
                pe_df.loc[ticker,'Difference(%)'] = round(discount,2)
                    
                    
            if not pe_df.empty:
                pe_df.sort_values(by='Difference(%)',ascending=False,inplace=True)
                return list(pe_df.dropna().index[:int(len(pe_df)*0.33)])
            else:
                return []
        
    def return_correlation(self,stocks=None):
        if self.stocks_existing:
            if stocks is not None and stocks:
                tickers = stocks
            else:
                tickers = self.tickers
                
            to_merge = []
            for ticker in tickers:
                try:
                    df = pd.read_csv(f'{ticker}_price_data_{self.interval}.csv',index_col=0,parse_dates=True)
                    to_merge.append(df['Close'].rename(ticker))
                except FileNotFoundError:
                    continue
            
            merged_df = pd.concat(to_merge,join='inner',axis=1)
            corr = merged_df.pct_change(axis=0).corr()
            
            fig = go.Figure()
            fig.add_trace(
                go.Heatmap(
                    z=corr,
                    x=corr.columns,
                    y=corr.columns,
                    colorscale='Viridis'
                )
            )
            fig.update_layout(
                font=dict(size=12,color='#fff'),
                paper_bgcolor='#000',
                plot_bgcolor='#000',
                title=dict(text='Correlation Heatmap',font=dict(size=16,weight='bold')
                )
            )
            fig.show()
        return True
    
    def growth(self,stocks=None):
        if self.stocks_existing:
            if stocks is not None and stocks:
                tickers = stocks
            else:
                tickers = self.tickers
                
            for ticker in tickers:
                try:
                    df = pd.read_csv(f'{ticker}_fundamental_data_{self.freq}.csv',index_col=0)
                except FileNotFoundError:
                    continue
                
                try:
                    required = df.loc[['NetIncome','FreeCashFlow','TotalRevenue']].dropna(axis=1)
                    netmargin = required.loc['NetIncome']/required.loc['TotalRevenue']*100
                    fcfmargin = required.loc['FreeCashFlow']/required.loc['TotalRevenue']*100
                    revgrowth = required.loc['TotalRevenue'].pct_change().fillna(0.0) *100
                    margins = {'FCF-Margin':fcfmargin,'Net-Margin':netmargin}
                except KeyError:
                    self.logger.warning(f'{ticker} has not sufficient data')
                    continue
                
                fig = go.Figure()
                for j,i in enumerate(margins):
                    fig.add_trace(
                        go.Bar(
                            x=margins[i].index,
                            y=margins[i],
                            name=i,
                            marker=dict(color=pc.Plotly[j],line=dict(color='#fff',width=2)),
                            hovertemplate='<b>%{x}</b><br>%{y:.0f}%'
                        )
                    )
                
                fig.add_trace(
                    go.Scatter(
                        x=revgrowth.index,
                        y=revgrowth,
                        name='Revenue-Growth',
                        mode='lines+markers',
                        line=dict(color=pc.Plotly[len(margins)],shape='spline',width=3),
                        marker=dict(color=pc.Plotly[len(margins)],symbol='diamond',size=11,line=dict(color='#fff',width=2)),
                        hovertemplate='<b>%{x}</b><br>%{y:.0f}%'
                    )
                )
                fig.update_layout(
                font=dict(size=12,color='#fff'),
                paper_bgcolor='#000',
                plot_bgcolor='#000',
                xaxis=dict(gridcolor='#000'),
                yaxis=dict(gridcolor='#444',title='(%)'),
                title=dict(text=f'{ticker} Profitability & Momentum',font=dict(size=16,weight='bold'))
                )
                fig.show()
    
    def risk_and_return(self,startdate=None,stocks=None):
        if self.stocks_existing:
            if stocks is not None and stocks:
                tickers = stocks
            else:
                tickers = self.tickers
            if startdate is None:
                self.logger.warning('Stats are calculated with different startpoints for each stock')
            else:
                try:
                    startdate = pd.to_datetime(startdate)
                except Exception as e:
                    self.logger.error('Invalid Startdate')
                    return
            
            to_merge = []
            stats = pd.DataFrame()
            for ticker in tickers:
                try:
                    df = pd.read_csv(f'{ticker}_price_data_{self.interval}.csv',index_col=0,parse_dates=True)['Close']
                    if startdate is not None:
                        df = df.loc[df.index>=startdate]
                        
                except FileNotFoundError:
                    continue
                
                to_merge.append(df.rename(ticker))
                if self.interval == '1d':
                    annualize = 252
                elif self.interval == '1wk':
                    annualize = 52
                elif self.interval == '1mo':
                    annualize = 12
                else:
                    annualize = None
                    
                returns = df.pct_change().dropna()*100
                if annualize is not None:
                    fixed_rfr = 4
                    mean = returns.mean()*annualize
                    std = returns.std()*np.sqrt(annualize)
                    sr = (mean-fixed_rfr)/std
                    
                    stats.loc[ticker,'Average_Return'] = round(mean,2)
                    stats.loc[ticker,'Return_Volatility'] = round(std,2)
                    stats.loc[ticker,'Sharpe_Ratio'] = round(sr,2)
                    
            if annualize is not None:
                allstocks_df = pd.concat(to_merge,join='inner',axis=1)
                allstocksreturn = (allstocks_df.pct_change()*100).dropna().mean(axis=1)
                allstocks_mean = allstocksreturn.mean() * annualize
                allstocks_std = allstocksreturn.std() * np.sqrt(annualize)
                allstocks_sr = (allstocks_mean-fixed_rfr)/allstocks_std

                stats.loc['— Mean —','Average_Return'] = round(allstocks_mean,2)
                stats.loc['— Mean —','Return_Volatility'] = round(allstocks_std,2)
                stats.loc['— Mean —','Sharpe_Ratio'] = round(allstocks_sr,2)
            
            if not stats.empty:
                print(stats.sort_values(by='Sharpe_Ratio',ascending=False))
                
            fig = go.Figure()
            fig.add_trace(
                go.Scatter(
                    x=allstocks_df.index[1:],
                    y=(allstocks_df.mean(axis=1).iloc[1:]/allstocks_df.iloc[0].mean()-1)*100,
                    mode='lines',
                    line=dict(color=pc.Plotly[0],width=1),
                    fill='tozeroy',
                    name='Return',
                    hovertemplate='<b>%{x}</b><br>%{y:.0f}%'
                )
            )
            fig.update_layout(
                paper_bgcolor='#000',
                plot_bgcolor='#000',
                xaxis_gridcolor='#000',
                yaxis=dict(gridcolor='#444',title='Return(%)'),
                font=dict(color='#fff',size=12),
                title=dict(text='Historic eqw-Return',font=dict(size=16,weight='bold'))
            )
            fig.show()
            
            if annualize is None:
                self.logger.warning('Stats only available for [1d,1wk,1mo]')
            return True
                                 
    
    def __call__(self):
        dc = self.price_to_earnings()
        self.growth(dc)
        self.return_correlation(dc)
        self.risk_and_return('2023-01-01',dc)
                
            
if __name__ == '__main__':
    # Show warnings and errors only; the class logs skipped tickers at WARNING level.
    logging.basicConfig(level=logging.WARNING)
    # Tickers are read from 'DJI.txt', settings from 'configs2.json'.
    # Constructing the object already fetches any missing cached data.
    obj = StockResearch('DJI.txt','configs2.json')
    # Run the full pipeline defined in StockResearch.__call__.
    obj()