In [1]:
import pandas as pd
from pandas_datareader import data
import numpy as np
from datetime import datetime
from datetime import timedelta
import re
import math
import statsmodels.api as sm
import seaborn as sns
from matplotlib import pyplot as plt
plt.style.use('ggplot')

In [2]:
with open('tickers.txt','r') as file:
    tickers = file.readlines()
    
tickers = [item.strip() for item in tickers]

In [3]:
columns = ['transaction_date', 'transaction_type', 'company_name',
           'ticker', 'name', 'position', 'shares_traded', 'avg_price',
           'total_price', 'shares_owned', 'own_type']



In [4]:
def file_readr(ticker_list):
    my_dict = {}
    for ticker in ticker_list:
        try:
            current_df = pd.read_csv(f'./insiders_{ticker}.csv', header = None, names = columns)
            if current_df.shape[0] > 0:
                my_dict[ticker] = current_df
            else:
                continue
            del current_df
        except:
            continue
    
    return my_dict



In [5]:
dict_insiders = file_readr(tickers)




### cleaning insiders dictionary 

In [6]:

def clean_dict(x_dict):
    
    for ticker in x_dict.keys():
        df = x_dict[ticker]
    
        df.set_index(pd.to_datetime(df.transaction_date), inplace = True)
        
        print(f'{ticker}')

        df.drop('transaction_date',axis=1, inplace = True)
    
        transactions_list = [item.split('(')[0] for item in df.transaction_type.tolist()]
    
        df['sale_num'] = [1 if item == 'Sale' else 0 for item in transactions_list]
        df['buy_num'] = [1 if item == 'Purchase' else 0 for item in transactions_list]

        weekly = df.set_index(df.index-timedelta(days=7)).resample('W-SUN')
    
        shares_sum = weekly.shares_traded.sum()
        total_price = weekly.total_price.sum()
        insider_sell = weekly.sale_num.sum()
        insider_buy = weekly.buy_num.sum()
    
        left = pd.merge(shares_sum,total_price,on=shares_sum.index)
        right = pd.merge(insider_sell,insider_buy,on=insider_sell.index)

        left = left.set_index('key_0')
        right = right.set_index('key_0')

        agg = pd.merge(left,right,on=left.index)
        agg = agg.set_index('key_0')
        agg.index.name = 'Date'
    
        x_dict[ticker] = agg
        
        print('cleaned successfully')
    
    return x_dict
    

In [7]:
dict_insiders = clean_dict(dict_insiders)

FCX
cleaned successfully
MAS
cleaned successfully
HAS
cleaned successfully
AMZN
cleaned successfully
MU
cleaned successfully
VAR
cleaned successfully
PEG
cleaned successfully
LEG
cleaned successfully
JBHT
cleaned successfully
CAT
cleaned successfully
K
cleaned successfully
HES
cleaned successfully
NDAQ
cleaned successfully
BLL
cleaned successfully
AAP
cleaned successfully
VTR
cleaned successfully
LW
cleaned successfully
PG
cleaned successfully
NSC
cleaned successfully
PBCT
cleaned successfully
AMAT
cleaned successfully
MAR
cleaned successfully
INCY
cleaned successfully
ORLY
cleaned successfully
ROL
cleaned successfully
JWN
cleaned successfully
NEE
cleaned successfully
QRVO
cleaned successfully
CVX
cleaned successfully
HUM
cleaned successfully
NBL
cleaned successfully
BDX
cleaned successfully
AWK
cleaned successfully
SJM
cleaned successfully
PXD
cleaned successfully
CHD
cleaned successfully
AMP
cleaned successfully
DRI
cleaned successfully
AEE
cleaned successfully
ABMD
cleaned successfu

### cleaning stock price data

In [27]:
def get_stocks(x_dict):
    stocks_dict = {}
    
    for item in x_dict.keys():
        ticker = item
        start_date = str(x_dict[ticker].index[0]).split()[0]
        end_date = str(x_dict[ticker].index[-1]).split()[0]
        
        try:
            stocks_df = data.DataReader(ticker,'yahoo',start_date,end_date)
        except:
            continue
        
        stocks_dict[ticker]=stocks_df
        
    return stocks_dict
        
        

In [31]:
def clean_stocks(x_dict):    
    stocks_dict = {}
    
    for item in x_dict.keys():
        ticker = item
        stocks_df = x_dict[ticker]
        stocks_df['Return'] = stocks_df['Adj Close'].diff()
        
        stocks_weekly = stocks_df.set_index(stocks_df.index-timedelta(days=7)).resample('W-SUN').std()[['Return']]
        
        stocks_dict[ticker]=stocks_weekly
        
    return stocks_dict

In [29]:
stocks_dict_pre = get_stocks(dict_insiders)

In [40]:
stocks_dict = clean_stocks(stocks_dict_pre)

In [59]:
stocks_dict['MAS']

Unnamed: 0_level_0,Return
Date,Unnamed: 1_level_1
2003-11-30,0.084395
2003-12-07,0.195385
2003-12-14,0.134954
2003-12-21,0.079434
2003-12-28,0.231513
2004-01-04,0.112806
2004-01-11,0.107068
2004-01-18,0.174923
2004-01-25,0.178033
2004-02-01,0.187877


In [58]:
dict_insiders['MAS']

Unnamed: 0_level_0,shares_traded,total_price,sale_num,buy_num
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-11-30,81538,2278064.0,3,0
2003-12-07,0,0.0,0,0
2003-12-14,0,0.0,0,0
2003-12-21,0,0.0,0,0
2003-12-28,0,0.0,0,0
2004-01-04,0,0.0,0,0
2004-01-11,0,0.0,0,0
2004-01-18,0,0.0,0,0
2004-01-25,0,0.0,0,0
2004-02-01,0,0.0,0,0


#### merging and cleaning insider & stock data

In [53]:
def merge_dicts(x,y):
    full_dict = {}
    x_dict = x
    y_dict = y
    
    for item in x_dict.keys():
        ticker = item
        
        if ticker not in y_dict.keys():
            del x_dict[ticker]
        else:
            
            left = x_dict[ticker]
            right = y_dict[ticker]
            
            try:
                
                full_df = pd.merge(left,right, on=left.index)
            
            except:
                
                print('could not merge')
                
            full_df = full_df.set_index('key_0')
            
            full_df.index.name = 'Date'
            
            full_df.sale_num[full_df.sale_num > 0] = 1
            
            full_df.buy_num[full_df.buy_num>0] = 1
            
            full_df['risk_dummy'] = np.where(full_df.Return > 1.5, 1, 0)
        
            full_dict[ticker]=full_df
            
    return full_dict
        
        
        

In [54]:
full_dict = merge_dicts(dict_insiders,stocks_dict)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


could not merge


KeyError: 'key_0'

In [None]:
### plotting insider trades

In [None]:

def plot_dict(insider_dict,stocks_dict):
    plt.style.use('bmh')
    fig = plt.figure(figsize=(20,15))
    
    for item in insider_dict.keys():
        
        ticker = item
        
        if ticker not in stocks_dict.keys():
            
            raise ValueError('Keys must match in both dictionaries')
            
        else:
            
            stock_df = stocks_dict[ticker]
            insider_df = insider_dict[ticker]
            
            sns.lineplot(x=stock_df.index,y=stock_df['Adj Close'])
            for x in insider_df.index.tolist():
                plt.axvline(x,linestyle=':',linewidth=0.1,c='b')
                    
        

In [None]:
plot_dict(dict_insiders,stocks_dict_pre)

### fitting logistic models

In [None]:
def logit_dict(x_dict):
    fit_dict = {}
    
    for item in x_dict.keys():
        
        ticker = item
        print(ticker)
        df = x_dict[ticker]
        
        X = df[['shares_traded','sale_num','buy_num']]
        Y = df.return_dummy
        logit_model = sm.Logit(Y,X)
        result = logit_model.fit()
        
        fit_dict[ticker]=result
        
        
    return fit_dict

In [None]:
sm.Logit().get

In [None]:
models_dict = logit_dict(full_dict)



In [None]:
for key,value in models_dict.items():
    print(key)
    print('-'*30)
    print(value.pvalues)
    print('*'*30)


